threadkeeper-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
threadkeeper/embeddings.py
@@ -0,0 +1,232 @@
+ """Embedding model loader, vectorization, and cosine/FTS/RRF search primitives
+ over notes and dialog_messages.
+
+ Two cosine paths:
+ - Fast: sqlite-vec `vec0` virtual tables (notes_vec, dialog_vec) when the
+   extension is loaded. Sub-linear search via the vec0 KNN backend.
+ - Fallback: legacy Python-side dot product over the BLOB column. Used when
+   sqlite-vec isn't available (extension build disabled / package missing).
+   Correct, just slower at scale.
+
+ Embeddings are dual-written: every new note/dialog_message gets its
+ vector in BOTH the BLOB column AND the vec0 virtual table, so the legacy
+ path keeps working and we can roll back without data loss. Old rows are
+ backfilled to vec0 lazily by the ingester.
+ """
+ import sqlite3
+ from typing import Optional
+
+ from .config import SEMANTIC_AVAILABLE, EMBED_MODEL_NAME
+ from . import db as _db
+
+
+ def _vec_on() -> bool:
+     """Indirect lookup so monkeypatching db.vec_available in tests works."""
+     return _db.vec_available()
+
+ _model = None
+
+ def _get_model():
+     global _model
+     if not SEMANTIC_AVAILABLE:
+         return None
+     if _model is None:
+         from sentence_transformers import SentenceTransformer  # type: ignore
+         _model = SentenceTransformer(EMBED_MODEL_NAME)
+     return _model
+
+ def _embed(text: str) -> Optional[bytes]:
+     m = _get_model()
+     if m is None:
+         return None
+     v = m.encode([text], normalize_embeddings=True)[0].astype("float32")
+     return v.tobytes()
+
+
+ def _cosine_search(conn: sqlite3.Connection, query: str, k: int) -> list[dict]:
+     """Top-k cosine over notes. Uses vec0 ANN when available."""
+     m = _get_model()
+     if m is None:
+         return []
+     import numpy as np  # type: ignore
+     qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
+     if _vec_on():
+         try:
+             return _vec0_notes_search(conn, qv.tobytes(), k)
+         except sqlite3.OperationalError:
+             pass  # fall through to legacy
+     # Legacy Python-side path
+     rows = conn.execute(
+         "SELECT id, content, kind, thread_id, created_at, embedding "
+         "FROM notes WHERE embedding IS NOT NULL"
+     ).fetchall()
+     if not rows:
+         return []
+     scored = []
+     for r in rows:
+         v = np.frombuffer(r["embedding"], dtype="float32")
+         scored.append((float(np.dot(qv, v)), r))
+     scored.sort(key=lambda x: -x[0])
+     return [{"score": s, **dict(r)} for s, r in scored[:k]]
+
+
+ def _vec0_notes_search(conn: sqlite3.Connection, qv_blob: bytes,
+                        k: int) -> list[dict]:
+     """vec0 KNN over notes_vec, joined back to notes for payload.
+     Distance is Euclidean (L2) on normalized vectors; we convert it to a
+     cosine score for compatibility with the legacy result shape:
+     cos(q, v) = 1 - dist²/2 for unit-norm vectors.
+     """
+     rows = conn.execute(
+         "SELECT n.id, n.content, n.kind, n.thread_id, n.created_at, "
+         "       v.distance "
+         "FROM notes_vec v "
+         "JOIN notes n ON n.id = v.id "
+         "WHERE v.embedding MATCH ? AND k = ? "
+         "ORDER BY v.distance",
+         (qv_blob, max(1, int(k))),
+     ).fetchall()
+     out = []
+     for r in rows:
+         score = max(-1.0, min(1.0, 1.0 - (r["distance"] ** 2) / 2.0))
+         d = {k_: r[k_] for k_ in ("id", "content", "kind",
+                                   "thread_id", "created_at")}
+         d["score"] = float(score)
+         out.append(d)
+     return out
+
+
+ def _dialog_cosine_search(conn: sqlite3.Connection, query: str,
+                           k: int) -> list[dict]:
+     """Top-k cosine over dialog_messages. Uses vec0 ANN when available."""
+     m = _get_model()
+     if m is None:
+         return []
+     import numpy as np  # type: ignore
+     qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
+     if _vec_on():
+         try:
+             return _vec0_dialog_search(conn, qv.tobytes(), k)
+         except sqlite3.OperationalError:
+             pass
+     rows = conn.execute(
+         "SELECT uuid, role, project, session_id, content, created_at, embedding "
+         "FROM dialog_messages WHERE embedding IS NOT NULL"
+     ).fetchall()
+     if not rows:
+         return []
+     scored = []
+     for r in rows:
+         v = np.frombuffer(r["embedding"], dtype="float32")
+         scored.append((float(np.dot(qv, v)), r))
+     scored.sort(key=lambda x: -x[0])
+     return [{"score": s, **dict(r)} for s, r in scored[:k]]
+
+
+ def _vec_upsert_note(conn: sqlite3.Connection, note_id: int,
+                      emb_blob: Optional[bytes]) -> None:
+     """Mirror a note's embedding into notes_vec. No-op when vec0 isn't
+     loaded or the blob is None. Safe to call multiple times — uses
+     INSERT OR REPLACE keyed by integer id."""
+     if not _vec_on() or emb_blob is None:
+         return
+     try:
+         conn.execute(
+             "INSERT OR REPLACE INTO notes_vec(id, embedding) VALUES (?, ?)",
+             (note_id, emb_blob),
+         )
+     except sqlite3.OperationalError:
+         pass  # vec0 table missing on this connection — silent fall-through
+
+
+ def _vec_upsert_dialog(conn: sqlite3.Connection, uuid: str,
+                        emb_blob: Optional[bytes]) -> None:
+     """Mirror a dialog_message embedding into dialog_vec via the uuid map.
+     Resolves or assigns a rowid for the given uuid in dialog_vec_map, then
+     INSERT-OR-REPLACEs keyed by that rowid in dialog_vec."""
+     if not _vec_on() or emb_blob is None:
+         return
+     try:
+         row = conn.execute(
+             "SELECT rowid FROM dialog_vec_map WHERE uuid=?", (uuid,)
+         ).fetchone()
+         if row is None:
+             cur = conn.execute(
+                 "INSERT INTO dialog_vec_map(uuid) VALUES (?)", (uuid,)
+             )
+             vec_rowid = cur.lastrowid
+         else:
+             vec_rowid = row[0] if not hasattr(row, "keys") else row["rowid"]
+         conn.execute(
+             "INSERT OR REPLACE INTO dialog_vec(rowid, embedding) VALUES (?, ?)",
+             (vec_rowid, emb_blob),
+         )
+     except sqlite3.OperationalError:
+         pass
+
+
+ def _vec0_dialog_search(conn: sqlite3.Connection, qv_blob: bytes,
+                         k: int) -> list[dict]:
+     """vec0 KNN over dialog_vec, joined via dialog_vec_map.uuid back to
+     dialog_messages for payload."""
+     rows = conn.execute(
+         "SELECT d.uuid, d.role, d.project, d.session_id, d.content, "
+         "       d.created_at, v.distance "
+         "FROM dialog_vec v "
+         "JOIN dialog_vec_map m ON m.rowid = v.rowid "
+         "JOIN dialog_messages d ON d.uuid = m.uuid "
+         "WHERE v.embedding MATCH ? AND k = ? "
+         "ORDER BY v.distance",
+         (qv_blob, max(1, int(k))),
+     ).fetchall()
+     out = []
+     for r in rows:
+         score = max(-1.0, min(1.0, 1.0 - (r["distance"] ** 2) / 2.0))
+         d = {k_: r[k_] for k_ in ("uuid", "role", "project",
+                                   "session_id", "content", "created_at")}
+         d["score"] = float(score)
+         out.append(d)
+     return out
+
+ def _fts_search(conn: sqlite3.Connection, query: str,
+                 k: int) -> list[dict]:
+     """FTS5 search over dialog_fts joined to dialog_messages. FTS5 ranks
+     by BM25 (lower = better); we keep the FTS5 result order, which is
+     already best-first, as the ranking fed into RRF."""
+     try:
+         rows = conn.execute(
+             "SELECT f.uuid, d.role, d.session_id, d.content, d.created_at "
+             "FROM dialog_fts f "
+             "JOIN dialog_messages d ON d.uuid = f.uuid "
+             "WHERE dialog_fts MATCH ? ORDER BY rank LIMIT ?",
+             (query, max(1, int(k))),
+         ).fetchall()
+     except sqlite3.OperationalError:
+         # FTS reserved-char syntax error or table missing
+         return []
+     return [
+         {
+             "uuid": r["uuid"],
+             "role": r["role"],
+             "session_id": r["session_id"],
+             "content": r["content"],
+             "created_at": r["created_at"],
+         }
+         for r in rows
+     ]
+
+ def _rrf_combine(lists: list[list[dict]], top_n: int,
+                  k_rrf: int = 60) -> list[dict]:
+     """Reciprocal Rank Fusion: score = Σ 1/(rank + k_rrf) across input lists.
+     De-duplicates by uuid. Returns up to top_n payloads sorted by score."""
+     scores: dict[str, float] = {}
+     payloads: dict[str, dict] = {}
+     for lst in lists:
+         for rank, item in enumerate(lst):
+             uid = item.get("uuid")
+             if not uid:
+                 continue
+             scores[uid] = scores.get(uid, 0.0) + 1.0 / (rank + k_rrf)
+             if uid not in payloads:
+                 payloads[uid] = item
+     ranked = sorted(scores.items(), key=lambda x: -x[1])[:top_n]
+     return [payloads[uid] for uid, _ in ranked]
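
A quick standalone check of the two scoring conventions used above, assuming vec0 returns plain Euclidean distance on unit-norm vectors. This sketch is illustrative and not shipped in the wheel; all names are local to the example.

import numpy as np

# Unit vectors: ||q - v||^2 = 2 - 2*cos(q, v), hence cos = 1 - dist^2 / 2.
rng = np.random.default_rng(0)
q = rng.normal(size=384).astype("float32")
v = rng.normal(size=384).astype("float32")
q /= np.linalg.norm(q)
v /= np.linalg.norm(v)
dist = float(np.linalg.norm(q - v))  # what an L2 KNN backend would return
assert abs((1.0 - dist**2 / 2.0) - float(q @ v)) < 1e-5

# RRF with 0-based ranks, as in _rrf_combine: a uuid ranked first in both
# input lists scores 1/60 + 1/60 and beats any single-list hit (1/61, ...).
semantic = [{"uuid": "a"}, {"uuid": "b"}]
keyword = [{"uuid": "a"}, {"uuid": "c"}]
scores: dict[str, float] = {}
for lst in (semantic, keyword):
    for rank, item in enumerate(lst):
        scores[item["uuid"]] = scores.get(item["uuid"], 0.0) + 1.0 / (rank + 60)
assert max(scores, key=lambda u: scores[u]) == "a"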
threadkeeper/extract_daemon.py
@@ -0,0 +1,125 @@
+ """Extract daemon — periodic auto-harvest of decision-shaped utterances
+ from dialog_messages into the extract_candidates queue.
+
+ The architecture mirrors shadow_review:
+
+ 1. A daemon thread wakes every EXTRACT_INTERVAL_S seconds (0 = off).
+ 2. It calls extract_recent(window_min=EXTRACT_WINDOW_MIN) — the same logic
+    as the MCP tool, just invoked automatically instead of waiting for the
+    agent to remember to call it.
+ 3. It records events.kind='extract_pass' with the per-pass counters so
+    `extract_review_status()` can show health at a glance.
+
+ Where shadow_review extracts CLASS-LEVEL durable RULES (skills, lessons),
+ extract harvests PER-INCIDENT DECISION-SHAPED utterances and adds them
+ to the agent's review queue. The agent then materializes the survivors
+ via review_candidates() + accept_candidate().
+
+ Designed around an empirical finding (audit log, 2026-05-16): no
+ parallel session calls `note()` / `verbatim_user()` / `open_thread()`
+ on its own. Memory bookkeeping fights against the agent's primary task
+ focus. This daemon side-steps the incentive problem by harvesting from
+ what the agent ALREADY said out loud — no agent-side action required.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ import sqlite3
+ import threading
+ import time
+
+ from .config import EXTRACT_INTERVAL_S, EXTRACT_WINDOW_MIN
+ from .db import get_db
+ from . import identity
+
+ logger = logging.getLogger(__name__)
+
+ _started = False
+
+
+ def _last_extract_ts(conn: sqlite3.Connection) -> int:
+     """High-water timestamp of the most recent extract pass, or 0."""
+     try:
+         row = conn.execute(
+             "SELECT target FROM events WHERE kind='extract_pass' "
+             "ORDER BY id DESC LIMIT 1"
+         ).fetchone()
+     except sqlite3.OperationalError:
+         return 0
+     if not row or not row["target"]:
+         return 0
+     try:
+         return int(row["target"])
+     except (ValueError, TypeError):
+         return 0
+
+
+ def _record_extract_pass(conn: sqlite3.Connection,
+                          ts: int,
+                          outcome: str) -> None:
+     try:
+         conn.execute(
+             "INSERT INTO events (session_id, kind, target, summary, "
+             "created_at) VALUES (?, 'extract_pass', ?, ?, ?)",
+             (identity._session_id or "", str(ts), outcome[:300],
+              int(time.time())),
+         )
+         conn.commit()
+     except sqlite3.OperationalError:
+         logger.debug("extract_daemon: failed to record pass", exc_info=True)
+
+
+ def run_extract_pass(force: bool = False) -> str:
+     """Execute one extract pass synchronously. Used by the daemon AND by
+     the MCP tool for manual triggering.
+
+     Returns the same status string `extract_recent` returns ("ok
+     window=… scanned=… verbatim=… distill=… concept=… note=…
+     skipped_existing=…" or "no_dialog window=…m"), and advances the
+     `extract_pass` cursor for telemetry.
+     """
+     if EXTRACT_INTERVAL_S <= 0 and not force:
+         return "disabled"
+     # Late import — tools.extract registers MCP tools at import time, and
+     # the daemon module loads before all tools are registered.
+     from .tools.extract import extract_recent
+     try:
+         result = extract_recent(window_min=EXTRACT_WINDOW_MIN)
+     except Exception as e:
+         logger.debug("extract_daemon: pass failed", exc_info=True)
+         _record_extract_pass(get_db(), int(time.time()),
+                              f"error: {e}")
+         return f"error: {e}"
+     _record_extract_pass(get_db(), int(time.time()), str(result)[:200])
+     return str(result)
+
+
+ def _serve_loop() -> None:
+     """Daemon body. Sleep → tick → sleep, until the process dies."""
+     while True:
+         try:
+             run_extract_pass()
+         except Exception:
+             logger.debug("extract_daemon tick failed", exc_info=True)
+         time.sleep(EXTRACT_INTERVAL_S)
+
+
+ def start_extract_daemon() -> None:
+     """Idempotent daemon starter. Honors the env gate: no-op when
+     EXTRACT_INTERVAL_S <= 0. Same cascade prevention as shadow_review:
+     slim children (SEMANTIC_AVAILABLE=False) refuse to start the
+     daemon so spawn() doesn't recurse."""
+     global _started
+     if _started:
+         return
+     if EXTRACT_INTERVAL_S <= 0:
+         return
+     from .config import SEMANTIC_AVAILABLE
+     if not SEMANTIC_AVAILABLE:
+         return  # slim child: don't fire extract from here
+     t = threading.Thread(
+         target=_serve_loop, name="extract_daemon", daemon=True,
+     )
+     t.start()
+     _started = True
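
The start/loop skeleton extract_daemon.py follows, reduced to its moving parts: an env-gated interval, an idempotency flag, and a tick that must never kill the loop. Names here are illustrative, not package code.

import os
import threading
import time

INTERVAL_S = int(os.environ.get("DEMO_INTERVAL_S", "0"))  # 0 = disabled
_started = False

def _tick() -> None:
    print("pass at", int(time.time()))  # stand-in for run_extract_pass()

def _loop() -> None:
    while True:
        try:
            _tick()
        except Exception:
            pass  # a failed pass must never take the daemon down
        time.sleep(INTERVAL_S)

def start_daemon() -> None:
    global _started
    if _started or INTERVAL_S <= 0:  # idempotent + env gate
        return
    threading.Thread(target=_loop, name="demo_daemon", daemon=True).start()
    _started = True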
threadkeeper/helpers.py
@@ -0,0 +1,101 @@
+ """Stateless utility helpers used across the package: time formatting,
+ short-quoting, ID generation, process-aliveness check."""
+ from __future__ import annotations
+
+ import os
+ import secrets
+ import sqlite3
+ import subprocess
+ from typing import Optional
+
+
+ def fmt_age(seconds: int) -> str:
+     """Compact human-readable age. 0..59 → 's', then 'm', 'h', 'd'."""
+     if seconds < 60:
+         return f"{seconds}s"
+     m = seconds // 60
+     if m < 60:
+         return f"{m}m"
+     h = m // 60
+     if h < 24:
+         return f"{h}h"
+     d = h // 24
+     return f"{d}d"
+
+
+ def q(s: str) -> str:
+     """Compact double-quote escape for brief lines."""
+     return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'
+
+
+ def _gen_short_id(conn: sqlite3.Connection, prefix: str, table: str,
+                   id_col: str = "id") -> str:
+     """prefix + 3 hex chars (4096 unique). Retries on collision; falls
+     back to 5 hex chars (~1M unique) if 64 retries all collide."""
+     for _ in range(64):
+         cand = prefix + secrets.token_hex(2)[:3]
+         if not conn.execute(
+             f"SELECT 1 FROM {table} WHERE {id_col}=?", (cand,)
+         ).fetchone():
+             return cand
+     return prefix + secrets.token_hex(3)[:5]
+
+
+ def gen_thread_id(conn: sqlite3.Connection) -> str:
+     return _gen_short_id(conn, "T", "threads")
+
+
+ def gen_probe_id(conn: sqlite3.Connection) -> str:
+     return _gen_short_id(conn, "P", "probes")
+
+
+ def gen_concept_id(conn: sqlite3.Connection) -> str:
+     return _gen_short_id(conn, "C", "concepts")
+
+
+ def gen_distill_id(conn: sqlite3.Connection) -> str:
+     return _gen_short_id(conn, "D", "distill")
+
+
+ def gen_dialectic_id(conn: sqlite3.Connection) -> str:
+     return _gen_short_id(conn, "UC", "user_dialectic")
+
+
+ def alive(pid: Optional[int]) -> bool:
+     """True if pid corresponds to a running (non-zombie) process. Reaps
+     zombies opportunistically when pid is our own child. pid <= 0 is a
+     sentinel (used for visible spawns we don't track) → False."""
+     if pid is None or pid <= 0:
+         return False
+     try:
+         # Reap if it's our own exited child; waitpid returning the pid
+         # means the child is gone.
+         wpid, _ = os.waitpid(pid, os.WNOHANG)
+         if wpid == pid:
+             return False
+     except (ChildProcessError, OSError):
+         pass
+     try:
+         os.kill(pid, 0)
+     except ProcessLookupError:
+         return False
+     except PermissionError:
+         return True
+     except OSError:
+         return False
+     # Process exists; distinguish zombie via `ps -o state=`. Zombies show
+     # 'Z' on macOS/Linux. If ps fails (rare), assume alive.
+     try:
+         r = subprocess.run(
+             ["ps", "-p", str(pid), "-o", "state="],
+             capture_output=True, text=True, timeout=2,
+         )
+         state = (r.stdout or "").strip()
+         if state.startswith("Z") or state == "":
+             return False
+     except (subprocess.SubprocessError, OSError):
+         pass
+     return True
+
+
+ def normalize_text(s: str) -> str:
+     """Whitespace-collapsed lower for fuzzy duplicate detection."""
+     return " ".join(s.lower().strip().split())