threadkeeper 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- threadkeeper/__init__.py +8 -0
- threadkeeper/_mcp.py +6 -0
- threadkeeper/_setup.py +299 -0
- threadkeeper/adapters/__init__.py +40 -0
- threadkeeper/adapters/_hook_helpers.py +72 -0
- threadkeeper/adapters/base.py +152 -0
- threadkeeper/adapters/claude_code.py +178 -0
- threadkeeper/adapters/claude_desktop.py +128 -0
- threadkeeper/adapters/codex.py +259 -0
- threadkeeper/adapters/copilot.py +195 -0
- threadkeeper/adapters/gemini.py +169 -0
- threadkeeper/adapters/vscode.py +144 -0
- threadkeeper/brief.py +735 -0
- threadkeeper/config.py +216 -0
- threadkeeper/curator.py +390 -0
- threadkeeper/db.py +474 -0
- threadkeeper/embeddings.py +232 -0
- threadkeeper/extract_daemon.py +125 -0
- threadkeeper/helpers.py +101 -0
- threadkeeper/i18n.py +342 -0
- threadkeeper/identity.py +237 -0
- threadkeeper/ingest.py +507 -0
- threadkeeper/lessons.py +170 -0
- threadkeeper/nudges.py +257 -0
- threadkeeper/process_health.py +202 -0
- threadkeeper/review_prompts.py +207 -0
- threadkeeper/search_proxy.py +160 -0
- threadkeeper/server.py +55 -0
- threadkeeper/shadow_review.py +358 -0
- threadkeeper/skill_watcher.py +96 -0
- threadkeeper/spawn_budget.py +246 -0
- threadkeeper/tools/__init__.py +2 -0
- threadkeeper/tools/concepts.py +111 -0
- threadkeeper/tools/consolidate.py +222 -0
- threadkeeper/tools/core_memory.py +109 -0
- threadkeeper/tools/correlation.py +116 -0
- threadkeeper/tools/curator.py +121 -0
- threadkeeper/tools/dialectic.py +359 -0
- threadkeeper/tools/dialog.py +131 -0
- threadkeeper/tools/distill.py +184 -0
- threadkeeper/tools/extract.py +411 -0
- threadkeeper/tools/graph.py +183 -0
- threadkeeper/tools/invariants.py +177 -0
- threadkeeper/tools/lessons.py +110 -0
- threadkeeper/tools/missed_spawns.py +142 -0
- threadkeeper/tools/peers.py +579 -0
- threadkeeper/tools/pickup.py +148 -0
- threadkeeper/tools/probes.py +251 -0
- threadkeeper/tools/process_health.py +90 -0
- threadkeeper/tools/session.py +34 -0
- threadkeeper/tools/shadow_review.py +106 -0
- threadkeeper/tools/skills.py +856 -0
- threadkeeper/tools/spawn.py +871 -0
- threadkeeper/tools/style.py +44 -0
- threadkeeper/tools/threads.py +299 -0
- threadkeeper-0.4.0.dist-info/METADATA +351 -0
- threadkeeper-0.4.0.dist-info/RECORD +61 -0
- threadkeeper-0.4.0.dist-info/WHEEL +5 -0
- threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
- threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
- threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
"""Auto-extraction MCP tools.
|
|
2
|
+
|
|
3
|
+
Extracted from server.py. Heuristic candidates for note/concept/distill/
|
|
4
|
+
verbatim from recent dialog_messages. Each candidate lands status='pending';
|
|
5
|
+
session reviews in batch via review_candidates() then accept/reject.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sqlite3
|
|
9
|
+
import time
|
|
10
|
+
import re as _re_extract
|
|
11
|
+
|
|
12
|
+
from .._mcp import mcp
|
|
13
|
+
from ..db import get_db
|
|
14
|
+
from ..config import SEMANTIC_AVAILABLE
|
|
15
|
+
from ..helpers import fmt_age, q, gen_concept_id, gen_distill_id
|
|
16
|
+
from .. import identity
|
|
17
|
+
from ..identity import _ensure_session, _detect_self_cid, _emit
|
|
18
|
+
from ..embeddings import _embed
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Locale-aware heuristic matchers — patterns live in i18n.py so this
|
|
22
|
+
# module stays English-only. Locale-independent patterns (header,
|
|
23
|
+
# bullet list) stay inline.
|
|
24
|
+
from ..i18n import (
|
|
25
|
+
WANT_RE as _WANT_RE,
|
|
26
|
+
INSIGHT_MARKERS_RE as _INSIGHT_MARKERS_RE,
|
|
27
|
+
EXAMPLE_RE as _EXAMPLE_RE,
|
|
28
|
+
FRAME_RE as _FRAME_RE,
|
|
29
|
+
)
|
|
30
|
+
_HEADER_RE = _re_extract.compile(r"^##+\s", _re_extract.MULTILINE)
|
|
31
|
+
_BULLET_RE = _re_extract.compile(
|
|
32
|
+
r"^\s*(?:[-*•]|\d+[.)])\s", _re_extract.MULTILINE
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Message-level noise filter (complements session-level
|
|
37
|
+
# _INTERNAL_PROMPT_PREFIXES). These prefixes appear in INDIVIDUAL
|
|
38
|
+
# messages of otherwise-valid sessions — context-compaction summaries
|
|
39
|
+
# from Anthropic CLI, subagent task prompts, SKILL.md injections, and
|
|
40
|
+
# `[Request interrupted by user for tool use]` service markers. None
|
|
41
|
+
# of them are user-intent signals; all of them were polluting extract
|
|
42
|
+
# candidates in the first calibration pass (2026-05-16 audit, 13
|
|
43
|
+
# candidates → ~25% precision; ~half of misses were these patterns).
|
|
44
|
+
_NOISE_CONTENT_PREFIXES: tuple[str, ...] = (
|
|
45
|
+
"This session is being continued from a previous conversation",
|
|
46
|
+
"Base directory for this skill:",
|
|
47
|
+
"In the repo at /",
|
|
48
|
+
"[Request interrupted by user",
|
|
49
|
+
# Generic subagent/spawn role prompts. Subagents and `claude -p`
|
|
50
|
+
# children commonly open with one of these. They're never user-
|
|
51
|
+
# intent signals — they're task framing injected by the parent.
|
|
52
|
+
"You are the ",
|
|
53
|
+
"You are a ",
|
|
54
|
+
"You are an ",
|
|
55
|
+
"Research task",
|
|
56
|
+
"Design task",
|
|
57
|
+
"Context:",
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Verbatim-specific minimum length. The global 30-char floor is too
|
|
62
|
+
# permissive for "I want X"-style user_want matches — short fragments
|
|
63
|
+
# like "[Request interrupted by user for tool use]" (42 chars) and
|
|
64
|
+
# CLI metadata strings sneak through. 50 chars is the empirical
|
|
65
|
+
# threshold below which user_want matches are almost always noise.
|
|
66
|
+
_VERBATIM_MIN_LEN = 50
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _is_log_content(text: str) -> bool:
|
|
70
|
+
"""Heuristic: high density of pass/fail/checkmark glyphs → this is
|
|
71
|
+
a test runner / CI log output, not natural-language signal.
|
|
72
|
+
|
|
73
|
+
Returns True if marker count ≥ 3 in the first 2KB of content. Logs
|
|
74
|
+
from Detox / Maestro / mocha / pytest etc. trigger user_want false
|
|
75
|
+
matches on lines like "✓ runFixture:registerUser → $ahmed (2.0s)"
|
|
76
|
+
and entire blocks dump into a single dialog_message turn.
|
|
77
|
+
"""
|
|
78
|
+
if len(text) < 100:
|
|
79
|
+
return False
|
|
80
|
+
sample = text[:2000]
|
|
81
|
+
markers = ("✓", "✗", "[OK]", "[FAIL]", "PASS", "FAIL", "skipped",
|
|
82
|
+
"runFixture:")
|
|
83
|
+
return sum(sample.count(m) for m in markers) >= 3
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _candidate_exists(conn, source_uuid, content):
|
|
87
|
+
if source_uuid:
|
|
88
|
+
if conn.execute(
|
|
89
|
+
"SELECT 1 FROM extract_candidates WHERE source_uuid=? "
|
|
90
|
+
"AND status IN ('pending','accepted') LIMIT 1",
|
|
91
|
+
(source_uuid,),
|
|
92
|
+
).fetchone():
|
|
93
|
+
return True
|
|
94
|
+
return bool(
|
|
95
|
+
conn.execute(
|
|
96
|
+
"SELECT 1 FROM extract_candidates WHERE content=? "
|
|
97
|
+
"AND status IN ('pending','accepted') LIMIT 1",
|
|
98
|
+
(content[:500],),
|
|
99
|
+
).fetchone()
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _enqueue(conn, kind, source_uuid, source_cid, content, rationale):
    """Queue one 'pending' candidate unless a live duplicate exists.

    Duplicate detection is delegated to `_candidate_exists`. The row is
    stamped with the current Unix time; the caller is responsible for
    committing.

    Returns:
        The new row id, or None when the candidate was a duplicate.
    """
    already_queued = _candidate_exists(conn, source_uuid, content)
    if already_queued:
        return None
    insert_sql = (
        "INSERT INTO extract_candidates (kind, source_uuid, source_cid, "
        "content, rationale, status, created_at) VALUES (?,?,?,?,?,?,?)"
    )
    values = (
        kind, source_uuid, source_cid, content, rationale,
        "pending", int(time.time()),
    )
    return conn.execute(insert_sql, values).lastrowid
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _greedy_clusters(sim, threshold, min_size):
    """Yield index lists of greedy similarity clusters found in *sim*.

    Rows are visited in order. Each row ``i`` that has not been claimed
    yet seeds a group made of ``i`` plus every later, unclaimed index
    ``j`` with ``sim[i, j] >= threshold``. Groups with at least
    *min_size* members are claimed (so their members cannot join a later
    group) and yielded; smaller groups are discarded without claiming.
    """
    size = len(sim)
    claimed = [False] * size
    for i in range(size):
        if claimed[i]:
            continue
        members = [i] + [
            j for j in range(i + 1, size)
            if not claimed[j] and sim[i, j] >= threshold
        ]
        if len(members) < min_size:
            continue
        for k in members:
            claimed[k] = True
        yield members


@mcp.tool()
def extract_recent(window_min: int = 60, max_messages: int = 500) -> str:
    """Scan recent dialog_messages and enqueue heuristic candidates.

    H1 user_want → verbatim (normative phrasing)
    H2 long_insight → distill (assistant ≥500ch + ## headers + conclusion marker)
    H3 example_regularity→ concept (bullets≥3 OR example-marker≥2 + abstract frame)
    H4 paraphrase_repeat → note (≥3 msgs cosine ≥0.80 within same session)"""
    # NOTE(review): the docstring above is presumably surfaced as the MCP
    # tool description, so its wording is kept verbatim.
    #
    # Returns "no_dialog window=<n>m" when nothing matched the window,
    # otherwise an "ok ..." summary line with per-kind counts.
    conn = get_db()
    _ensure_session(conn)
    now = int(time.time())
    cutoff = now - max(1, int(window_min)) * 60
    # Exclude our own internal-prompted child sessions (shadow_review
    # observer + close_thread auto-reviewer + curator) — otherwise their
    # prompt text becomes extract candidates, the same self-pollution
    # we fixed in shadow_review._collect_window.
    from ..shadow_review import _INTERNAL_PROMPT_PREFIXES

    sql = (
        "SELECT uuid, role, content, session_id, created_at, embedding "
        "FROM dialog_messages WHERE created_at >= ? "
        "AND role IN ('user','assistant') "
        "AND content NOT LIKE '[tool_result]%' AND content NOT LIKE '[Image%' "
    )
    params: list = [cutoff]
    # Message-level filter — drop individual noise messages (compaction
    # summaries, SKILL injections, subagent prompts) inside otherwise
    # valid sessions. Binding "<prefix>%" as a LIKE pattern is safe here
    # because every prefix is a literal from source code, not user input.
    for prefix in _NOISE_CONTENT_PREFIXES:
        sql += "AND content NOT LIKE ? "
        params.append(prefix + "%")
    sql += "AND length(content) >= 30 "
    # Session-level filter — drop entire sessions started by our own
    # spawn prompts. Skipped when there are no prefixes, since an empty
    # "AND ()" group would be a SQL syntax error.
    if _INTERNAL_PROMPT_PREFIXES:
        sess_prefix_clauses = " OR ".join(
            ["substr(content, 1, ?) = ?"] * len(_INTERNAL_PROMPT_PREFIXES)
        )
        # NOT IN is NULL-unsafe: a NULL session_id on either side makes
        # the predicate NULL and silently filters the row out (one NULL
        # in the subquery would drop every row). Keep NULLs out of the
        # subquery and let NULL-session messages through explicitly.
        sql += (
            "AND (session_id IS NULL OR session_id NOT IN ("
            " SELECT DISTINCT session_id FROM dialog_messages "
            " WHERE role = 'user' AND session_id IS NOT NULL "
            f" AND ({sess_prefix_clauses})"
            ")) "
        )
        for prefix in _INTERNAL_PROMPT_PREFIXES:
            params.extend([len(prefix), prefix])
    sql += "ORDER BY created_at ASC LIMIT ?"
    params.append(max(10, int(max_messages)))
    rows = conn.execute(sql, params).fetchall()
    if not rows:
        return f"no_dialog window={window_min}m"

    counts = {"verbatim": 0, "distill": 0, "concept": 0, "note": 0}
    # NOTE: besides duplicates, `skipped` also counts log-like messages,
    # although it is reported under the "skipped_existing" label.
    skipped = 0

    def _tally(kind, new_id):
        """Count an _enqueue result as added (row id) or skipped (None)."""
        nonlocal skipped
        if new_id:
            counts[kind] += 1
        else:
            skipped += 1

    for r in rows:
        msg_uuid, cid, content, role = (
            r["uuid"], r["session_id"], r["content"], r["role"]
        )
        # Test-runner / CI log dumps trigger user_want false matches on
        # log labels like "Earnings Withdrawal → ✓ runFixture:…"
        if _is_log_content(content):
            skipped += 1
            continue
        # H1: normative phrasing from the user.
        if (role == "user" and _WANT_RE.search(content)
                and len(content) >= _VERBATIM_MIN_LEN):
            _tally("verbatim", _enqueue(
                conn, "verbatim", msg_uuid, cid, content[:2000],
                "H1 user_want pattern"))
        if role != "assistant":
            continue
        # H2: long assistant message with headers and a conclusion marker.
        if (len(content) >= 500 and _HEADER_RE.search(content)
                and _INSIGHT_MARKERS_RE.search(content)):
            _tally("distill", _enqueue(
                conn, "distill", msg_uuid, cid, content[:4000],
                "H2 long_insight (headers + conclusion marker)"))
        # H3: list/example-heavy message that also states a general frame.
        # _enqueue de-duplicates on source uuid, so a message already
        # queued by H2 is counted as skipped here.
        bullets = len(_BULLET_RE.findall(content))
        examples = len(_EXAMPLE_RE.findall(content))
        if (bullets >= 3 or examples >= 2) and _FRAME_RE.search(content):
            _tally("concept", _enqueue(
                conn, "concept", msg_uuid, cid, content[:3000],
                f"H3 example_regularity (bullets={bullets}, examples={examples})",
            ))

    # H4: repeated paraphrases inside one session (needs embeddings + numpy).
    if SEMANTIC_AVAILABLE:
        try:
            import numpy as _np  # type: ignore
        except ImportError:
            _np = None
        if _np is not None:
            by_sess: dict = {}
            for r in rows:
                if r["embedding"]:
                    by_sess.setdefault(r["session_id"] or "", []).append(r)
            for sid, msgs in by_sess.items():
                if len(msgs) < 3:
                    continue
                embs = _np.stack([
                    _np.frombuffer(m["embedding"], dtype="float32")
                    for m in msgs
                ])
                # Dot products are used as cosine similarity — assumes the
                # stored embeddings are L2-normalised; TODO confirm.
                sim = embs @ embs.T
                for members in _greedy_clusters(sim, 0.80, 3):
                    # Representative = member with the highest mean
                    # similarity to the rest of its cluster.
                    sub = sim[_np.ix_(members, members)]
                    rep = msgs[members[int(_np.argmax(sub.mean(axis=1)))]]
                    member_uuids = sorted(msgs[k]["uuid"] for k in members)
                    cluster_key = "cluster:" + ",".join(
                        u[:8] for u in member_uuids[:6]
                    )
                    if conn.execute(
                        "SELECT 1 FROM extract_candidates WHERE source_uuid=? "
                        "AND status IN ('pending','accepted')",
                        (cluster_key,),
                    ).fetchone():
                        skipped += 1
                        continue
                    conn.execute(
                        "INSERT INTO extract_candidates (kind, source_uuid, "
                        "source_cid, content, rationale, status, created_at) "
                        "VALUES (?,?,?,?,?,?,?)",
                        ("note", cluster_key, sid, rep["content"][:2000],
                         f"H4 paraphrase_repeat n={len(members)} "
                         f"sess={sid[:8]} centroid={rep['uuid'][:8]}",
                         "pending", now),
                    )
                    counts["note"] += 1

    _emit(conn, "extract_recent",
          summary=" ".join(f"{k}={v}" for k, v in counts.items()))
    conn.commit()
    return (
        f"ok window={window_min}m scanned={len(rows)} "
        f"verbatim={counts['verbatim']} distill={counts['distill']} "
        f"concept={counts['concept']} note={counts['note']} "
        f"skipped_existing={skipped}"
    )
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
@mcp.tool()
def review_candidates(status: str = "pending", k: int = 20) -> str:
    """status ∈ {pending, accepted, rejected, all}. Newest first."""
    # NOTE(review): the docstring above is presumably surfaced as the MCP
    # tool description, so its wording is kept verbatim.
    #
    # Lists up to max(1, k) candidates, newest first, as a header line
    # followed by three lines per candidate (id/kind/cid/age, rationale,
    # quoted snippet of at most 240 characters).
    if status not in ("pending", "accepted", "rejected", "all"):
        return f"ERR bad_status={status}"
    conn = get_db()
    select_sql = (
        "SELECT id, kind, source_uuid, source_cid, content, rationale, "
        "status, created_at FROM extract_candidates "
    )
    if status != "all":
        query = select_sql + "WHERE status=? ORDER BY created_at DESC LIMIT ?"
        args = (status, max(1, int(k)))
    else:
        query = select_sql + "ORDER BY created_at DESC LIMIT ?"
        args = (max(1, int(k)),)
    rows = conn.execute(query, args).fetchall()
    if not rows:
        return f"no_candidates status={status}"
    now = int(time.time())
    out = [f"candidates n={len(rows)} status={status}"]
    for cand in rows:
        body = cand["content"]
        # Flatten newlines so each candidate stays on one snippet line.
        snip = body[:240].replace("\n", " ")
        if len(body) > 240:
            snip += "…"
        out.extend([
            f" #{cand['id']} {cand['kind']} cid={(cand['source_cid'] or '-')[:8]} "
            f"age={fmt_age(now - cand['created_at'])}_ago",
            f" why={cand['rationale'] or '?'}",
            f" {q(snip)}",
        ])
    return "\n".join(out)
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
_VALID_TARGET_KINDS = ("note", "concept", "distill", "verbatim")
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@mcp.tool()
def accept_candidate(id: int, target_kind: str = "",
                     thread_id: str = "") -> str:
    """Materialize candidate into its target table.
    target_kind overrides candidate's kind. thread_id optional."""
    # NOTE(review): the docstring above is presumably surfaced as the MCP
    # tool description, so its wording is kept verbatim.
    #
    # Only 'pending' candidates can be accepted. On success the candidate
    # is copied into verbatim / notes / concepts / distill, marked
    # 'accepted', an event is emitted and the transaction is committed.
    conn = get_db()
    _ensure_session(conn)
    cand = conn.execute(
        "SELECT * FROM extract_candidates WHERE id=?", (int(id),)
    ).fetchone()
    if not cand:
        return f"ERR candidate_not_found={id}"
    if cand["status"] != "pending":
        return f"ERR not_pending status={cand['status']}"
    kind = (target_kind or cand["kind"]).strip()
    if kind not in _VALID_TARGET_KINDS:
        return f"ERR bad_target_kind={kind}"
    tid = thread_id.strip() or None
    if tid:
        thread_row = conn.execute(
            "SELECT 1 FROM threads WHERE id=?", (tid,)
        ).fetchone()
        if not thread_row:
            return f"ERR thread_not_found={tid}"
    now = int(time.time())
    content = cand["content"]
    placed = ""
    if kind == "verbatim":
        # Speaker is always recorded as "user".
        verbatim_id = conn.execute(
            "INSERT INTO verbatim (speaker, content, thread_id, created_at, "
            "session_id) VALUES (?,?,?,?,?)",
            ("user", content, tid, now, identity._session_id),
        ).lastrowid
        placed = f"verbatim id={verbatim_id}"
    elif kind == "note":
        emb = _embed(content)
        note_id = conn.execute(
            "INSERT INTO notes (thread_id, content, kind, created_at, "
            "session_id, embedding) VALUES (?,?,?,?,?,?)",
            (tid, content, "insight", now, identity._session_id, emb),
        ).lastrowid
        placed = f"note id={note_id} thread={tid or '-'}"
    elif kind == "concept":
        pid = gen_concept_id(conn)
        cid = _detect_self_cid()
        # The candidate's rationale is stored as triangulation notes.
        conn.execute(
            "INSERT INTO concepts (id, description, triangulation_notes, "
            "confidence, source_thread, registered_by_cid, registered_at, "
            "last_evidence_at) VALUES (?,?,?,?,?,?,?,?)",
            (pid, content, cand["rationale"], "low", tid, cid, now, now),
        )
        placed = f"concept id={pid}"
    elif kind == "distill":
        pid = gen_distill_id(conn)
        cid = _detect_self_cid()
        conn.execute(
            "INSERT INTO distill (id, content, kind, confidence, "
            "source_thread, source_cid, created_at) "
            "VALUES (?,?,?,?,?,?,?)",
            (pid, content, "insight", "medium", tid, cid, now),
        )
        if cid:
            # Seed the distillate with one vote from the accepting cid.
            conn.execute(
                "INSERT INTO distill_votes (distill_id, voter_cid, weight, "
                "voted_at) VALUES (?,?,?,?)",
                (pid, cid, 1.0, now),
            )
            conn.execute(
                "UPDATE distill SET vote_sum=1.0, vote_count=1 WHERE id=?",
                (pid,),
            )
        placed = f"distill id={pid}"
    conn.execute(
        "UPDATE extract_candidates SET status='accepted', decided_at=? "
        "WHERE id=?",
        (now, int(id)),
    )
    _emit(conn, f"accept_candidate:{kind}", target=str(id), summary=placed)
    conn.commit()
    return f"ok accepted #{id} → {placed}"
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@mcp.tool()
def reject_candidate(id: int, reason: str = "") -> str:
    """Mark rejected. Reason appended to rationale for heuristic tuning."""
    # NOTE(review): the docstring above is presumably surfaced as the MCP
    # tool description, so its wording is kept verbatim.
    #
    # Only 'pending' candidates can be rejected. The updated rationale is
    # capped at 500 characters.
    conn = get_db()
    _ensure_session(conn)
    cand = conn.execute(
        "SELECT id, rationale, status FROM extract_candidates WHERE id=?",
        (int(id),),
    ).fetchone()
    if not cand:
        return f"ERR candidate_not_found={id}"
    if cand["status"] != "pending":
        return f"ERR not_pending status={cand['status']}"
    now = int(time.time())
    rationale = cand["rationale"] or ""
    if reason:
        # lstrip drops the leading " | " separator when there was no
        # previous rationale to append to.
        rationale = f"{rationale} | rejected: {reason}".lstrip(" |")[:500]
    conn.execute(
        "UPDATE extract_candidates SET status='rejected', decided_at=?, "
        "rationale=? WHERE id=?",
        (now, rationale, int(id)),
    )
    _emit(conn, "reject_candidate", target=str(id), summary=reason)
    conn.commit()
    return f"ok rejected #{id}"
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Knowledge-graph MCP tools.
|
|
2
|
+
|
|
3
|
+
Extracted from server.py. Provides typed edges between entities
|
|
4
|
+
(threads, notes, concepts, distillates, tasks, signals, probes)
|
|
5
|
+
with link/unlink primitives and a BFS traversal (`neighbors`).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sqlite3
|
|
9
|
+
import time
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .._mcp import mcp
|
|
13
|
+
from ..db import get_db
|
|
14
|
+
from ..helpers import fmt_age
|
|
15
|
+
from ..identity import _ensure_session, _detect_self_cid, _emit
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
EDGE_KINDS = ("thread", "note", "concept", "distill", "task", "signal", "probe")
|
|
19
|
+
EDGE_RELATIONS_HINT = (
|
|
20
|
+
"refines", "contradicts", "exemplifies", "depends_on",
|
|
21
|
+
"mentions", "elaborates", "supersedes",
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _entity_table(kind: str) -> Optional[str]:
|
|
26
|
+
"""Map kind → table name for existence checks."""
|
|
27
|
+
return {
|
|
28
|
+
"thread": "threads", "note": "notes", "concept": "concepts",
|
|
29
|
+
"distill": "distill", "task": "tasks", "signal": "signals",
|
|
30
|
+
"probe": "probes",
|
|
31
|
+
}.get(kind)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _entity_exists(conn: sqlite3.Connection, kind: str, eid: str) -> bool:
    """Return True when a row with id *eid* exists in the table for *kind*.

    Unknown kinds and sqlite3.OperationalError (raised, for example, when
    the table is missing) both yield False.
    """
    table = _entity_table(kind)
    if table:
        # `table` comes from the fixed mapping in _entity_table, never
        # from caller input, so interpolating it into the SQL is safe.
        lookup = f"SELECT 1 FROM {table} WHERE id=?"
        try:
            found = conn.execute(lookup, (eid,)).fetchone()
        except sqlite3.OperationalError:
            return False
        return found is not None
    return False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _snippet_for(conn: sqlite3.Connection, kind: str, eid: str) -> str:
    """Return a one-line preview (at most 90 chars plus "…") for an entity.

    Returns "?" when the kind is unknown or the lookup raises
    sqlite3.OperationalError, and "(empty)" when the row is missing or
    its preview column is empty.
    """
    table = _entity_table(kind)
    if not table:
        return "?"
    # Column holding the human-readable text for each entity kind.
    preview_column = {
        "thread": "question",
        "note": "content",
        "concept": "description",
        "distill": "content",
        "task": "prompt",
        "signal": "content",
        "probe": "prompt",
    }.get(kind, "")
    if not preview_column:
        return "?"
    try:
        row = conn.execute(
            f"SELECT {preview_column} v FROM {table} WHERE id=?", (eid,)
        ).fetchone()
    except sqlite3.OperationalError:
        return "?"
    if not row or not row["v"]:
        return "(empty)"
    value = row["v"]
    preview = value[:90].replace("\n", " ")
    return preview + "…" if len(value) > 90 else preview
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@mcp.tool()
def link(from_kind: str, from_id: str, to_kind: str, to_id: str,
         relation: str, weight: float = 1.0) -> str:
    """Create a typed edge between two entities.

    Kinds: thread, note, concept, distill, task, signal, probe.
    Relations (suggested, free-form ok): refines, contradicts, exemplifies,
    depends_on, mentions, elaborates, supersedes.

    Existing edge with same (from, to, relation) is replaced (re-linking
    means updating weight/timestamp, not duplicating)."""
    # NOTE(review): the docstring above is presumably surfaced as the MCP
    # tool description, so its wording is kept verbatim.
    #
    # Returns "ok edge=<id> ..." on success or an "ERR ..." string when
    # validation fails. Weight must lie in [-10, 10].
    if from_kind not in EDGE_KINDS:
        return f"ERR bad_from_kind={from_kind} (use {','.join(EDGE_KINDS)})"
    if to_kind not in EDGE_KINDS:
        return f"ERR bad_to_kind={to_kind}"
    rel = relation.strip()
    if not rel:
        return "ERR empty_relation"
    if not (-10 <= weight <= 10):
        return f"ERR weight_out_of_range={weight}"
    # Normalise once: the stripped values are what gets stored, so the
    # audit event and the success message must report the same values.
    src_id = from_id.strip()
    dst_id = to_id.strip()
    conn = get_db()
    _ensure_session(conn)
    # Error messages echo the caller's raw input on purpose.
    if not _entity_exists(conn, from_kind, src_id):
        return f"ERR from_not_found={from_kind}:{from_id}"
    if not _entity_exists(conn, to_kind, dst_id):
        return f"ERR to_not_found={to_kind}:{to_id}"
    existing = conn.execute(
        "SELECT id FROM edges WHERE from_kind=? AND from_id=? AND "
        "to_kind=? AND to_id=? AND relation=?",
        (from_kind, src_id, to_kind, dst_id, rel),
    ).fetchone()
    now_t = int(time.time())
    cid = _detect_self_cid()
    if existing:
        # Re-linking refreshes weight, author and timestamp in place.
        eid = existing["id"]
        conn.execute(
            "UPDATE edges SET weight=?, created_by_cid=?, created_at=? "
            "WHERE id=?",
            (weight, cid, now_t, eid),
        )
    else:
        eid = conn.execute(
            "INSERT INTO edges (from_kind, from_id, to_kind, to_id, "
            "relation, weight, created_by_cid, created_at) "
            "VALUES (?,?,?,?,?,?,?,?)",
            (from_kind, src_id, to_kind, dst_id, rel, weight, cid, now_t),
        ).lastrowid
    _emit(conn, "link", target=f"{from_kind}:{src_id}",
          summary=f"-{rel}-> {to_kind}:{dst_id}")
    conn.commit()
    return f"ok edge={eid} {from_kind}:{src_id} -{rel}-> {to_kind}:{dst_id}"
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@mcp.tool()
def unlink(edge_id: int) -> str:
    """Remove an edge by id."""
    # NOTE(review): the docstring above is presumably surfaced as the MCP
    # tool description, so its wording is kept verbatim.
    #
    # Returns "ok" after deleting, or "ERR edge_not_found=<id>" when no
    # row matched (in which case nothing is emitted or committed).
    conn = get_db()
    _ensure_session(conn)
    deleted = conn.execute(
        "DELETE FROM edges WHERE id=?", (int(edge_id),)
    ).rowcount
    if deleted != 0:
        _emit(conn, "unlink", target=str(edge_id))
        conn.commit()
        return "ok"
    return f"ERR edge_not_found={edge_id}"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@mcp.tool()
def neighbors(kind: str, id: str, depth: int = 1, max_n: int = 12) -> str:
    """BFS the graph from a starting node up to `depth` hops away.
    Returns each visited node with its kind, id, and a short content snippet
    pulled from its native table. Both directions of edges traversed."""
    # NOTE(review): the docstring above is presumably surfaced as the MCP
    # tool description, so its wording is kept verbatim.
    #
    # depth is clamped to 1..4 and max_n to 1..50. Returns an "ERR ..."
    # string for an unknown kind or missing start node, "no_neighbors ..."
    # when the start node has no edges, otherwise one line per node.
    from collections import deque

    if kind not in EDGE_KINDS:
        return f"ERR bad_kind={kind}"
    conn = get_db()
    start_id = id.strip()
    if not _entity_exists(conn, kind, start_id):
        return f"ERR not_found={kind}:{id}"
    depth = max(1, min(int(depth), 4))
    max_n = max(1, min(int(max_n), 50))
    visited: set[tuple[str, str]] = {(kind, start_id)}
    # deque gives O(1) pops from the left; list.pop(0) is O(n).
    frontier = deque([(kind, start_id, 0)])
    nodes_out: list[tuple[str, str, int, str]] = []
    while frontier and len(nodes_out) < max_n:
        cur_kind, cur_id, dist = frontier.popleft()
        # Follow edges in both directions. The same neighbour can appear
        # several times (one row per relation/weight); `visited` dedups.
        rows = conn.execute(
            "SELECT to_kind AS nk, to_id AS nid, relation, weight FROM edges "
            "WHERE from_kind=? AND from_id=? "
            "UNION "
            "SELECT from_kind AS nk, from_id AS nid, relation, weight FROM edges "
            "WHERE to_kind=? AND to_id=?",
            (cur_kind, cur_id, cur_kind, cur_id),
        ).fetchall()
        for r in rows:
            nk, nid = r["nk"], r["nid"]
            key = (nk, nid)
            if key in visited:
                continue
            visited.add(key)
            snippet = _snippet_for(conn, nk, nid)
            nodes_out.append((nk, nid, dist + 1, snippet))
            # Nodes already at the depth limit are reported but never
            # expanded, so there is no point in queueing them.
            if dist + 1 < depth:
                frontier.append((nk, nid, dist + 1))
            if len(nodes_out) >= max_n:
                break
    if not nodes_out:
        return f"no_neighbors {kind}:{id} depth={depth}"
    lines = [f"neighbors {kind}:{id} depth={depth} n={len(nodes_out)}"]
    for node_kind, node_id, hops, sn in nodes_out:
        lines.append(f" [+{hops}] {node_kind}:{node_id} — {sn}")
    return "\n".join(lines)
|