threadkeeper 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61):
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
threadkeeper/db.py ADDED
@@ -0,0 +1,474 @@
1
+ """SQLite schema and connection factory.
2
+ Imported by every tool module that needs DB access."""
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import sqlite3
7
+
8
+ from .config import DB_PATH
9
+
10
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Embedding dimension for paraphrase-multilingual-MiniLM-L12-v2.
# It is interpolated into the vec0 virtual-table DDL issued by get_db(), so
# when swapping models, change here AND drop & recreate the *_vec tables.
EMBED_DIM = 384

# sqlite-vec extension state. get_db() tries to load the extension into every
# connection it opens (loading is per connection), but only the verdict of the
# first attempt is cached here:
#   None  -> no connection has been opened yet, nothing probed
#   True  -> the first probe succeeded; vec0 virtual tables work
#   False -> the first probe failed; we fall back to the legacy Python-side
#            cosine path (still correct, just slower)
_VEC_AVAILABLE: bool | None = None
21
+
22
+
23
def _try_load_vec(conn: sqlite3.Connection) -> bool:
    """Best-effort: load the sqlite-vec extension into this connection.

    Never raises. Returns True when the extension was loaded, False when the
    ``sqlite_vec`` package is not installed, when extension loading is
    disabled by the build, or when the load itself fails (the failure is
    logged at debug level).

    Extension loading is switched back off before returning on every path
    that switched it on, including a failed load, so the connection is not
    left able to load arbitrary extensions.
    """
    try:
        import sqlite_vec  # type: ignore
    except ImportError:
        return False
    try:
        conn.enable_load_extension(True)
    except (AttributeError, sqlite3.NotSupportedError):
        # This sqlite3 build cannot load extensions at all.
        return False
    try:
        sqlite_vec.load(conn)
    except Exception as e:
        logger.debug("sqlite-vec load failed: %s", e)
        return False
    finally:
        # Runs for both the success and the failure path above.
        try:
            conn.enable_load_extension(False)
        except (AttributeError, sqlite3.NotSupportedError):
            pass
    return True
45
+
46
+
47
def vec_available() -> bool:
    """Report the cached sqlite-vec probe verdict as a plain bool.

    The module-level flag is tri-state; both "not probed yet" (None) and
    "probe failed" (False) are reported as False.
    """
    return True if _VEC_AVAILABLE else False
51
+
52
# ──────────────────────────────────────────────────────────────────────────────
# Schema. Notes can be unattached (thread_id NULL) for session-level summaries.
#
# get_db() runs this whole script with executescript() on every connection it
# opens, so every statement must be idempotent (IF NOT EXISTS everywhere).
# New tables, indexes and triggers added here are therefore picked up by
# existing databases on the next connection; new COLUMNS on existing tables
# are not, and need an ALTER TABLE migration instead.
# ──────────────────────────────────────────────────────────────────────────────
SCHEMA = """
CREATE TABLE IF NOT EXISTS threads (
    id TEXT PRIMARY KEY,
    question TEXT NOT NULL,
    state TEXT NOT NULL CHECK(state IN ('active','idle','closed')),
    parent_id TEXT REFERENCES threads(id),
    outcome TEXT,
    last_move TEXT,
    depth INTEGER NOT NULL DEFAULT 0,
    opened_at INTEGER NOT NULL,
    last_touched_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS notes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    thread_id TEXT REFERENCES threads(id),
    content TEXT NOT NULL,
    kind TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    session_id TEXT,
    embedding BLOB
);

CREATE TABLE IF NOT EXISTS verbatim (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    speaker TEXT NOT NULL CHECK(speaker IN ('user','claude')),
    content TEXT NOT NULL,
    thread_id TEXT REFERENCES threads(id),
    created_at INTEGER NOT NULL,
    session_id TEXT
);

CREATE TABLE IF NOT EXISTS sessions (
    id TEXT PRIMARY KEY,
    started_at INTEGER NOT NULL,
    ended_at INTEGER,
    client TEXT,
    write_origin TEXT NOT NULL DEFAULT 'foreground'
);

CREATE TABLE IF NOT EXISTS style (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS evolve (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    suggestion TEXT NOT NULL,
    rationale TEXT,
    applied INTEGER NOT NULL DEFAULT 0,
    created_at INTEGER NOT NULL
);

-- Live channel: every mutation emits an event; each session keeps a cursor
-- over the event log, and presence tracks active sessions via heartbeats.
CREATE TABLE IF NOT EXISTS events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT NOT NULL,
    kind TEXT NOT NULL,
    target TEXT,
    summary TEXT,
    created_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS cursors (
    session_id TEXT PRIMARY KEY,
    last_event_id INTEGER NOT NULL DEFAULT 0,
    updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS presence (
    session_id TEXT PRIMARY KEY,
    client TEXT,
    started_at INTEGER NOT NULL,
    heartbeat_at INTEGER NOT NULL,
    current_thread TEXT,
    last_action TEXT
);

-- Dialog ingestion: full transcripts of past Claude Code conversations.
-- Sourced from ~/.claude/projects/**/*.jsonl. Indexed for semantic search.
CREATE TABLE IF NOT EXISTS dialog_messages (
    uuid TEXT PRIMARY KEY,            -- message UUID from jsonl
    source TEXT NOT NULL,             -- 'claude-code'
    project TEXT,                     -- encoded folder name
    session_id TEXT,                  -- conversation ID
    role TEXT NOT NULL,               -- 'user' or 'assistant'
    content TEXT NOT NULL,            -- concatenated text blocks
    model TEXT,
    created_at INTEGER NOT NULL,
    embedding BLOB
);

CREATE TABLE IF NOT EXISTS ingest_state (
    file_path TEXT PRIMARY KEY,
    last_size INTEGER NOT NULL,
    last_mtime INTEGER NOT NULL,
    ingested_at INTEGER NOT NULL,
    msg_count INTEGER NOT NULL DEFAULT 0
);

-- Cross-session channel: directed (whisper) or broadcast messages between
-- concurrent claude conversations. Identity here is conversation_id (jsonl
-- stem), not thread-keeper _session_id — process multiplexing makes the
-- latter useless as a per-window key.
CREATE TABLE IF NOT EXISTS signals (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    from_cid TEXT NOT NULL,
    to_cid TEXT,                      -- NULL = broadcast
    kind TEXT NOT NULL,               -- 'broadcast', 'whisper', 'claim', 'release'
    content TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    read_at INTEGER                   -- first time receiver acknowledged
);

-- Self-diagnostic probes. Each probe is a claude-shape weak-spot test
-- with a known correct answer or pattern. probe_results logs every attempt;
-- reliability is the rolling aggregate (write-through cache).
CREATE TABLE IF NOT EXISTS probes (
    id TEXT PRIMARY KEY,              -- 'P' + 3 hex
    category TEXT NOT NULL,
    prompt TEXT NOT NULL,
    expected_pattern TEXT,
    grader TEXT NOT NULL DEFAULT 'regex'
        CHECK(grader IN ('regex','exact','manual')),
    enabled INTEGER NOT NULL DEFAULT 1,
    created_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS probe_results (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    probe_id TEXT REFERENCES probes(id),
    category TEXT NOT NULL,           -- denormalized
    session_id TEXT,
    cid TEXT,
    success INTEGER NOT NULL CHECK(success IN (0,1)),
    latency_ms INTEGER,
    note TEXT,
    created_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS reliability (
    category TEXT PRIMARY KEY,
    attempts INTEGER NOT NULL DEFAULT 0,
    successes INTEGER NOT NULL DEFAULT 0,
    last_at INTEGER,
    fail_rate_7d REAL,                -- NULL if no recent data
    fail_rate_30d REAL,
    updated_at INTEGER NOT NULL
);

-- Concept inventory: regularities the model finds in itself that don't
-- have a precise human-language name. Captured via behavior triangulation
-- (the same phenomenon described 4+ ways; the structural invariant is
-- the concept). Description is example-based — never a canonical label.
CREATE TABLE IF NOT EXISTS concepts (
    id TEXT PRIMARY KEY,              -- 'C' + 3 hex
    description TEXT NOT NULL,
    triangulation_notes TEXT,
    confidence TEXT NOT NULL DEFAULT 'medium'
        CHECK(confidence IN ('low','medium','high')),
    source_thread TEXT REFERENCES threads(id),
    registered_by_cid TEXT,
    registered_at INTEGER NOT NULL,
    last_evidence_at INTEGER
);

-- Distillation channel: explicitly-curated insights worth carrying forward.
-- Multi-instance voting: each peer cid votes once per distillate (-1..+1).
-- Brief surfaces top voted; export tool emits jsonl bucket.
CREATE TABLE IF NOT EXISTS distill (
    id TEXT PRIMARY KEY,              -- 'D' + 3 hex
    content TEXT NOT NULL,
    kind TEXT NOT NULL DEFAULT 'insight'
        CHECK(kind IN ('insight','pattern','anti-pattern',
                       'fix','terminology','concept')),
    confidence TEXT NOT NULL DEFAULT 'medium'
        CHECK(confidence IN ('low','medium','high')),
    source_thread TEXT REFERENCES threads(id),
    source_cid TEXT,
    vote_sum REAL NOT NULL DEFAULT 0,
    vote_count INTEGER NOT NULL DEFAULT 0,
    created_at INTEGER NOT NULL,
    exported_at INTEGER
);

CREATE TABLE IF NOT EXISTS distill_votes (
    distill_id TEXT NOT NULL REFERENCES distill(id),
    voter_cid TEXT NOT NULL,
    weight REAL NOT NULL CHECK(weight >= -1 AND weight <= 1),
    voted_at INTEGER NOT NULL,
    PRIMARY KEY (distill_id, voter_cid)
);

-- Core memory tier (Letta-style RAM): high-priority lines that ALWAYS land
-- in the brief regardless of relevance. Use sparingly — this is the "what
-- new-claude must know" surface, not a general note store.
CREATE TABLE IF NOT EXISTS core_memory (
    key TEXT PRIMARY KEY,
    content TEXT NOT NULL,
    priority INTEGER NOT NULL DEFAULT 50,   -- higher = shown first
    updated_at INTEGER NOT NULL
);

-- Dialectic user model. Each claim is a discrete proposition about the
-- user; evidence accumulates over time. confidence emerges from
-- (support_count - contradict_count) normalized; a deeply-contradicted
-- claim drops to low even after many supports.
CREATE TABLE IF NOT EXISTS user_dialectic (
    id TEXT PRIMARY KEY,              -- 'UC' + 3 hex
    claim TEXT NOT NULL,
    domain TEXT,                      -- 'style','workflow','values','context','skills','other'
    support_count INTEGER NOT NULL DEFAULT 0,
    contradict_count INTEGER NOT NULL DEFAULT 0,
    confidence TEXT NOT NULL DEFAULT 'low'
        CHECK(confidence IN ('low','medium','high','disputed')),
    state TEXT NOT NULL DEFAULT 'active'
        CHECK(state IN ('active','retired','superseded')),
    superseded_by TEXT REFERENCES user_dialectic(id),
    created_by_cid TEXT,
    created_at INTEGER NOT NULL,
    last_evidence_at INTEGER
);

CREATE TABLE IF NOT EXISTS dialectic_evidence (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    claim_id TEXT NOT NULL REFERENCES user_dialectic(id),
    kind TEXT NOT NULL CHECK(kind IN ('support','contradict')),
    source TEXT,                      -- 'thread:T_xxx','verbatim:N','manual','dialog:UUID'
    quote TEXT,                       -- short evidence snippet
    weight REAL NOT NULL DEFAULT 1.0
        CHECK(weight >= 0 AND weight <= 1),
    created_by_cid TEXT,
    created_at INTEGER NOT NULL
);

-- Knowledge graph: typed edges between any pair of entities. Lets us run
-- traversal queries ("what concepts refine this thread", "what threads
-- contradict each other"). Nodes addressed by (kind, id) so we don't need
-- a separate node table — entities live in their own tables.
CREATE TABLE IF NOT EXISTS edges (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    from_kind TEXT NOT NULL,          -- 'thread','note','concept','distill','task','signal'
    from_id TEXT NOT NULL,
    to_kind TEXT NOT NULL,
    to_id TEXT NOT NULL,
    relation TEXT NOT NULL,           -- 'refines','contradicts','exemplifies','depends_on','mentions','elaborates'
    weight REAL NOT NULL DEFAULT 1.0,
    created_by_cid TEXT,
    created_at INTEGER NOT NULL
);

-- Skill usage telemetry. One row per skill that the curator can manage.
-- created_by_origin distinguishes agent-created ('background_review') from
-- user-authored ('foreground') — curator only ever auto-archives the former.
-- state moves active → stale → archived based on activity timestamps.
-- pinned=1 opts out of all auto-transitions (orthogonal to state).
CREATE TABLE IF NOT EXISTS skill_usage (
    name TEXT PRIMARY KEY,
    created_at INTEGER NOT NULL,
    created_by_cid TEXT,
    created_by_origin TEXT NOT NULL DEFAULT 'foreground',
    last_used_at INTEGER,
    last_viewed_at INTEGER,
    last_patched_at INTEGER,
    use_count INTEGER NOT NULL DEFAULT 0,
    view_count INTEGER NOT NULL DEFAULT 0,
    patch_count INTEGER NOT NULL DEFAULT 0,
    pinned INTEGER NOT NULL DEFAULT 0,
    state TEXT NOT NULL DEFAULT 'active'
        CHECK(state IN ('active','stale','archived'))
);

-- Auto-extraction queue: heuristic candidates for note/concept/distill that
-- a session can review in batch and accept/reject — saves manual scanning.
CREATE TABLE IF NOT EXISTS extract_candidates (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    kind TEXT NOT NULL,               -- 'note','concept','distill','verbatim'
    source_uuid TEXT,                 -- dialog_messages.uuid if from ingest
    source_cid TEXT,                  -- conversation cid
    content TEXT NOT NULL,
    rationale TEXT,                   -- which heuristic fired
    status TEXT NOT NULL DEFAULT 'pending'
        CHECK(status IN ('pending','accepted','rejected')),
    created_at INTEGER NOT NULL,
    decided_at INTEGER
);

-- Reciprocal-rank-fusion-friendly FTS over dialog content. Mirror table
-- (not content='dialog_messages') because dialog_messages PK is TEXT and
-- FTS5 content tables expect INTEGER rowid alignment.
CREATE VIRTUAL TABLE IF NOT EXISTS dialog_fts USING fts5(
    uuid UNINDEXED,
    content
);

-- Spawned background sessions. Tracks `claude -p` subprocesses started by an
-- active conversation. spawned_cid is filled lazily once the child's jsonl
-- appears in CLAUDE_PROJECTS_DIR.
CREATE TABLE IF NOT EXISTS tasks (
    id TEXT PRIMARY KEY,
    pid INTEGER NOT NULL,
    parent_cid TEXT,
    spawned_cid TEXT,
    cwd TEXT NOT NULL,
    prompt TEXT NOT NULL,
    started_at INTEGER NOT NULL,
    ended_at INTEGER,
    return_code INTEGER
);

CREATE INDEX IF NOT EXISTS idx_notes_thread ON notes(thread_id);
CREATE INDEX IF NOT EXISTS idx_notes_created ON notes(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_threads_state ON threads(state);
CREATE INDEX IF NOT EXISTS idx_threads_touch ON threads(last_touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_events_created ON events(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id);
CREATE INDEX IF NOT EXISTS idx_presence_hb ON presence(heartbeat_at DESC);
CREATE INDEX IF NOT EXISTS idx_dialog_session ON dialog_messages(session_id);
CREATE INDEX IF NOT EXISTS idx_dialog_created ON dialog_messages(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_dialog_role ON dialog_messages(role);
CREATE INDEX IF NOT EXISTS idx_signals_to ON signals(to_cid);
CREATE INDEX IF NOT EXISTS idx_signals_from ON signals(from_cid);
CREATE INDEX IF NOT EXISTS idx_signals_created ON signals(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_signals_unread ON signals(read_at) WHERE read_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_tasks_started ON tasks(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_tasks_parent ON tasks(parent_cid);
CREATE INDEX IF NOT EXISTS idx_tasks_running ON tasks(ended_at) WHERE ended_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_probes_category ON probes(category);
CREATE INDEX IF NOT EXISTS idx_probes_enabled ON probes(enabled) WHERE enabled=1;
CREATE INDEX IF NOT EXISTS idx_probe_results_cat ON probe_results(category, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_probe_results_at ON probe_results(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_concepts_confidence ON concepts(confidence);
CREATE INDEX IF NOT EXISTS idx_concepts_thread ON concepts(source_thread);
CREATE INDEX IF NOT EXISTS idx_distill_kind ON distill(kind);
CREATE INDEX IF NOT EXISTS idx_distill_vote ON distill(vote_sum DESC);
CREATE INDEX IF NOT EXISTS idx_distill_pending ON distill(exported_at) WHERE exported_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_core_priority ON core_memory(priority DESC);
CREATE INDEX IF NOT EXISTS idx_dialectic_confidence ON user_dialectic(confidence);
CREATE INDEX IF NOT EXISTS idx_dialectic_state ON user_dialectic(state);
CREATE INDEX IF NOT EXISTS idx_dialectic_domain ON user_dialectic(domain);
CREATE INDEX IF NOT EXISTS idx_evidence_claim ON dialectic_evidence(claim_id);
CREATE INDEX IF NOT EXISTS idx_evidence_created ON dialectic_evidence(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_edges_from ON edges(from_kind, from_id);
CREATE INDEX IF NOT EXISTS idx_edges_to ON edges(to_kind, to_id);
CREATE INDEX IF NOT EXISTS idx_edges_relation ON edges(relation);
CREATE INDEX IF NOT EXISTS idx_extract_status ON extract_candidates(status, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_skill_usage_state ON skill_usage(state);
CREATE INDEX IF NOT EXISTS idx_skill_usage_origin ON skill_usage(created_by_origin);

-- External-content FTS5 index over notes.content. FTS5 does not watch the
-- content table itself, so the index is kept in sync by triggers for all
-- three kinds of write: insert, delete and update.
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
    content, content='notes', content_rowid='id'
);
CREATE TRIGGER IF NOT EXISTS notes_fts_ai AFTER INSERT ON notes BEGIN
    INSERT INTO notes_fts(rowid, content) VALUES (new.id, new.content);
END;
CREATE TRIGGER IF NOT EXISTS notes_fts_ad AFTER DELETE ON notes BEGIN
    INSERT INTO notes_fts(notes_fts, rowid, content) VALUES('delete', old.id, old.content);
END;
-- Limited to the indexed columns so embedding-only updates do not churn the
-- index. The old entry must be removed with the OLD values before re-adding.
CREATE TRIGGER IF NOT EXISTS notes_fts_au AFTER UPDATE OF id, content ON notes BEGIN
    INSERT INTO notes_fts(notes_fts, rowid, content) VALUES('delete', old.id, old.content);
    INSERT INTO notes_fts(rowid, content) VALUES (new.id, new.content);
END;
"""
416
+
417
# Lightweight column migrations, applied on every get_db() call. SQLite has
# no "ADD COLUMN IF NOT EXISTS", so each statement is simply retried and the
# resulting "duplicate column name" error is treated as "already applied".
_COLUMN_MIGRATIONS = (
    "ALTER TABLE threads ADD COLUMN claimed_at INTEGER",
    "ALTER TABLE threads ADD COLUMN claimed_by_cid TEXT",
    "ALTER TABLE signals ADD COLUMN task_id TEXT",
    "ALTER TABLE sessions ADD COLUMN write_origin "
    "TEXT NOT NULL DEFAULT 'foreground'",
    "ALTER TABLE tasks ADD COLUMN rss_kb INTEGER",
    "ALTER TABLE tasks ADD COLUMN rss_updated_at INTEGER",
)


def _ensure_vec_tables(conn: sqlite3.Connection) -> None:
    """Create the vec0 virtual tables and their uuid sidecar, best-effort.

    The vec0 tables live side-by-side with the BLOB-embedding columns.
    Existing data in notes.embedding / dialog_messages.embedding is migrated
    lazily by a backfill job (see ingest.py). The first OperationalError
    aborts the remaining statements and is logged at debug level.
    """
    try:
        conn.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS notes_vec USING vec0("
            " id INTEGER PRIMARY KEY,"
            f" embedding FLOAT[{EMBED_DIM}]"
            ")"
        )
        conn.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS dialog_vec USING vec0("
            " rowid INTEGER PRIMARY KEY,"
            f" embedding FLOAT[{EMBED_DIM}]"
            ")"
        )
        # Sidecar to map dialog_vec.rowid → dialog_messages.uuid since
        # vec0 PKs must be integers but dialog_messages keys on TEXT uuid.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS dialog_vec_map ("
            " rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
            " uuid TEXT NOT NULL UNIQUE"
            ")"
        )
    except sqlite3.OperationalError as e:
        logger.debug("vec0 table creation skipped: %s", e)


def _apply_column_migrations(conn: sqlite3.Connection) -> None:
    """Run every ALTER TABLE in _COLUMN_MIGRATIONS, best-effort.

    "duplicate column name" means the column already exists and is ignored
    silently. Any other OperationalError (e.g. a locked database) is still
    not raised, but is logged so that a missing column can be diagnosed.
    """
    for ddl in _COLUMN_MIGRATIONS:
        try:
            conn.execute(ddl)
        except sqlite3.OperationalError as e:
            if "duplicate column name" not in str(e).lower():
                logger.warning("column migration failed (%s): %s", ddl, e)


def get_db() -> sqlite3.Connection:
    """Open a connection to DB_PATH with the schema fully applied.

    Every call opens a new connection, configures it, tries to load
    sqlite-vec, runs the idempotent SCHEMA script, creates the vec0 tables
    when the extension loaded, and applies the column migrations.

    Returns:
        A connection whose row_factory is sqlite3.Row.

    Raises:
        sqlite3.Error: if connecting, configuring or applying SCHEMA fails.
            The half-initialised connection is closed before re-raising.
    """
    global _VEC_AVAILABLE
    conn = sqlite3.connect(str(DB_PATH), timeout=10.0)
    try:
        # WAL = concurrent readers + one writer without blocking. Required for
        # running Desktop + CLI + VS Code against the same DB simultaneously.
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.execute("PRAGMA busy_timeout=10000")
        # Load sqlite-vec extension if available. Must happen BEFORE schema
        # so the vec0 virtual tables can be created in this connection.
        vec_loaded = _try_load_vec(conn)
        if _VEC_AVAILABLE is None:
            # Only the first probe's verdict is cached for vec_available().
            _VEC_AVAILABLE = vec_loaded
        conn.executescript(SCHEMA)
        if vec_loaded:
            _ensure_vec_tables(conn)
        _apply_column_migrations(conn)
        conn.row_factory = sqlite3.Row
    except Exception:
        # Don't leak the file handle / locks of a connection nobody receives.
        conn.close()
        raise
    return conn