threadkeeper 0.4.1__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/PKG-INFO +268 -59
  2. threadkeeper-0.5.0/README.md +544 -0
  3. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/pyproject.toml +1 -1
  4. threadkeeper-0.5.0/tests/test_candidate_reviewer.py +267 -0
  5. threadkeeper-0.5.0/tests/test_dialectic_tier.py +394 -0
  6. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_skill_hint.py +3 -3
  7. threadkeeper-0.5.0/tests/test_skill_tier.py +318 -0
  8. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_skills.py +4 -5
  9. threadkeeper-0.5.0/tests/test_spawn_config.py +322 -0
  10. threadkeeper-0.5.0/tests/test_validate_threads.py +146 -0
  11. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/__init__.py +26 -1
  12. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/base.py +25 -0
  13. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/claude_code.py +26 -0
  14. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/codex.py +18 -0
  15. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/copilot.py +16 -0
  16. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/gemini.py +16 -0
  17. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/brief.py +53 -10
  18. threadkeeper-0.5.0/threadkeeper/candidate_reviewer.py +341 -0
  19. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/config.py +19 -2
  20. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/curator.py +4 -6
  21. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/db.py +31 -0
  22. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/identity.py +89 -0
  23. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/nudges.py +6 -4
  24. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/review_prompts.py +10 -9
  25. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/server.py +2 -0
  26. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/shadow_review.py +4 -4
  27. threadkeeper-0.5.0/threadkeeper/spawn_config.py +203 -0
  28. threadkeeper-0.5.0/threadkeeper/tools/candidate_reviewer.py +95 -0
  29. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/dialectic.py +242 -45
  30. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/skills.py +162 -22
  31. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/spawn.py +111 -30
  32. threadkeeper-0.5.0/threadkeeper/tools/validate.py +238 -0
  33. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper.egg-info/PKG-INFO +268 -59
  34. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper.egg-info/SOURCES.txt +10 -1
  35. threadkeeper-0.4.1/README.md +0 -335
  36. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/LICENSE +0 -0
  37. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/setup.cfg +0 -0
  38. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_adapters.py +0 -0
  39. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_brief_sections.py +0 -0
  40. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_core_memory.py +0 -0
  41. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_curator.py +0 -0
  42. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_delegated_search.py +0 -0
  43. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_dialectic.py +0 -0
  44. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_error_paths.py +0 -0
  45. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_extract_daemon.py +0 -0
  46. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_i18n_multilang.py +0 -0
  47. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_identity.py +0 -0
  48. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_lessons.py +0 -0
  49. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_missed_spawns.py +0 -0
  50. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_nudges.py +0 -0
  51. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_process_health.py +0 -0
  52. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_shadow_review.py +0 -0
  53. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_skill_use_parser.py +0 -0
  54. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_skill_watcher.py +0 -0
  55. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_spawn_budget.py +0 -0
  56. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_spawn_hint.py +0 -0
  57. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_spawn_slim.py +0 -0
  58. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_threads.py +0 -0
  59. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_tools_smoke.py +0 -0
  60. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/tests/test_vec_search.py +0 -0
  61. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/__init__.py +0 -0
  62. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/_mcp.py +0 -0
  63. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/_setup.py +0 -0
  64. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/_hook_helpers.py +0 -0
  65. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/claude_desktop.py +0 -0
  66. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/adapters/vscode.py +0 -0
  67. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/embeddings.py +0 -0
  68. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/extract_daemon.py +0 -0
  69. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/helpers.py +0 -0
  70. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/i18n.py +0 -0
  71. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/ingest.py +0 -0
  72. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/lessons.py +0 -0
  73. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/process_health.py +0 -0
  74. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/search_proxy.py +0 -0
  75. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/skill_watcher.py +0 -0
  76. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/spawn_budget.py +0 -0
  77. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/__init__.py +0 -0
  78. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/concepts.py +0 -0
  79. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/consolidate.py +0 -0
  80. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/core_memory.py +0 -0
  81. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/correlation.py +0 -0
  82. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/curator.py +0 -0
  83. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/dialog.py +0 -0
  84. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/distill.py +0 -0
  85. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/extract.py +0 -0
  86. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/graph.py +0 -0
  87. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/invariants.py +0 -0
  88. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/lessons.py +0 -0
  89. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/missed_spawns.py +0 -0
  90. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/peers.py +0 -0
  91. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/pickup.py +0 -0
  92. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/probes.py +0 -0
  93. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/process_health.py +0 -0
  94. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/session.py +0 -0
  95. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/shadow_review.py +0 -0
  96. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/style.py +0 -0
  97. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper/tools/threads.py +0 -0
  98. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper.egg-info/dependency_links.txt +0 -0
  99. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper.egg-info/entry_points.txt +0 -0
  100. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper.egg-info/requires.txt +0 -0
  101. {threadkeeper-0.4.1 → threadkeeper-0.5.0}/threadkeeper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: threadkeeper
3
- Version: 0.4.1
3
+ Version: 0.5.0
4
4
  Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
5
5
  Author: thread-keeper contributors
6
6
  License: MIT
@@ -77,10 +77,10 @@ make it more than a memory store:
77
77
  concurrent sessions signal each other across CLIs. Parent /
78
78
  children / sibling agents become a coordinated swarm, not isolated
79
79
  chats.
80
- - **Self-improving skill library** — four autonomous background loops
80
+ - **Self-improving skill library** — five autonomous background loops
81
81
  (auto-review on thread close, shadow-review daemon, extract
82
- harvester, weekly Curator) materialize class-level skills as the
83
- agents work. Hermes Agent v0.12 pattern adapted to multi-CLI:
82
+ harvester, candidate-reviewer, weekly Curator) materialize
83
+ class-level skills as the agents work. Adapted to multi-CLI:
84
84
  SKILL.md is the primary write target and gets mirrored to every
85
85
  detected CLI's skills directory simultaneously
86
86
  (`~/.claude/skills/`, `~/.codex/skills/`, `~/.threadkeeper/skills/`),
@@ -189,64 +189,202 @@ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
189
189
  (3 GB default). Slim children that need semantic search delegate to the
190
190
  parent via `search_via_parent` — no per-child copy of sentence-transformers.
191
191
 
192
- ### Learning loop (hermes-style)
193
-
194
- Four loops materialize knowledge into Anthropic-style Skill files
195
- (`SKILL.md` under each detected CLI's skills directory Claude's
196
- `~/.claude/skills/`, Codex's `~/.codex/skills/`, plus the canonical
197
- `~/.threadkeeper/skills/` mirror) with a CLI-agnostic
198
- `~/.threadkeeper/lessons.md` fallback for CLIs that don't auto-trigger
199
- on the Skill format (Gemini / Copilot / bare MCP clients):
200
-
201
- - **Auto-review on close_thread** — when a closed thread is rich
202
- (≥5 notes, ≥2 insight/move), `close_thread` spawns a slim child with
203
- `SKILL_REVIEW_PROMPT` + the thread's notes. The prompt is rubric-form
204
- (Q1–Q5 yes/no) with explicit positive examples for incident-vs-rule
205
- classification. The fork also receives a "recently active skills"
206
- block so it prefers PATCHing existing umbrellas over creating new
207
- ones (Hermes Agent v0.12's *active-update bias*). Child appends a
208
- lesson via `lesson_append`, optionally mirrors to
209
- `~/.claude/skills/<name>/SKILL.md`, then closes with
210
- `mark_skill_materialized`. Opt in with `THREADKEEPER_AUTO_REVIEW=1`.
211
- - **Shadow-review daemon** — every `THREADKEEPER_SHADOW_REVIEW_INTERVAL_S`
212
- seconds (default off; 15 min recommended), scans the diff of
213
- `dialog_messages` since the last cursor across **all** CLIs. The
214
- window filters internal review-child sessions (no self-pollution)
215
- and strips adapter `[tool_result]` / `[tool_call]` noise — Hermes
216
- v0.12's "clean context" rule. If ≥500 chars of meaningful signal
217
- remain, spawns a slim observer child that decides on class-level
218
- learning. Idempotent through `events.kind='shadow_review_pass'`.
219
- - **Extract daemon** — every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds
220
- (default off; 10 min recommended), scans recent `dialog_messages`
221
- with heuristic matchers (locale-aware "I want / next time / always"
222
- patterns, headers + insight markers, bullet regularities, paraphrase
223
- clusters via cosine ≥ 0.80) and enqueues candidates in
224
- `extract_candidates.status='pending'` for the agent to review via
225
- `review_candidates()` / `accept_candidate()`. The same self-pollution
226
- filter as shadow_review excludes internal review-child sessions.
227
- Where shadow extracts CLASS-LEVEL durable rules, extract harvests
228
- PER-INCIDENT decision-shaped utterances — sidesteps the empirical
229
- problem that agents focused on their primary task don't call
230
- `note()` / `verbatim_user()` on their own.
231
- - **Autonomous Curator** — every `THREADKEEPER_CURATOR_INTERVAL_S`
232
- seconds (default off; 7 days recommended), spawns a slim child that
233
- reviews the EXISTING `lessons.md` + `skill_usage` inventory and
234
- writes `~/.threadkeeper/curator/REPORT-<isodate>.md` with KEEP /
235
- PATCH / CONSOLIDATE / PRUNE recommendations. Pinned and
236
- foreground-authored entries are marked `[PROTECTED]` in the
237
- inventory so the curator never proposes destructive changes against
238
- them. Phase 1 is advisory-only — user reviews the REPORT and
239
- applies changes manually. Inspired by Hermes Agent v0.12's
240
- `hermes curator` cron agent.
192
+ ### Learning loops
193
+
194
+ Five loops turn raw agent dialog into a curated, multi-CLI-mirrored
195
+ skill library autonomously, without requiring agents to call
196
+ `note()` / `verbatim_user()` / `close_thread()` on their own (audit
197
+ shows agents focused on their primary task rarely do).
198
+
199
+ **Pipeline at a glance:**
200
+
201
+ ```
202
+ every CLI's transcripts
203
+
204
+ (ingest, every 30s always-on)
205
+ dialog_messages ◄──────────────────────────────────────┐
206
+ │ │
207
+ ├────────► [1] auto_review on close_thread │
208
+ │ (agent triggers rare) │
209
+ │ │ │
210
+ ├────────► [2] shadow_review daemon │
211
+ │ (cron, every 15 min) │
212
+ │ │ │
213
+ ├────────► [3] extract daemon │
214
+ │ (cron, every 10 min)
215
+ │ │ │
216
+ │ extract_candidates │
217
+ │ │ │
218
+ │ ▼ │
219
+ │ [4] candidate_reviewer daemon
220
+ (cron, every 1 h) ──────────────┤
221
+ │ │ │
222
+ ▼ ▼ │
223
+ brief() SKILL.md + lessons.md ─► skill_usage │
224
+ │ │ │ │
225
+ │ ▼ ▼ │
226
+ │ (every detected CLI's │ │
227
+ │ skills/ directory) │ │
228
+ │ │ │ │
229
+ │ └──────► [5] Curator daemon ───┘
230
+ (cron, every 7d)
231
+ │ │
232
+ │ ▼
233
+ │ REPORT-<date>.md
234
+
235
+ injected into every new session at SessionStart
236
+ ```
237
+
238
+ **Each loop in one row:**
239
+
240
+ | # | Loop | Trigger / recommended tick | Reads | Writes |
241
+ |---|---|---|---|---|
242
+ | 1 | auto_review on close_thread | on `close_thread()` for rich threads | the thread's notes | SKILL.md, lessons.md |
243
+ | 2 | shadow_review daemon | every 15 min (env knob) | recent `dialog_messages` window | SKILL.md, lessons.md |
244
+ | 3 | extract daemon | every 10 min (env knob) | recent `dialog_messages` window | `extract_candidates` pending queue |
245
+ | 4 | candidate-reviewer daemon | every 1 h (env knob) | pending candidates queue | SKILL.md (create/patch) / notes / verbatim / reject |
246
+ | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md (advisory) or direct PATCH/PRUNE/CONSOLIDATE |
247
+
248
+ All five write into the universal Skill format (`SKILL.md` under each
249
+ detected CLI's skills directory — `~/.claude/skills/`,
250
+ `~/.codex/skills/`, plus the canonical `~/.threadkeeper/skills/`
251
+ mirror), with `~/.threadkeeper/lessons.md` as a CLI-agnostic fallback
252
+ for clients without a native skills loader (Gemini, Copilot, bare
253
+ MCP).
254
+
255
+ #### 1. Auto-review on close_thread
256
+
257
+ When a closed thread is rich (≥5 notes, ≥2 insight/move),
258
+ `close_thread` spawns a slim child with `SKILL_REVIEW_PROMPT` + the
259
+ thread's notes. The prompt is rubric-form (Q1–Q5 yes/no) with explicit
260
+ positive examples for incident-vs-rule classification. The fork also
261
+ receives a "recently active skills" block so it prefers PATCHing
262
+ existing umbrellas over creating new ones (*active-update bias*).
263
+ Child appends a lesson via `lesson_append`, optionally mirrors to
264
+ `~/.claude/skills/<name>/SKILL.md`, then closes with
265
+ `mark_skill_materialized`. Opt in with `THREADKEEPER_AUTO_REVIEW=1`.
266
+
267
+ #### 2. Shadow-review daemon
268
+
269
+ Every `THREADKEEPER_SHADOW_REVIEW_INTERVAL_S` seconds (default off,
270
+ 900 = 15 min recommended) scans the diff of `dialog_messages` since
271
+ the last cursor **across all CLIs at once**. The window filters
272
+ internal review-child sessions (no self-pollution) and strips adapter
273
+ `[tool_result]` / `[tool_call]` noise (the "clean context" rule). If
274
+ ≥500 chars of meaningful signal remain, spawns a slim observer child
275
+ that decides on class-level learning. Idempotent through
276
+ `events.kind='shadow_review_pass'`.
277
+
278
+ #### 3. Extract daemon
279
+
280
+ Every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds (default off, 600 =
281
+ 10 min recommended) scans recent `dialog_messages` with heuristic
282
+ matchers: locale-aware "I want / next time / always" patterns,
283
+ headers + insight markers, bullet regularities, and paraphrase
284
+ clusters via cosine ≥ 0.80. Each match enqueues a row in
285
+ `extract_candidates.status='pending'`. Same self-pollution filter as
286
+ shadow_review (internal review-child sessions excluded) plus
287
+ a message-level noise filter (compaction summaries, SKILL.md
288
+ injections, subagent role prompts, test-runner log dumps).
289
+
290
+ Where shadow extracts CLASS-LEVEL durable rules, extract harvests
291
+ PER-INCIDENT decision-shaped utterances. Heuristic, not LLM —
292
+ findings get refined by loop 4.
293
+
294
+ #### 4. Candidate-reviewer daemon
295
+
296
+ Every `THREADKEEPER_CANDIDATE_REVIEW_INTERVAL_S` seconds (default off,
297
+ 3600 = 1 h recommended) consumes the pending queue built up by the extract daemon.
298
+ Spawns a slim LLM child that decides per candidate or per coherent
299
+ cluster:
300
+
301
+ - **SKILL.create** — class-level rule; merge 2-5 related candidates
302
+ into one skill (active-update bias prefers PATCH over CREATE)
303
+ - **SKILL.patch** — refines a recently-active skill
304
+ - **SKILL.write_file** — adds `references/<topic>.md` under an
305
+ existing umbrella
306
+ - **NOTE** — per-incident decision (requires `thread_id`)
307
+ - **VERBATIM** — user quote worth preserving in `brief()`
308
+ - **REJECT** — false positive that slipped past extract's filters
309
+
310
+ Hard limits: max 2 new skills per pass, `[PROTECTED]` (pinned +
311
+ foreground-authored) skills off-limits. Closes the gap between
312
+ heuristic harvest and SKILL.md materialization — previously pending
313
+ candidates accumulated indefinitely waiting for an agent to call
314
+ `accept_candidate()` manually.
315
+
316
+ #### 5. Autonomous Curator
317
+
318
+ Every `THREADKEEPER_CURATOR_INTERVAL_S` seconds (default off, 604800
319
+ = 7 days recommended) spawns a slim child that reviews the EXISTING
320
+ `lessons.md` + `skill_usage` inventory and writes
321
+ `~/.threadkeeper/curator/REPORT-<isodate>.md` with KEEP / PATCH /
322
+ CONSOLIDATE / PRUNE recommendations. Pinned and foreground-authored
323
+ entries are marked `[PROTECTED]` in the inventory so the curator
324
+ never proposes destructive changes against them.
325
+
326
+ Phase 1 is advisory-only (REPORT only); flip
327
+ `THREADKEEPER_CURATOR_DESTRUCTIVE=1` once trust builds to let the
328
+ child apply its own recommendations directly.
329
+
330
+ #### Honest take
331
+
332
+ What works **without** agent cooperation (passive, opt-in via env):
333
+
334
+ - Loops 2 (shadow), 3 (extract), 4 (candidate-reviewer), 5 (curator) —
335
+ all run from the parent process, never require `note()` or
336
+ `close_thread()` from the agent
337
+
338
+ What depends on the agent **calling tools explicitly**:
339
+
340
+ - Loop 1 (auto-review on close_thread) — only fires if the agent
341
+ closes threads, which the audit shows agents focused on coding
342
+ tasks rarely do
343
+ - Manual `skill_record(outcome='wrong')` — strongest feedback signal
344
+ to the Curator, but agents need to remember to flag bad skills
345
+
346
+ The whole point of having five loops (not one) is graceful
347
+ degradation: even when agents don't actively contribute, loops 2-5
348
+ keep the library growing from passive observation of the dialog
349
+ stream.
241
350
 
242
351
  ### Dialectic user model
243
352
 
244
353
  A model of you, accumulated as you use the agent. `dialectic_claim`,
245
- `dialectic_evidence` (support / contradict / clarifying),
246
- `dialectic_synthesis`, `dialectic_supersede`. Honcho-inspired smoothed
247
- ratio `(s-c)/(s+c+3)` → low / medium / high / disputed confidence.
354
+ `dialectic_evidence` (support / contradict),
355
+ `dialectic_synthesis`, `dialectic_supersede`. Honcho-inspired
356
+ **weighted, smoothed** ratio
357
+ `(Σw_support − Σw_contradict) / (Σw_support + Σw_contradict + 3)`
358
+ → low / medium / high / disputed confidence.
248
359
  Grouped by domain (style, values, workflow, ...) in `brief()`.
249
360
 
361
+ **Source-based evidence discount.** Each evidence row's effective weight
362
+ is `base_weight × discount(WRITE_ORIGIN)`. Foreground (direct user / human
363
+ signal) = 1.0. shadow_review / background_review / candidate_review /
364
+ curator review-forks = 0.5. Structural defence against self-confirmation
365
+ loops: a claim that surfaces in `brief()` and then gets "confirmed" by a
366
+ review-fork reading the same dialog can't ride that internal evidence
367
+ all the way to high confidence — internal evidence buys half as much.
368
+
369
+ **Discrete tier on each claim** — `hypothesis → observed → validated`
370
+ (plus `disputed`). Independent of the continuous confidence band; tier
371
+ is the **action-gating** signal:
372
+
373
+ - `validated` → agent applies by default (★ in brief)
374
+ - `observed` → agent references and may mention the assumption (· in brief)
375
+ - `hypothesis` → active probe; surfaces in a separate `currently_testing`
376
+ block so the agent watches the next user moves through that lens
377
+
378
+ Transitions are discrete events (`tier_promoted` / `tier_demoted` in the
379
+ `events` table) with timestamps for an auditable trail of when each
380
+ claim earned trust. Thresholds:
381
+
382
+ - `hypothesis → observed`: `w_support ≥ 2.0` (claim has real backing)
383
+ - `observed → validated`: `w_support ≥ 4.0` **and** no contradict in 14 days
384
+ - `validated → observed`: any recent contradict (demote on user pushback)
385
+ - any → `disputed`: `w_contradict > w_support`
386
+ - `disputed → hypothesis`: support overtakes contradict (recovery path)
387
+
250
388
  ### i18n bundle
251
389
 
252
390
  All multilingual regex and prompt fragments live in
@@ -271,11 +409,13 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
271
409
  | `THREADKEEPER_SHADOW_REVIEW_WINDOW_S` | 900 | sliding window for shadow scan (s) |
272
410
  | `THREADKEEPER_EXTRACT_INTERVAL_S` | 0 (off) | extract daemon tick (s); 600 = 10 min recommended |
273
411
  | `THREADKEEPER_EXTRACT_WINDOW_MIN` | 30 | sliding dialog window per extract pass (min) |
412
+ | `THREADKEEPER_CANDIDATE_REVIEW_INTERVAL_S` | 0 (off) | candidate-reviewer daemon tick (s); 3600 = 1h recommended |
413
+ | `THREADKEEPER_CANDIDATE_REVIEW_MIN` | 3 | min pending candidates before reviewer engages |
274
414
  | `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
275
415
  | `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
276
416
  | `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
277
417
  | `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
278
- | `THREADKEEPER_INGEST_INTERVAL_S` | 30 | transcript ingest tick (s) |
418
+ | `THREADKEEPER_INGEST_INTERVAL_S` | 30 | transcript ingest tick (s) |
279
419
  | `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable sentence-transformers |
280
420
  | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
281
421
 
@@ -283,6 +423,73 @@ Persist them via `~/.claude/settings.json`'s `env` block (Claude Code) or
283
423
  the equivalent env section in each CLI's config. Hot-config reload is
284
424
  [tracked](https://github.com/po4erk91/thread-keeper/issues/2).
285
425
 
426
+ ### Per-loop agent dispatch
427
+
428
+ By default every learning-loop spawn runs through the same CLI that
429
+ hosts thread-keeper — Claude-session ⇒ Claude spawn, Codex-session ⇒
430
+ Codex spawn, etc. Detection: process-tree walk at startup, cached for
431
+ the server lifetime. The MCP tool `spawn_status()` shows the live
432
+ resolution table.
433
+
434
+ Override per role via `~/.threadkeeper/spawn.toml`:
435
+
436
+ ```toml
437
+ [default]
438
+ agent = "auto" # "auto" = use active CLI (default)
439
+
440
+ [loops]
441
+ # Force specific roles to specific CLIs regardless of active host
442
+ shadow_observer = "claude" # heaviest reasoning → keep on Claude
443
+ curator = "codex" # weekly audit → Codex is fine
444
+ candidate_reviewer = "auto" # follow active CLI
445
+ archivist = "claude" # close_thread auto-review
446
+ extract = "auto" # this one is local (no spawn)
447
+
448
+ [models]
449
+ # Optional per-CLI model pin — overrides each CLI's own default
450
+ claude = "opus"
451
+ codex = "gpt-5.4"
452
+ gemini = "gemini-2.5-pro"
453
+ ```
454
+
455
+ Or via env (highest priority, overrides the TOML):
456
+
457
+ ```bash
458
+ export THREADKEEPER_SPAWN_DEFAULT=codex # global default
459
+ export THREADKEEPER_SPAWN_LOOP_CURATOR=gemini # per-role
460
+ export THREADKEEPER_SPAWN_MODEL_CLAUDE=opus # per-CLI model
461
+ export THREADKEEPER_ACTIVE_CLI=claude # force detection
462
+ ```
463
+
464
+ Adapters without headless support (Claude Desktop, VS Code) can't be
465
+ spawn targets — `spawn_status()` reports them as "no adapter" and any
466
+ override pointing at them falls back to the next priority level.
467
+
468
+ ---
469
+
470
+ ## Hygiene tools
471
+
472
+ Two tools keep the memory tidy — both default to `dry_run=True`; run
473
+ them with `dry_run=False` to apply:
474
+
475
+ - **`consolidate()`** — deduplicate near-identical notes (intra-thread cosine
476
+ ≥ 0.95), deduplicate verbatim quotes, demote untouched-active threads
477
+ to `idle` after 30 days, release orphaned thread claims.
478
+ - **`validate_threads()`** — heuristic triage of active threads with
479
+ four categories (first match wins per thread):
480
+ - `no_notes_old` — active with zero notes ≥ 7 days → close as abandoned.
481
+ - `shipped` — last note matches a shipped-marker regex (EN+RU:
482
+ shipped/fixed/works/passed/done/merged/закрыто/готово/сделано/…)
483
+ and has settled ≥ 3 days → close with the last move as outcome.
484
+ - `dropped_open_q` — last note is an `open_q` left unfollowed
485
+ ≥ 14 days → close as dropped.
486
+ - `stale_idle` — any active thread not touched in ≥ 30 days → demote to
487
+ `idle` (not closed — revives on next `note()`).
488
+
489
+ Idle threads are never touched. Tunable via `no_notes_days`,
490
+ `shipped_settle_days`, `drop_open_q_days`, `stale_days`, and
491
+ `shipped_markers` (comma-separated extra tokens).
492
+
286
493
  ---
287
494
 
288
495
  ## Storage
@@ -317,7 +524,7 @@ pip install -e '.[semantic,dev]'
317
524
  python -m pytest
318
525
  ```
319
526
 
320
- 412 tests passing on Python 3.11 / 3.12 / 3.13 (1 skipped). CI runs
527
+ 495 tests passing on Python 3.11 / 3.12 / 3.13 (1 skipped). CI runs
321
528
  the suite on every push and PR.
322
529
 
323
530
  ---
@@ -342,11 +549,13 @@ threadkeeper/
342
549
  │ ├── gemini.py
343
550
  │ ├── copilot.py
344
551
  │ └── vscode.py
345
- └── tools/ # @mcp.tool entries — 83 of them
552
+ └── tools/ # @mcp.tool entries — 89 of them
346
553
  ├── threads.py
347
554
  ├── peers.py
348
555
  ├── spawn.py
349
556
  ├── skills.py
557
+ ├── dialectic.py
558
+ ├── validate.py
350
559
  └── ...
351
560
  ```
352
561