threadkeeper 0.8.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/PKG-INFO +150 -37
  2. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/README.md +147 -36
  3. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/pyproject.toml +5 -1
  4. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_adapters.py +61 -0
  5. threadkeeper-0.9.0/tests/test_agent_status.py +314 -0
  6. threadkeeper-0.9.0/tests/test_brief_footprint.py +135 -0
  7. threadkeeper-0.9.0/tests/test_config_settings.py +171 -0
  8. threadkeeper-0.9.0/tests/test_dashboard.py +123 -0
  9. threadkeeper-0.9.0/tests/test_dialectic_feed_tools.py +65 -0
  10. threadkeeper-0.9.0/tests/test_dialectic_miner.py +307 -0
  11. threadkeeper-0.9.0/tests/test_dialectic_observation_resolve.py +56 -0
  12. threadkeeper-0.9.0/tests/test_dialectic_recompute.py +121 -0
  13. threadkeeper-0.9.0/tests/test_dialectic_validator.py +621 -0
  14. threadkeeper-0.9.0/tests/test_evolve_applier.py +445 -0
  15. threadkeeper-0.9.0/tests/test_evolve_apply_2.py +105 -0
  16. threadkeeper-0.9.0/tests/test_evolve_apply_3.py +144 -0
  17. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_extract_daemon.py +165 -0
  18. threadkeeper-0.9.0/tests/test_extract_dedup.py +134 -0
  19. threadkeeper-0.9.0/tests/test_ingest_status.py +29 -0
  20. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_lessons.py +78 -0
  21. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_memory_guard.py +30 -0
  22. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_shadow_review.py +44 -0
  23. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_passive_tier.py +86 -0
  24. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skills.py +30 -2
  25. threadkeeper-0.9.0/tests/test_spawn_codex_stdin.py +102 -0
  26. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_config.py +118 -139
  27. threadkeeper-0.9.0/tests/test_spawn_wrap.py +250 -0
  28. threadkeeper-0.9.0/tests/test_thread_janitor.py +180 -0
  29. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/_setup.py +6 -4
  30. threadkeeper-0.9.0/threadkeeper/_spawn_wrap.py +128 -0
  31. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/codex.py +117 -5
  32. threadkeeper-0.9.0/threadkeeper/agent_status.py +754 -0
  33. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/brief.py +82 -31
  34. threadkeeper-0.9.0/threadkeeper/config.py +397 -0
  35. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/db.py +27 -0
  36. threadkeeper-0.9.0/threadkeeper/dialectic_miner.py +450 -0
  37. threadkeeper-0.9.0/threadkeeper/dialectic_validator.py +556 -0
  38. threadkeeper-0.9.0/threadkeeper/evolve_applier.py +709 -0
  39. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/identity.py +44 -0
  40. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/ingest.py +114 -1
  41. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/lessons.py +20 -0
  42. threadkeeper-0.9.0/threadkeeper/menubar_app.py +244 -0
  43. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/server.py +7 -0
  44. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/shadow_review.py +50 -12
  45. threadkeeper-0.9.0/threadkeeper/spawn_config.py +95 -0
  46. threadkeeper-0.9.0/threadkeeper/thread_janitor.py +137 -0
  47. threadkeeper-0.9.0/threadkeeper/tools/agent_status.py +19 -0
  48. threadkeeper-0.9.0/threadkeeper/tools/dashboard.py +220 -0
  49. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/dialectic.py +67 -0
  50. threadkeeper-0.9.0/threadkeeper/tools/dialectic_feed.py +119 -0
  51. threadkeeper-0.9.0/threadkeeper/tools/evolve_applier.py +163 -0
  52. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/extract.py +45 -6
  53. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/graph.py +1 -1
  54. threadkeeper-0.9.0/threadkeeper/tools/lessons.py +217 -0
  55. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/skills.py +24 -11
  56. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/spawn.py +101 -20
  57. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/threads.py +20 -4
  58. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/PKG-INFO +150 -37
  59. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/SOURCES.txt +28 -0
  60. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/entry_points.txt +1 -0
  61. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/requires.txt +2 -0
  62. threadkeeper-0.8.0/threadkeeper/config.py +0 -374
  63. threadkeeper-0.8.0/threadkeeper/spawn_config.py +0 -203
  64. threadkeeper-0.8.0/threadkeeper/tools/lessons.py +0 -110
  65. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/LICENSE +0 -0
  66. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/setup.cfg +0 -0
  67. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_brief_sections.py +0 -0
  68. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_candidate_reviewer.py +0 -0
  69. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_core_memory.py +0 -0
  70. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_curator.py +0 -0
  71. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_delegated_search.py +0 -0
  72. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_dialectic.py +0 -0
  73. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_dialectic_tier.py +0 -0
  74. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_error_paths.py +0 -0
  75. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_evolve_daemon.py +0 -0
  76. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_i18n_multilang.py +0 -0
  77. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_identity.py +0 -0
  78. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_missed_spawns.py +0 -0
  79. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_nudges.py +0 -0
  80. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_onnx_embeddings.py +0 -0
  81. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_panel.py +0 -0
  82. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_probe_daemon.py +0 -0
  83. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_process_health.py +0 -0
  84. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_search_fts_punctuation.py +0 -0
  85. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_hint.py +0 -0
  86. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_tier.py +0 -0
  87. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_use_parser.py +0 -0
  88. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_watcher.py +0 -0
  89. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_budget.py +0 -0
  90. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_hint.py +0 -0
  91. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_reap.py +0 -0
  92. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_slim.py +0 -0
  93. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_threads.py +0 -0
  94. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_tools_smoke.py +0 -0
  95. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_validate_threads.py +0 -0
  96. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_vec_search.py +0 -0
  97. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/__init__.py +0 -0
  98. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/_mcp.py +0 -0
  99. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/__init__.py +0 -0
  100. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/_hook_helpers.py +0 -0
  101. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/base.py +0 -0
  102. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/claude_code.py +0 -0
  103. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/claude_desktop.py +0 -0
  104. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/copilot.py +0 -0
  105. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/gemini.py +0 -0
  106. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/vscode.py +0 -0
  107. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/candidate_reviewer.py +0 -0
  108. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/curator.py +0 -0
  109. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/embeddings.py +0 -0
  110. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/evolve_daemon.py +0 -0
  111. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/extract_daemon.py +0 -0
  112. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/helpers.py +0 -0
  113. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/i18n.py +0 -0
  114. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/memory_guard.py +0 -0
  115. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/migrate_embeddings.py +0 -0
  116. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/nudges.py +0 -0
  117. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/probe_daemon.py +0 -0
  118. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/process_health.py +0 -0
  119. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/review_prompts.py +0 -0
  120. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/search_proxy.py +0 -0
  121. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/skill_watcher.py +0 -0
  122. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/spawn_budget.py +0 -0
  123. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/__init__.py +0 -0
  124. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/candidate_reviewer.py +0 -0
  125. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/concepts.py +0 -0
  126. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/consolidate.py +0 -0
  127. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/core_memory.py +0 -0
  128. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/correlation.py +0 -0
  129. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/curator.py +0 -0
  130. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/dialog.py +0 -0
  131. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/distill.py +0 -0
  132. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/invariants.py +0 -0
  133. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/memory_guard.py +0 -0
  134. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/missed_spawns.py +0 -0
  135. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/panel.py +0 -0
  136. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/peers.py +0 -0
  137. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/pickup.py +0 -0
  138. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/probes.py +0 -0
  139. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/process_health.py +0 -0
  140. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/session.py +0 -0
  141. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/shadow_review.py +0 -0
  142. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/style.py +0 -0
  143. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/validate.py +0 -0
  144. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/dependency_links.txt +0 -0
  145. {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: threadkeeper
3
- Version: 0.8.0
3
+ Version: 0.9.0
4
4
  Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
5
5
  Author: thread-keeper contributors
6
6
  License: MIT
@@ -23,6 +23,8 @@ Requires-Python: >=3.11
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: mcp>=1.0.0
26
+ Requires-Dist: pydantic>=2
27
+ Requires-Dist: pydantic-settings>=2
26
28
  Provides-Extra: semantic
27
29
  Requires-Dist: fastembed>=0.3; extra == "semantic"
28
30
  Requires-Dist: numpy>=1.24.0; extra == "semantic"
@@ -82,10 +84,12 @@ make it more than a memory store:
82
84
  concurrent sessions signal each other across CLIs. Parent /
83
85
  children / sibling agents become a coordinated swarm, not isolated
84
86
  chats.
85
- - **Self-improving skill library** — five autonomous background loops
87
+ - **Self-improving skill library** — autonomous background loops
86
88
  (auto-review on thread close, shadow-review daemon, extract
87
- harvester, candidate-reviewer, weekly Curator) materialize
88
- class-level skills as the agents work. Adapted to multi-CLI:
89
+ harvester, candidate-reviewer, weekly Curator, and a thread-janitor
90
+ that auto-closes idle threads so abandoned work reaches the harvest
91
+ path — closing is reversible: a note reopens a closed thread)
92
+ materialize class-level skills as the agents work. Adapted to multi-CLI:
89
93
  SKILL.md is the primary write target and gets mirrored to every
90
94
  known/configured skills root simultaneously (`~/.claude/skills/`,
91
95
  `~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
@@ -196,6 +200,36 @@ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
196
200
  (3 GB default). Slim children that need semantic search delegate to the
197
201
  parent via `search_via_parent` — no per-child copy of the embedding model.
198
202
 
203
+ `tk-agent-status` exposes autonomous learning loop status as structured JSON
204
+ or compact text for external monitors:
205
+
206
+ ```sh
207
+ tk-agent-status
208
+ tk-agent-status --json
209
+ ```
210
+
211
+ `apps/macos-agent-status/` contains a small macOS menu-bar app that polls this
212
+ command every 5 seconds and shows every autonomous learning loop: enabled/off,
213
+ running/idle/ready, last pass, backlog, and active child RSS when that loop has
214
+ spawned a worker. Active loops are sorted first (`running`, then `ready`), so
215
+ background work stays at the top of the panel. The app also requests macOS
216
+ notification permission and sends a notification when a newly completed
217
+ autonomous child task produces a useful result in `recent_results`; the first
218
+ poll only marks existing results as seen, so old completions do not spam
219
+ notifications. The probe backlog counts only due objective probes, not every registered
220
+ probe, so a healthy cooldown shows `0 due probes` instead of looking stuck. On
221
+ macOS, `python -m threadkeeper.server` automatically installs and launches it
222
+ on MCP startup. Set `THREADKEEPER_MENUBAR_AUTO_LAUNCH=0` to disable that
223
+ behavior.
224
+
225
+ Manual fallback:
226
+
227
+ ```sh
228
+ cd apps/macos-agent-status
229
+ ./build.sh
230
+ open build/ThreadKeeperAgentStatus.app
231
+ ```
232
+
199
233
  ### Learning loops
200
234
 
201
235
  Five loops turn raw agent dialog into a curated, multi-CLI-mirrored
@@ -250,7 +284,9 @@ shows agents focused on their primary task rarely do).
250
284
  | 2 | shadow_review daemon | every 15 min (env knob) | recent `dialog_messages` window | SKILL.md, lessons.md |
251
285
  | 3 | extract daemon | every 10 min (env knob) | recent `dialog_messages` window | `extract_candidates` pending queue |
252
286
  | 4 | candidate-reviewer daemon | every 1 h (env knob) | pending candidates queue | SKILL.md (create/patch) / notes / verbatim / reject |
253
- | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md (advisory) or direct PATCH/PRUNE/CONSOLIDATE |
287
+ | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md; Evolve applier applies the latest complete report |
288
+ | 6 | dialectic_miner daemon | configurable (env knob; 0=off) | recent `dialog_messages` — user replies + preceding-assistant context | `dialectic_observations` buffer |
289
+ | 7 | dialectic_validator daemon | configurable (env knob; 0=off) | buffered `dialectic_observations` | dialectic claims + evidence (support / contradict / supersede) via spawned opus child |
254
290
 
255
291
  Loops 1–5 write into the universal Skill format (`SKILL.md` under each
256
292
  known/configured skills root — `~/.claude/skills/`, `~/.codex/skills/`,
@@ -289,6 +325,10 @@ marked as spawned/background processes, so they cannot start their own shadow
289
325
  daemon even if a CLI drops the no-embeddings env. Idempotent through
290
326
  `events.kind='shadow_review_pass'`.
291
327
 
328
+ Before writing memory, the observer now checks existing lessons/skills and
329
+ prefers patching broad skills. Shadow-origin `lesson_append` is a compact
330
+ fallback only: oversized bodies and near-duplicate slugs are rejected.
331
+
292
332
  #### 3. Extract daemon
293
333
 
294
334
  Every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds (default off, 600 =
@@ -337,9 +377,57 @@ CONSOLIDATE / PRUNE recommendations. Pinned and foreground-authored
337
377
  entries are marked `[PROTECTED]` in the inventory so the curator
338
378
  never proposes destructive changes against them.
339
379
 
340
- Phase 1 is advisory-only (REPORT only); flip
341
- `THREADKEEPER_CURATOR_DESTRUCTIVE=1` once trust builds to let the
342
- child apply its own recommendations directly.
380
+ Curator itself stays advisory-only by default. The existing Evolve applier is
381
+ the apply worker: on its next pass it first looks for the latest complete
382
+ Curator report (`CURATOR_PASS_COMPLETE`) that has not been marked applied, then
383
+ spawns an `evolve_applier` child to apply only safe, still-current memory
384
+ maintenance through `lesson_append` / `lesson_remove` / `skill_manage`. It never
385
+ touches `[PROTECTED]`, foreground/user, pinned, or validated entries. Only after
386
+ the child finishes does it call `evolve_mark_curator_report_applied(...)`, which
387
+ prevents replaying the same report.
388
+
389
+ #### 6. Evolve applier — code evolution + curator report apply
390
+
391
+ The brief format is not fixed: any session can file a change to it with
392
+ `evolve_format(suggestion, rationale)`. The `evolve_reviewer` daemon triages
393
+ the queue and **promotes** the good ones — promoted suggestions surface in the
394
+ brief with a ★. Until now that's where it stopped: a human had to hand-edit
395
+ `render_brief` in `brief.py`.
396
+
397
+ `evolve_apply(evolve_id)` closes the loop. It spawns an `evolve_applier` child
398
+ (resolved through the normal spawn role/model config — recommend opus, it
399
+ writes code) that:
400
+
401
+ 1. edits `render_brief()` to implement the suggestion;
402
+ 2. adds/extends a **golden brief test** asserting both that the new
403
+ behavior/field appears *and* that the existing brief sections still render —
404
+ a format change can't silently break the brief;
405
+ 3. runs the full suite (`.venv/bin/python -m pytest -q`) until green;
406
+ 4. opens a **pull request** on a feature branch via `gh`, body quoting the
407
+ suggestion + rationale. The generated commit and PR title use the repo's
408
+ allowed Conventional Commit types (`feat:`/`fix:` etc.), never the internal
409
+ `evolve:` label.
410
+
411
+ **Autonomy is the PR gate, nothing more.** The child never pushes or commits to
412
+ `main` (which has branch protection); a human reviews and merges. On a
413
+ successful PR the child calls `evolve_mark_applied(evolve_id, pr_url)`, which
414
+ sets `applied=1` so the suggestion stops resurfacing. Validation inside the
415
+ child (golden render_brief test + full suite green) is the objective gate the
416
+ loop otherwise lacks.
417
+
418
+ The same applier role also drains Curator reports. `evolve_apply_curator_report`
419
+ manually applies the latest complete report, or a specific report path. This
420
+ path does **not** edit code or open a PR; it uses memory MCP tools only and
421
+ marks the report applied with `evolve_mark_curator_report_applied(...)`.
422
+
423
+ Manual: `evolve_apply(#id)` (get ids from `evolve_review()`). Optional daemon:
424
+ set `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S>0` (default 0 = off) to periodically
425
+ apply the latest complete Curator report first, then implement the oldest
426
+ promoted+unapplied suggestion. Pin the agent/model with
427
+ `THREADKEEPER_SPAWN__LOOP__EVOLVE_APPLIER` /
428
+ `THREADKEEPER_SPAWN__MODEL__EVOLVE_APPLIER`. Single-flight (one applier child at
429
+ a time, enforced by a short dispatch file lock plus running-task detection)
430
+ keeps code edits and memory maintenance from colliding.
343
431
 
344
432
  #### Honest take
345
433
 
@@ -428,7 +516,10 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
428
516
  | `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
429
517
  | `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
430
518
  | `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
519
+ | `THREADKEEPER_PROBE_INTERVAL_S` | 0 (off) | probe daemon tick (s); 1800 = 30 min recommended so finished probe answers are graded promptly |
520
+ | `THREADKEEPER_PROBE_COOLDOWN_S` | 604800 | per-category probe cooldown; 86400 = 1d recommended for active reliability tracking |
431
521
  | `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
522
+ | `THREADKEEPER_MENUBAR_AUTO_LAUNCH` | true | macOS: auto install/launch status menu-bar app on MCP startup |
432
523
  | `THREADKEEPER_MEMORY_GUARD_POLL_S` | 30 | server RSS guard tick (s); 0 disables |
433
524
  | `THREADKEEPER_MEMORY_GUARD_WARN_MB` | 1536 | notify/log when a server crosses this RSS |
434
525
  | `THREADKEEPER_MEMORY_GUARD_KILL_MB` | 3072 | SIGTERM server above this RSS; 0 disables killing |
@@ -445,9 +536,17 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
445
536
  | `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
446
537
  | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
447
538
  | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
448
-
449
- Persist them via `~/.claude/settings.json`'s `env` block (Claude Code) or
450
- the equivalent env section in each CLI's config. Hot-config reload is
539
+ | `THREADKEEPER_DIALECTIC_MINE_INTERVAL_S` | 0 (off) | dialectic_miner daemon tick (s); 0 disables mechanical observation capture |
540
+ | `THREADKEEPER_DIALECTIC_VALIDATE_INTERVAL_S` | 0 (off) | dialectic_validator daemon tick (s); 0 disables LLM-driven claim synthesis |
541
+ | `THREADKEEPER_DIALECTIC_VALIDATE_MIN` | 5 | min buffered observations before validator engages |
542
+ | `THREADKEEPER_DIALECTIC_VALIDATE_BATCH_SIZE` | 50 | max observations sent to one validator child; prevents oversized prompts and drains large queues incrementally |
543
+ | `THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S` | 0 (off) | evolve-reviewer daemon tick (s); triages the format-evolution queue (promote/dismiss) |
544
+ | `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S` | 0 (off) | evolve-applier daemon tick (s); applies latest complete Curator report first, then oldest promoted+unapplied suggestion behind a PR. Manual `evolve_apply` / `evolve_apply_curator_report` work regardless |
545
+ | `THREADKEEPER_DIALECTIC_MAX_NEW_CLAIMS` | 3 | max new dialectic claims the validator may create per pass |
546
+
547
+ Persist them in `~/.threadkeeper/.env` (copy from `.env.example`) — one file,
548
+ read via pydantic-settings; real environment variables still override it.
549
+ Hot-config reload is
451
550
  [tracked](https://github.com/po4erk91/thread-keeper/issues/2).
452
551
 
453
552
  ### Per-loop agent dispatch
@@ -458,36 +557,27 @@ Codex spawn, etc. Detection: process-tree walk at startup, cached for
458
557
  the server lifetime. The MCP tool `spawn_status()` shows the live
459
558
  resolution table.
460
559
 
461
- Override per role via `~/.threadkeeper/spawn.toml`:
462
-
463
- ```toml
464
- [default]
465
- agent = "auto" # "auto" = use active CLI (default)
466
-
467
- [loops]
468
- # Force specific roles to specific CLIs regardless of active host
469
- shadow_observer = "claude" # heaviest reasoning → keep on Claude
470
- curator = "codex" # weekly audit → Codex is fine
471
- candidate_reviewer = "auto" # follow active CLI
472
- archivist = "claude" # close_thread auto-review
473
- extract = "auto" # this one is local (no spawn)
474
-
475
- [models]
476
- # Optional per-CLI model pin — overrides each CLI's own default
477
- claude = "opus"
478
- codex = "gpt-5.4"
479
- gemini = "gemini-2.5-pro"
480
- ```
481
-
482
- Or via env (highest priority, overrides the TOML):
560
+ Override per role in `~/.threadkeeper/.env` (there is no longer a `spawn.toml` —
561
+ all config lives in the one `.env`). Spawn routing uses nested `__` keys; dict
562
+ keys are lowercased:
483
563
 
484
564
  ```bash
485
- export THREADKEEPER_SPAWN_DEFAULT=codex # global default
486
- export THREADKEEPER_SPAWN_LOOP_CURATOR=gemini # per-role
487
- export THREADKEEPER_SPAWN_MODEL_CLAUDE=opus # per-CLI model
488
- export THREADKEEPER_ACTIVE_CLI=claude # force detection
565
+ # default agent for roles with no explicit pin ("" / unset = use the active CLI)
566
+ THREADKEEPER_SPAWN__DEFAULT=claude
567
+ # per-role CLI: THREADKEEPER_SPAWN__LOOP__<ROLE>=<cli>
568
+ THREADKEEPER_SPAWN__LOOP__SHADOW_OBSERVER=claude # heaviest reasoning → keep on Claude
569
+ THREADKEEPER_SPAWN__LOOP__CURATOR=codex # weekly audit → Codex is fine
570
+ THREADKEEPER_SPAWN__LOOP__CANDIDATE_REVIEWER=auto # "auto" = follow active CLI
571
+ # model pin per CLI or per role: THREADKEEPER_SPAWN__MODEL__<KEY>=<model>
572
+ THREADKEEPER_SPAWN__MODEL__CLAUDE=opus
573
+ THREADKEEPER_SPAWN__MODEL__DIALECTIC_VALIDATOR=opus
489
574
  ```
490
575
 
576
+ Resolution per role: `SPAWN__LOOP__<role>` → `SPAWN__DEFAULT` → active CLI →
577
+ `claude`; `"auto"` (or unset) defers to the active CLI. Real environment
578
+ variables override the `.env`. Force host detection with
579
+ `THREADKEEPER_ACTIVE_CLI=claude`. See `.env.example` for the full knob list.
580
+
491
581
  Adapters without headless support (Claude Desktop, VS Code) can't be
492
582
  spawn targets — `spawn_status()` reports them as "no adapter" and any
493
583
  override pointing at them falls back to the next priority level.
@@ -519,6 +609,29 @@ them with `dry_run=False` to apply:
519
609
 
520
610
  ---
521
611
 
612
+ ## Telemetry
613
+
614
+ - **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
615
+ system, read-only. Three sections: **stores** (threads by state,
616
+ notes/dialog/distill/concepts counts, skills + claims by tier,
617
+ extract-candidate and evolve queues, probe/task counts), **loops**
618
+ (how many times each autonomous daemon fired in the window vs 30 days,
619
+ plus last-fire age), and **outcomes** (what those loops actually
620
+ produced — skills materialized, tier promotions, candidate
621
+ accept-vs-reject rate). Surfaces the gaps the point-tools can't:
622
+ a loop firing constantly while its outcomes stay flat, or a queue
623
+ backing up. Complements the per-loop `*_status` tools (`mp_health`,
624
+ `spawn_budget_status`, `shadow_review_status`).
625
+ - **`agent_status(json_output=False, refresh=True)`** — autonomous learning
626
+ loop status, shaped for UI clients. Shows every loop's enabled/running/ready
627
+ state, last pass, backlog, and active spawned-child RSS; running child agents
628
+ are included as detail rows in the JSON. The JSON also includes
629
+ `recent_results` for useful completed loop tasks, which the macOS menu-bar app
630
+ uses for notifications. The `tk-agent-status` console command and macOS
631
+ menu-bar app use the same underlying snapshot.
632
+
633
+ ---
634
+
522
635
  ## Storage
523
636
 
524
637
  `~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
@@ -43,10 +43,12 @@ make it more than a memory store:
43
43
  concurrent sessions signal each other across CLIs. Parent /
44
44
  children / sibling agents become a coordinated swarm, not isolated
45
45
  chats.
46
- - **Self-improving skill library** — five autonomous background loops
46
+ - **Self-improving skill library** — autonomous background loops
47
47
  (auto-review on thread close, shadow-review daemon, extract
48
- harvester, candidate-reviewer, weekly Curator) materialize
49
- class-level skills as the agents work. Adapted to multi-CLI:
48
+ harvester, candidate-reviewer, weekly Curator, and a thread-janitor
49
+ that auto-closes idle threads so abandoned work reaches the harvest
50
+ path — closing is reversible: a note reopens a closed thread)
51
+ materialize class-level skills as the agents work. Adapted to multi-CLI:
50
52
  SKILL.md is the primary write target and gets mirrored to every
51
53
  known/configured skills root simultaneously (`~/.claude/skills/`,
52
54
  `~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
@@ -157,6 +159,36 @@ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
157
159
  (3 GB default). Slim children that need semantic search delegate to the
158
160
  parent via `search_via_parent` — no per-child copy of the embedding model.
159
161
 
162
+ `tk-agent-status` exposes autonomous learning loop status as structured JSON
163
+ or compact text for external monitors:
164
+
165
+ ```sh
166
+ tk-agent-status
167
+ tk-agent-status --json
168
+ ```
169
+
170
+ `apps/macos-agent-status/` contains a small macOS menu-bar app that polls this
171
+ command every 5 seconds and shows every autonomous learning loop: enabled/off,
172
+ running/idle/ready, last pass, backlog, and active child RSS when that loop has
173
+ spawned a worker. Active loops are sorted first (`running`, then `ready`), so
174
+ background work stays at the top of the panel. The app also requests macOS
175
+ notification permission and sends a notification when a newly completed
176
+ autonomous child task produces a useful result in `recent_results`; the first
177
+ poll only marks existing results as seen, so old completions do not spam
178
+ notifications. The probe backlog counts only due objective probes, not every registered
179
+ probe, so a healthy cooldown shows `0 due probes` instead of looking stuck. On
180
+ macOS, `python -m threadkeeper.server` automatically installs and launches it
181
+ on MCP startup. Set `THREADKEEPER_MENUBAR_AUTO_LAUNCH=0` to disable that
182
+ behavior.
183
+
184
+ Manual fallback:
185
+
186
+ ```sh
187
+ cd apps/macos-agent-status
188
+ ./build.sh
189
+ open build/ThreadKeeperAgentStatus.app
190
+ ```
191
+
160
192
  ### Learning loops
161
193
 
162
194
  Five loops turn raw agent dialog into a curated, multi-CLI-mirrored
@@ -211,7 +243,9 @@ shows agents focused on their primary task rarely do).
211
243
  | 2 | shadow_review daemon | every 15 min (env knob) | recent `dialog_messages` window | SKILL.md, lessons.md |
212
244
  | 3 | extract daemon | every 10 min (env knob) | recent `dialog_messages` window | `extract_candidates` pending queue |
213
245
  | 4 | candidate-reviewer daemon | every 1 h (env knob) | pending candidates queue | SKILL.md (create/patch) / notes / verbatim / reject |
214
- | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md (advisory) or direct PATCH/PRUNE/CONSOLIDATE |
246
+ | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md; Evolve applier applies the latest complete report |
247
+ | 6 | dialectic_miner daemon | configurable (env knob; 0=off) | recent `dialog_messages` — user replies + preceding-assistant context | `dialectic_observations` buffer |
248
+ | 7 | dialectic_validator daemon | configurable (env knob; 0=off) | buffered `dialectic_observations` | dialectic claims + evidence (support / contradict / supersede) via spawned opus child |
215
249
 
216
250
  Loops 1–5 write into the universal Skill format (`SKILL.md` under each
217
251
  known/configured skills root — `~/.claude/skills/`, `~/.codex/skills/`,
@@ -250,6 +284,10 @@ marked as spawned/background processes, so they cannot start their own shadow
250
284
  daemon even if a CLI drops the no-embeddings env. Idempotent through
251
285
  `events.kind='shadow_review_pass'`.
252
286
 
287
+ Before writing memory, the observer now checks existing lessons/skills and
288
+ prefers patching broad skills. Shadow-origin `lesson_append` is a compact
289
+ fallback only: oversized bodies and near-duplicate slugs are rejected.
290
+
253
291
  #### 3. Extract daemon
254
292
 
255
293
  Every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds (default off, 600 =
@@ -298,9 +336,57 @@ CONSOLIDATE / PRUNE recommendations. Pinned and foreground-authored
298
336
  entries are marked `[PROTECTED]` in the inventory so the curator
299
337
  never proposes destructive changes against them.
300
338
 
301
- Phase 1 is advisory-only (REPORT only); flip
302
- `THREADKEEPER_CURATOR_DESTRUCTIVE=1` once trust builds to let the
303
- child apply its own recommendations directly.
339
+ Curator itself stays advisory-only by default. The existing Evolve applier is
340
+ the apply worker: on its next pass it first looks for the latest complete
341
+ Curator report (`CURATOR_PASS_COMPLETE`) that has not been marked applied, then
342
+ spawns an `evolve_applier` child to apply only safe, still-current memory
343
+ maintenance through `lesson_append` / `lesson_remove` / `skill_manage`. It never
344
+ touches `[PROTECTED]`, foreground/user, pinned, or validated entries. Only after
345
+ the child finishes does it call `evolve_mark_curator_report_applied(...)`, which
346
+ prevents replaying the same report.
347
+
348
+ #### 6. Evolve applier — code evolution + curator report apply
349
+
350
+ The brief format is not fixed: any session can file a change to it with
351
+ `evolve_format(suggestion, rationale)`. The `evolve_reviewer` daemon triages
352
+ the queue and **promotes** the good ones — promoted suggestions surface in the
353
+ brief with a ★. Until now that's where it stopped: a human had to hand-edit
354
+ `render_brief` in `brief.py`.
355
+
356
+ `evolve_apply(evolve_id)` closes the loop. It spawns an `evolve_applier` child
357
+ (resolved through the normal spawn role/model config — opus is recommended, since it
358
+ writes code) that:
359
+
360
+ 1. edits `render_brief()` to implement the suggestion;
361
+ 2. adds/extends a **golden brief test** asserting both that the new
362
+ behavior/field appears *and* that the existing brief sections still render —
363
+ a format change can't silently break the brief;
364
+ 3. runs the full suite (`.venv/bin/python -m pytest -q`) until green;
365
+ 4. opens a **pull request** on a feature branch via `gh`, body quoting the
366
+ suggestion + rationale. The generated commit and PR title use the repo's
367
+ allowed Conventional Commit types (`feat:`/`fix:` etc.), never the internal
368
+ `evolve:` label.
369
+
370
+ **Autonomy is the PR gate, nothing more.** The child never pushes or commits to
371
+ `main` (which has branch protection); a human reviews and merges. On a
372
+ successful PR the child calls `evolve_mark_applied(evolve_id, pr_url)`, which
373
+ sets `applied=1` so the suggestion stops resurfacing. Validation inside the
374
+ child (golden render_brief test + full suite green) is the objective gate the
375
+ loop otherwise lacks.
376
+
377
+ The same applier role also drains Curator reports. `evolve_apply_curator_report`
378
+ manually applies the latest complete report, or a specific report path. This
379
+ apply flow does **not** edit code or open a PR; it uses memory MCP tools only and
380
+ marks the report applied with `evolve_mark_curator_report_applied(...)`.
381
+
382
+ Manual: `evolve_apply(#id)` (get ids from `evolve_review()`). Optional daemon:
383
+ set `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S>0` (default 0 = off) to periodically
384
+ apply the latest complete Curator report first, then implement the oldest
385
+ promoted+unapplied suggestion. Pin the agent/model with
386
+ `THREADKEEPER_SPAWN__LOOP__EVOLVE_APPLIER` /
387
+ `THREADKEEPER_SPAWN__MODEL__EVOLVE_APPLIER`. Single-flight (one applier child at
388
+ a time, enforced by a short dispatch file lock plus running-task detection)
389
+ keeps code edits and memory maintenance from colliding.
304
390
 
305
391
  #### Honest take
306
392
 
@@ -389,7 +475,10 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
389
475
  | `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
390
476
  | `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
391
477
  | `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
478
+ | `THREADKEEPER_PROBE_INTERVAL_S` | 0 (off) | probe daemon tick (s); 1800 = 30 min recommended so finished probe answers are graded promptly |
479
+ | `THREADKEEPER_PROBE_COOLDOWN_S` | 604800 | per-category probe cooldown; 86400 = 1d recommended for active reliability tracking |
392
480
  | `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
481
+ | `THREADKEEPER_MENUBAR_AUTO_LAUNCH` | true | macOS: auto install/launch status menu-bar app on MCP startup |
393
482
  | `THREADKEEPER_MEMORY_GUARD_POLL_S` | 30 | server RSS guard tick (s); 0 disables |
394
483
  | `THREADKEEPER_MEMORY_GUARD_WARN_MB` | 1536 | notify/log when a server crosses this RSS |
395
484
  | `THREADKEEPER_MEMORY_GUARD_KILL_MB` | 3072 | SIGTERM server above this RSS; 0 disables killing |
@@ -406,9 +495,17 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
406
495
  | `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
407
496
  | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
408
497
  | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
409
-
410
- Persist them via `~/.claude/settings.json`'s `env` block (Claude Code) or
411
- the equivalent env section in each CLI's config. Hot-config reload is
498
+ | `THREADKEEPER_DIALECTIC_MINE_INTERVAL_S` | 0 (off) | dialectic_miner daemon tick (s); 0 disables mechanical observation capture |
499
+ | `THREADKEEPER_DIALECTIC_VALIDATE_INTERVAL_S` | 0 (off) | dialectic_validator daemon tick (s); 0 disables LLM-driven claim synthesis |
500
+ | `THREADKEEPER_DIALECTIC_VALIDATE_MIN` | 5 | min buffered observations before validator engages |
501
+ | `THREADKEEPER_DIALECTIC_VALIDATE_BATCH_SIZE` | 50 | max observations sent to one validator child; prevents oversized prompts and drains large queues incrementally |
502
+ | `THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S` | 0 (off) | evolve-reviewer daemon tick (s); triages the format-evolution queue (promote/dismiss) |
503
+ | `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S` | 0 (off) | evolve-applier daemon tick (s); applies latest complete Curator report first, then oldest promoted+unapplied suggestion behind a PR. Manual `evolve_apply` / `evolve_apply_curator_report` work regardless |
504
+ | `THREADKEEPER_DIALECTIC_MAX_NEW_CLAIMS` | 3 | max new dialectic claims the validator may create per pass |
505
+
506
+ Persist them in `~/.threadkeeper/.env` (copy from `.env.example`) — one file,
507
+ read via pydantic-settings; real environment variables still override it.
508
+ Hot-config reload is
412
509
  [tracked](https://github.com/po4erk91/thread-keeper/issues/2).
413
510
 
414
511
  ### Per-loop agent dispatch
@@ -419,36 +516,27 @@ Codex spawn, etc. Detection: process-tree walk at startup, cached for
419
516
  the server lifetime. The MCP tool `spawn_status()` shows the live
420
517
  resolution table.
421
518
 
422
- Override per role via `~/.threadkeeper/spawn.toml`:
423
-
424
- ```toml
425
- [default]
426
- agent = "auto" # "auto" = use active CLI (default)
427
-
428
- [loops]
429
- # Force specific roles to specific CLIs regardless of active host
430
- shadow_observer = "claude" # heaviest reasoning → keep on Claude
431
- curator = "codex" # weekly audit → Codex is fine
432
- candidate_reviewer = "auto" # follow active CLI
433
- archivist = "claude" # close_thread auto-review
434
- extract = "auto" # this one is local (no spawn)
435
-
436
- [models]
437
- # Optional per-CLI model pin — overrides each CLI's own default
438
- claude = "opus"
439
- codex = "gpt-5.4"
440
- gemini = "gemini-2.5-pro"
441
- ```
442
-
443
- Or via env (highest priority, overrides the TOML):
519
+ Override per role in `~/.threadkeeper/.env` (there is no longer a `spawn.toml` —
520
+ all config lives in the one `.env`). Spawn routing uses nested `__` keys; dict
521
+ keys are lowercased:
444
522
 
445
523
  ```bash
446
- export THREADKEEPER_SPAWN_DEFAULT=codex # global default
447
- export THREADKEEPER_SPAWN_LOOP_CURATOR=gemini # per-role
448
- export THREADKEEPER_SPAWN_MODEL_CLAUDE=opus # per-CLI model
449
- export THREADKEEPER_ACTIVE_CLI=claude # force detection
524
+ # default agent for roles with no explicit pin ("" / unset = use the active CLI)
525
+ THREADKEEPER_SPAWN__DEFAULT=claude
526
+ # per-role CLI: THREADKEEPER_SPAWN__LOOP__<ROLE>=<cli>
527
+ THREADKEEPER_SPAWN__LOOP__SHADOW_OBSERVER=claude # heaviest reasoning → keep on Claude
528
+ THREADKEEPER_SPAWN__LOOP__CURATOR=codex # weekly audit → Codex is fine
529
+ THREADKEEPER_SPAWN__LOOP__CANDIDATE_REVIEWER=auto # "auto" = follow active CLI
530
+ # model pin per CLI or per role: THREADKEEPER_SPAWN__MODEL__<KEY>=<model>
531
+ THREADKEEPER_SPAWN__MODEL__CLAUDE=opus
532
+ THREADKEEPER_SPAWN__MODEL__DIALECTIC_VALIDATOR=opus
450
533
  ```
451
534
 
535
+ Resolution per role: `SPAWN__LOOP__<role>` → `SPAWN__DEFAULT` → active CLI →
536
+ `claude`; `"auto"` (or unset) defers to the active CLI. Real environment
537
+ variables override the `.env`. Force host detection with
538
+ `THREADKEEPER_ACTIVE_CLI=claude`. See `.env.example` for the full knob list.
539
+
452
540
  Adapters without headless support (Claude Desktop, VS Code) can't be
453
541
  spawn targets — `spawn_status()` reports them as "no adapter" and any
454
542
  override pointing at them falls back to the next priority level.
@@ -480,6 +568,29 @@ them with `dry_run=False` to apply:
480
568
 
481
569
  ---
482
570
 
571
+ ## Telemetry
572
+
573
+ - **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
574
+ system, read-only. Three sections: **stores** (threads by state,
575
+ notes/dialog/distill/concepts counts, skills + claims by tier,
576
+ extract-candidate and evolve queues, probe/task counts), **loops**
577
+ (how many times each autonomous daemon fired in the window vs 30 days,
578
+ plus last-fire age), and **outcomes** (what those loops actually
579
+ produced — skills materialized, tier promotions, candidate
580
+ accept-vs-reject rate). Surfaces the gaps the point-tools can't:
581
+ a loop firing constantly while its outcomes stay flat, or a queue
582
+ backing up. Complements the per-loop `*_status` tools (`mp_health`,
583
+ `spawn_budget_status`, `shadow_review_status`).
584
+ - **`agent_status(json_output=False, refresh=True)`** — autonomous learning
585
+ loop status, shaped for UI clients. Shows every loop's enabled/running/ready
586
+ state, last pass, backlog, and active spawned-child RSS; running child agents
587
+ are included as detail rows in the JSON. The JSON also includes
588
+ `recent_results` for useful completed loop tasks, which the macOS menu-bar app
589
+ uses for notifications. The `tk-agent-status` console command and macOS
590
+ menu-bar app use the same underlying snapshot.
591
+
592
+ ---
593
+
483
594
  ## Storage
484
595
 
485
596
  `~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "threadkeeper"
7
- version = "0.8.0"
7
+ version = "0.9.0"
8
8
  description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "thread-keeper contributors" }]
@@ -27,6 +27,8 @@ classifiers = [
27
27
  ]
28
28
  dependencies = [
29
29
  "mcp>=1.0.0",
30
+ "pydantic>=2",
31
+ "pydantic-settings>=2",
30
32
  ]
31
33
 
32
34
  [project.optional-dependencies]
@@ -69,6 +71,8 @@ thread-keeper-setup = "threadkeeper._setup:main"
69
71
  # Recompute stored embeddings with the active backend (e.g. after switching to
70
72
  # the ONNX default). Equivalent to `python -m threadkeeper.migrate_embeddings`.
71
73
  tk-migrate-embeddings = "threadkeeper.migrate_embeddings:main"
74
+ # JSON/text status feed for menu-bar widgets and terminal monitors.
75
+ tk-agent-status = "threadkeeper.agent_status:main"
72
76
 
73
77
  [tool.setuptools.packages.find]
74
78
  include = ["threadkeeper*"]
@@ -324,6 +324,10 @@ def test_codex_register_mcp_writes_toml(tmp_path, monkeypatch):
324
324
  assert '"/opt/python"' in body
325
325
  assert "[mcp_servers.thread-keeper.env]" in body
326
326
  assert '"/repo"' in body
327
+ assert "[mcp_servers.thread-keeper.tools.dialectic_claim]" in body
328
+ assert "[mcp_servers.thread-keeper.tools.dialectic_observation_resolve]" in body
329
+ assert "[mcp_servers.thread-keeper.tools.accept_candidate]" in body
330
+ assert 'approval_mode = "approve"' in body
327
331
 
328
332
 
329
333
  def test_codex_iter_messages_filters_developer_turns(tmp_path, monkeypatch):
@@ -350,6 +354,63 @@ def test_codex_iter_messages_filters_developer_turns(tmp_path, monkeypatch):
350
354
  assert msgs[1].content == "hello"
351
355
 
352
356
 
357
+ def test_codex_iter_messages_uses_forced_child_cid_from_spawn_preamble(
358
+ tmp_path, monkeypatch,
359
+ ):
360
+ pkg = _bootstrap(tmp_path, monkeypatch)
361
+ fp = tmp_path / "rollout-2026-06-11T10-00-00.jsonl"
362
+ forced_cid = "af389b3f-8e17-46b5-87f1-402769a74e58"
363
+ fp.write_text("\n".join([
364
+ json.dumps({
365
+ "timestamp": "2026-06-11T10:00:00Z",
366
+ "type": "session_meta",
367
+ "payload": {"id": "019eb5d0-6753-7c31-bce6-b887761090c6", "cwd": "/x"},
368
+ }),
369
+ json.dumps({
370
+ "timestamp": "2026-06-11T10:00:01Z",
371
+ "type": "response_item",
372
+ "payload": {
373
+ "type": "message",
374
+ "role": "user",
375
+ "id": "u-agents",
376
+ "content": [{"type": "input_text", "text": "# AGENTS.md instructions"}],
377
+ },
378
+ }),
379
+ json.dumps({
380
+ "timestamp": "2026-06-11T10:00:02Z",
381
+ "type": "response_item",
382
+ "payload": {
383
+ "type": "message",
384
+ "role": "user",
385
+ "id": "u-spawn",
386
+ "content": [{
387
+ "type": "input_text",
388
+ "text": (
389
+ "You were spawned in the background by parent conversation "
390
+ "8877cab4-1f45-4d05-9a1c-09c6ab28adf1. "
391
+ f"Your own cid is {forced_cid} (forced via --session-id "
392
+ "and THREADKEEPER_FORCE_CID env)."
393
+ ),
394
+ }],
395
+ },
396
+ }),
397
+ json.dumps({
398
+ "timestamp": "2026-06-11T10:00:03Z",
399
+ "type": "response_item",
400
+ "payload": {
401
+ "type": "message",
402
+ "role": "assistant",
403
+ "id": "a-1",
404
+ "content": [{"type": "output_text", "text": "processed"}],
405
+ },
406
+ }),
407
+ ]) + "\n")
408
+
409
+ msgs = list(pkg["codex"].iter_messages(fp))
410
+ assert [m.uuid for m in msgs] == ["u-agents", "u-spawn", "a-1"]
411
+ assert {m.session_id for m in msgs} == {forced_cid}
412
+
413
+
353
414
  # ---------------------------------------------------------------------
354
415
  # Gemini
355
416
  # ---------------------------------------------------------------------