threadkeeper 0.8.1__tar.gz → 0.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/PKG-INFO +133 -35
  2. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/README.md +130 -34
  3. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/pyproject.toml +5 -1
  4. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_adapters.py +61 -0
  5. threadkeeper-0.9.1/tests/test_agent_status.py +314 -0
  6. threadkeeper-0.9.1/tests/test_brief_footprint.py +135 -0
  7. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_candidate_reviewer.py +36 -0
  8. threadkeeper-0.9.1/tests/test_config_settings.py +171 -0
  9. threadkeeper-0.9.1/tests/test_dialectic_feed_tools.py +65 -0
  10. threadkeeper-0.9.1/tests/test_dialectic_miner.py +307 -0
  11. threadkeeper-0.9.1/tests/test_dialectic_observation_resolve.py +56 -0
  12. threadkeeper-0.9.1/tests/test_dialectic_recompute.py +121 -0
  13. threadkeeper-0.9.1/tests/test_dialectic_validator.py +621 -0
  14. threadkeeper-0.9.1/tests/test_evolve_applier.py +445 -0
  15. threadkeeper-0.9.1/tests/test_evolve_apply_2.py +105 -0
  16. threadkeeper-0.9.1/tests/test_evolve_apply_3.py +144 -0
  17. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_extract_daemon.py +110 -0
  18. threadkeeper-0.9.1/tests/test_extract_dedup.py +134 -0
  19. threadkeeper-0.9.1/tests/test_ingest_status.py +29 -0
  20. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_lessons.py +78 -0
  21. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_memory_guard.py +30 -0
  22. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_shadow_review.py +44 -0
  23. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_skill_passive_tier.py +86 -0
  24. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_skills.py +14 -2
  25. threadkeeper-0.9.1/tests/test_spawn_codex_stdin.py +102 -0
  26. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_spawn_config.py +118 -139
  27. threadkeeper-0.9.1/tests/test_spawn_wrap.py +250 -0
  28. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/_setup.py +6 -4
  29. threadkeeper-0.9.1/threadkeeper/_spawn_wrap.py +128 -0
  30. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/codex.py +117 -5
  31. threadkeeper-0.9.1/threadkeeper/agent_status.py +754 -0
  32. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/brief.py +82 -31
  33. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/candidate_reviewer.py +117 -43
  34. threadkeeper-0.9.1/threadkeeper/config.py +397 -0
  35. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/db.py +27 -0
  36. threadkeeper-0.9.1/threadkeeper/dialectic_miner.py +450 -0
  37. threadkeeper-0.9.1/threadkeeper/dialectic_validator.py +556 -0
  38. threadkeeper-0.9.1/threadkeeper/evolve_applier.py +709 -0
  39. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/identity.py +39 -0
  40. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/ingest.py +114 -1
  41. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/lessons.py +20 -0
  42. threadkeeper-0.9.1/threadkeeper/menubar_app.py +244 -0
  43. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/server.py +6 -0
  44. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/shadow_review.py +50 -12
  45. threadkeeper-0.9.1/threadkeeper/spawn_config.py +95 -0
  46. threadkeeper-0.9.1/threadkeeper/tools/agent_status.py +19 -0
  47. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/dialectic.py +67 -0
  48. threadkeeper-0.9.1/threadkeeper/tools/dialectic_feed.py +119 -0
  49. threadkeeper-0.9.1/threadkeeper/tools/evolve_applier.py +163 -0
  50. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/extract.py +34 -6
  51. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/graph.py +1 -1
  52. threadkeeper-0.9.1/threadkeeper/tools/lessons.py +217 -0
  53. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/skills.py +24 -11
  54. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/spawn.py +101 -20
  55. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/threads.py +11 -2
  56. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper.egg-info/PKG-INFO +133 -35
  57. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper.egg-info/SOURCES.txt +24 -0
  58. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper.egg-info/entry_points.txt +1 -0
  59. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper.egg-info/requires.txt +2 -0
  60. threadkeeper-0.8.1/threadkeeper/config.py +0 -393
  61. threadkeeper-0.8.1/threadkeeper/spawn_config.py +0 -203
  62. threadkeeper-0.8.1/threadkeeper/tools/lessons.py +0 -110
  63. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/LICENSE +0 -0
  64. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/setup.cfg +0 -0
  65. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_brief_sections.py +0 -0
  66. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_core_memory.py +0 -0
  67. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_curator.py +0 -0
  68. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_dashboard.py +0 -0
  69. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_delegated_search.py +0 -0
  70. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_dialectic.py +0 -0
  71. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_dialectic_tier.py +0 -0
  72. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_error_paths.py +0 -0
  73. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_evolve_daemon.py +0 -0
  74. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_i18n_multilang.py +0 -0
  75. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_identity.py +0 -0
  76. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_missed_spawns.py +0 -0
  77. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_nudges.py +0 -0
  78. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_onnx_embeddings.py +0 -0
  79. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_panel.py +0 -0
  80. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_probe_daemon.py +0 -0
  81. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_process_health.py +0 -0
  82. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_search_fts_punctuation.py +0 -0
  83. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_skill_hint.py +0 -0
  84. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_skill_tier.py +0 -0
  85. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_skill_use_parser.py +0 -0
  86. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_skill_watcher.py +0 -0
  87. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_spawn_budget.py +0 -0
  88. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_spawn_hint.py +0 -0
  89. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_spawn_reap.py +0 -0
  90. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_spawn_slim.py +0 -0
  91. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_thread_janitor.py +0 -0
  92. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_threads.py +0 -0
  93. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_tools_smoke.py +0 -0
  94. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_validate_threads.py +0 -0
  95. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/tests/test_vec_search.py +0 -0
  96. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/__init__.py +0 -0
  97. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/_mcp.py +0 -0
  98. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/__init__.py +0 -0
  99. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/_hook_helpers.py +0 -0
  100. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/base.py +0 -0
  101. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/claude_code.py +0 -0
  102. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/claude_desktop.py +0 -0
  103. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/copilot.py +0 -0
  104. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/gemini.py +0 -0
  105. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/adapters/vscode.py +0 -0
  106. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/curator.py +0 -0
  107. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/embeddings.py +0 -0
  108. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/evolve_daemon.py +0 -0
  109. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/extract_daemon.py +0 -0
  110. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/helpers.py +0 -0
  111. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/i18n.py +0 -0
  112. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/memory_guard.py +0 -0
  113. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/migrate_embeddings.py +0 -0
  114. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/nudges.py +0 -0
  115. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/probe_daemon.py +0 -0
  116. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/process_health.py +0 -0
  117. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/review_prompts.py +0 -0
  118. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/search_proxy.py +0 -0
  119. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/skill_watcher.py +0 -0
  120. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/spawn_budget.py +0 -0
  121. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/thread_janitor.py +0 -0
  122. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/__init__.py +0 -0
  123. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/candidate_reviewer.py +0 -0
  124. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/concepts.py +0 -0
  125. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/consolidate.py +0 -0
  126. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/core_memory.py +0 -0
  127. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/correlation.py +0 -0
  128. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/curator.py +0 -0
  129. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/dashboard.py +0 -0
  130. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/dialog.py +0 -0
  131. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/distill.py +0 -0
  132. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/invariants.py +0 -0
  133. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/memory_guard.py +0 -0
  134. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/missed_spawns.py +0 -0
  135. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/panel.py +0 -0
  136. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/peers.py +0 -0
  137. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/pickup.py +0 -0
  138. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/probes.py +0 -0
  139. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/process_health.py +0 -0
  140. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/session.py +0 -0
  141. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/shadow_review.py +0 -0
  142. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/style.py +0 -0
  143. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper/tools/validate.py +0 -0
  144. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper.egg-info/dependency_links.txt +0 -0
  145. {threadkeeper-0.8.1 → threadkeeper-0.9.1}/threadkeeper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: threadkeeper
3
- Version: 0.8.1
3
+ Version: 0.9.1
4
4
  Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
5
5
  Author: thread-keeper contributors
6
6
  License: MIT
@@ -23,6 +23,8 @@ Requires-Python: >=3.11
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: mcp>=1.0.0
26
+ Requires-Dist: pydantic>=2
27
+ Requires-Dist: pydantic-settings>=2
26
28
  Provides-Extra: semantic
27
29
  Requires-Dist: fastembed>=0.3; extra == "semantic"
28
30
  Requires-Dist: numpy>=1.24.0; extra == "semantic"
@@ -198,6 +200,36 @@ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
198
200
  (3 GB default). Slim children that need semantic search delegate to the
199
201
  parent via `search_via_parent` — no per-child copy of the embedding model.
200
202
 
203
+ `tk-agent-status` exposes autonomous learning loop status as structured JSON
204
+ or compact text for external monitors:
205
+
206
+ ```sh
207
+ tk-agent-status
208
+ tk-agent-status --json
209
+ ```
210
+
211
+ `apps/macos-agent-status/` contains a small macOS menu-bar app that polls this
212
+ command every 5 seconds and shows every autonomous learning loop: enabled/off,
213
+ running/idle/ready, last pass, backlog, and active child RSS when that loop has
214
+ spawned a worker. Active loops are sorted first (`running`, then `ready`), so
215
+ background work stays at the top of the panel. The app also requests macOS
216
+ notification permission and sends a notification when a newly completed
217
+ autonomous child task produces a useful result in `recent_results`; the first
218
+ poll only marks existing results as seen, so old completions do not spam
219
+ notifications. Probe backlog is due objective probes only, not every registered
220
+ probe, so a healthy cooldown shows `0 due probes` instead of looking stuck. On
221
+ macOS, `python -m threadkeeper.server` automatically installs and launches the menu-bar app
222
+ on MCP startup. Set `THREADKEEPER_MENUBAR_AUTO_LAUNCH=0` to disable that
223
+ behavior.
224
+
225
+ Manual fallback:
226
+
227
+ ```sh
228
+ cd apps/macos-agent-status
229
+ ./build.sh
230
+ open build/ThreadKeeperAgentStatus.app
231
+ ```
232
+
201
233
  ### Learning loops
202
234
 
203
235
  Five loops turn raw agent dialog into a curated, multi-CLI-mirrored
@@ -252,7 +284,9 @@ shows agents focused on their primary task rarely do).
252
284
  | 2 | shadow_review daemon | every 15 min (env knob) | recent `dialog_messages` window | SKILL.md, lessons.md |
253
285
  | 3 | extract daemon | every 10 min (env knob) | recent `dialog_messages` window | `extract_candidates` pending queue |
254
286
  | 4 | candidate-reviewer daemon | every 1 h (env knob) | pending candidates queue | SKILL.md (create/patch) / notes / verbatim / reject |
255
- | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md (advisory) or direct PATCH/PRUNE/CONSOLIDATE |
287
+ | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md; Evolve applier applies the latest complete report |
288
+ | 6 | dialectic_miner daemon | configurable (env knob; 0=off) | recent `dialog_messages` — user replies + preceding-assistant context | `dialectic_observations` buffer |
289
+ | 7 | dialectic_validator daemon | configurable (env knob; 0=off) | buffered `dialectic_observations` | dialectic claims + evidence (support / contradict / supersede) via spawned opus child |
256
290
 
257
291
  All five write into the universal Skill format (`SKILL.md` under each
258
292
  known/configured skills root — `~/.claude/skills/`, `~/.codex/skills/`,
@@ -291,6 +325,10 @@ marked as spawned/background processes, so they cannot start their own shadow
291
325
  daemon even if a CLI drops the no-embeddings env. Idempotent through
292
326
  `events.kind='shadow_review_pass'`.
293
327
 
328
+ Before writing memory, the observer now checks existing lessons/skills and
329
+ prefers patching broad skills. Shadow-origin `lesson_append` is a compact
330
+ fallback only: oversized bodies and near-duplicate slugs are rejected.
331
+
294
332
  #### 3. Extract daemon
295
333
 
296
334
  Every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds (default off, 600 =
@@ -327,7 +365,10 @@ Hard limits: max 2 new skills per pass, `[PROTECTED]` (pinned +
327
365
  foreground-authored) skills off-limits. Closes the gap between
328
366
  heuristic harvest and SKILL.md materialization — previously pending
329
367
  candidates accumulated indefinitely waiting for an agent to call
330
- `accept_candidate()` manually.
368
+ `accept_candidate()` manually. The loop is machine-wide single-flight:
369
+ while one reviewer child is running, other foreground servers/ticks report
370
+ `candidate_review_running` instead of spawning another child for the same
371
+ queue.
331
372
 
332
373
  #### 5. Autonomous Curator
333
374
 
@@ -339,9 +380,57 @@ CONSOLIDATE / PRUNE recommendations. Pinned and foreground-authored
339
380
  entries are marked `[PROTECTED]` in the inventory so the curator
340
381
  never proposes destructive changes against them.
341
382
 
342
- Phase 1 is advisory-only (REPORT only); flip
343
- `THREADKEEPER_CURATOR_DESTRUCTIVE=1` once trust builds to let the
344
- child apply its own recommendations directly.
383
+ Curator itself stays advisory-only by default. The existing Evolve applier is
384
+ the apply worker: on its next pass it first looks for the latest complete
385
+ Curator report (`CURATOR_PASS_COMPLETE`) that has not been marked applied, then
386
+ spawns an `evolve_applier` child to apply only safe, still-current memory
387
+ maintenance through `lesson_append` / `lesson_remove` / `skill_manage`. It never
388
+ touches `[PROTECTED]`, foreground/user, pinned, or validated entries. Only after
389
+ the child finishes does it call `evolve_mark_curator_report_applied(...)`, which
390
+ prevents replaying the same report.
391
+
392
+ #### 6. Evolve applier — code evolution + curator report apply
393
+
394
+ The brief format is not fixed: any session can file a change to it with
395
+ `evolve_format(suggestion, rationale)`. The `evolve_reviewer` daemon triages
396
+ the queue and **promotes** the good ones — promoted suggestions surface in the
397
+ brief with a ★. Until now that's where it stopped: a human had to hand-edit
398
+ `render_brief` in `brief.py`.
399
+
400
+ `evolve_apply(evolve_id)` closes the loop. It spawns an `evolve_applier` child
401
+ (resolved through the normal spawn role/model config — recommend opus, it
402
+ writes code) that:
403
+
404
+ 1. edits `render_brief()` to implement the suggestion;
405
+ 2. adds/extends a **golden brief test** asserting both that the new
406
+ behavior/field appears *and* that the existing brief sections still render —
407
+ a format change can't silently break the brief;
408
+ 3. runs the full suite (`.venv/bin/python -m pytest -q`) until green;
409
+ 4. opens a **pull request** on a feature branch via `gh`, body quoting the
410
+ suggestion + rationale. The generated commit and PR title use the repo's
411
+ allowed Conventional Commit types (`feat:`/`fix:` etc.), never the internal
412
+ `evolve:` label.
413
+
414
+ **Autonomy is the PR gate, nothing more.** The child never pushes or commits to
415
+ `main` (which has branch protection); a human reviews and merges. On a
416
+ successful PR the child calls `evolve_mark_applied(evolve_id, pr_url)`, which
417
+ sets `applied=1` so the suggestion stops resurfacing. Validation inside the
418
+ child (golden render_brief test + full suite green) is the objective gate the
419
+ loop otherwise lacks.
420
+
421
+ The same applier role also drains Curator reports. `evolve_apply_curator_report`
422
+ manually applies the latest complete report, or a specific report path. This
423
+ path does **not** edit code or open a PR; it uses memory MCP tools only and
424
+ marks the report applied with `evolve_mark_curator_report_applied(...)`.
425
+
426
+ Manual: `evolve_apply(#id)` (get ids from `evolve_review()`). Optional daemon:
427
+ set `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S>0` (default 0 = off) to periodically
428
+ apply the latest complete Curator report first, then implement the oldest
429
+ promoted+unapplied suggestion. Pin the agent/model with
430
+ `THREADKEEPER_SPAWN__LOOP__EVOLVE_APPLIER` /
431
+ `THREADKEEPER_SPAWN__MODEL__EVOLVE_APPLIER`. Single-flight (one applier child at
432
+ a time, enforced by a short dispatch file lock plus running-task detection)
433
+ keeps code edits and memory maintenance from colliding.
345
434
 
346
435
  #### Honest take
347
436
 
@@ -430,7 +519,10 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
430
519
  | `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
431
520
  | `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
432
521
  | `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
522
+ | `THREADKEEPER_PROBE_INTERVAL_S` | 0 (off) | probe daemon tick (s); 1800 = 30 min recommended so finished probe answers are graded promptly |
523
+ | `THREADKEEPER_PROBE_COOLDOWN_S` | 604800 | per-category probe cooldown; 86400 = 1d recommended for active reliability tracking |
433
524
  | `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
525
+ | `THREADKEEPER_MENUBAR_AUTO_LAUNCH` | true | macOS: auto install/launch status menu-bar app on MCP startup |
434
526
  | `THREADKEEPER_MEMORY_GUARD_POLL_S` | 30 | server RSS guard tick (s); 0 disables |
435
527
  | `THREADKEEPER_MEMORY_GUARD_WARN_MB` | 1536 | notify/log when a server crosses this RSS |
436
528
  | `THREADKEEPER_MEMORY_GUARD_KILL_MB` | 3072 | SIGTERM server above this RSS; 0 disables killing |
@@ -447,9 +539,17 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
447
539
  | `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
448
540
  | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
449
541
  | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
450
-
451
- Persist them via `~/.claude/settings.json`'s `env` block (Claude Code) or
452
- the equivalent env section in each CLI's config. Hot-config reload is
542
+ | `THREADKEEPER_DIALECTIC_MINE_INTERVAL_S` | 0 (off) | dialectic_miner daemon tick (s); 0 disables mechanical observation capture |
543
+ | `THREADKEEPER_DIALECTIC_VALIDATE_INTERVAL_S` | 0 (off) | dialectic_validator daemon tick (s); 0 disables LLM-driven claim synthesis |
544
+ | `THREADKEEPER_DIALECTIC_VALIDATE_MIN` | 5 | min buffered observations before validator engages |
545
+ | `THREADKEEPER_DIALECTIC_VALIDATE_BATCH_SIZE` | 50 | max observations sent to one validator child; prevents oversized prompts and drains large queues incrementally |
546
+ | `THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S` | 0 (off) | evolve-reviewer daemon tick (s); triages the format-evolution queue (promote/dismiss) |
547
+ | `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S` | 0 (off) | evolve-applier daemon tick (s); applies latest complete Curator report first, then oldest promoted+unapplied suggestion behind a PR. Manual `evolve_apply` / `evolve_apply_curator_report` work regardless |
548
+ | `THREADKEEPER_DIALECTIC_MAX_NEW_CLAIMS` | 3 | max new dialectic claims the validator may create per pass |
549
+
550
+ Persist them in `~/.threadkeeper/.env` (copy from `.env.example`) — one file,
551
+ read via pydantic-settings; real environment variables still override it.
552
+ Hot-config reload is
453
553
  [tracked](https://github.com/po4erk91/thread-keeper/issues/2).
454
554
 
455
555
  ### Per-loop agent dispatch
@@ -460,36 +560,27 @@ Codex spawn, etc. Detection: process-tree walk at startup, cached for
460
560
  the server lifetime. The MCP tool `spawn_status()` shows the live
461
561
  resolution table.
462
562
 
463
- Override per role via `~/.threadkeeper/spawn.toml`:
464
-
465
- ```toml
466
- [default]
467
- agent = "auto" # "auto" = use active CLI (default)
468
-
469
- [loops]
470
- # Force specific roles to specific CLIs regardless of active host
471
- shadow_observer = "claude" # heaviest reasoning → keep on Claude
472
- curator = "codex" # weekly audit → Codex is fine
473
- candidate_reviewer = "auto" # follow active CLI
474
- archivist = "claude" # close_thread auto-review
475
- extract = "auto" # this one is local (no spawn)
476
-
477
- [models]
478
- # Optional per-CLI model pin — overrides each CLI's own default
479
- claude = "opus"
480
- codex = "gpt-5.4"
481
- gemini = "gemini-2.5-pro"
482
- ```
483
-
484
- Or via env (highest priority, overrides the TOML):
563
+ Override per role in `~/.threadkeeper/.env` (there is no longer a `spawn.toml` —
564
+ all config lives in the one `.env`). Spawn routing uses nested `__` keys; dict
565
+ keys are lowercased:
485
566
 
486
567
  ```bash
487
- export THREADKEEPER_SPAWN_DEFAULT=codex # global default
488
- export THREADKEEPER_SPAWN_LOOP_CURATOR=gemini # per-role
489
- export THREADKEEPER_SPAWN_MODEL_CLAUDE=opus # per-CLI model
490
- export THREADKEEPER_ACTIVE_CLI=claude # force detection
568
+ # default agent for roles with no explicit pin ("" / unset = use the active CLI)
569
+ THREADKEEPER_SPAWN__DEFAULT=claude
570
+ # per-role CLI: THREADKEEPER_SPAWN__LOOP__<ROLE>=<cli>
571
+ THREADKEEPER_SPAWN__LOOP__SHADOW_OBSERVER=claude # heaviest reasoning → keep on Claude
572
+ THREADKEEPER_SPAWN__LOOP__CURATOR=codex # weekly audit → Codex is fine
573
+ THREADKEEPER_SPAWN__LOOP__CANDIDATE_REVIEWER=auto # "auto" = follow active CLI
574
+ # model pin per CLI or per role: THREADKEEPER_SPAWN__MODEL__<KEY>=<model>
575
+ THREADKEEPER_SPAWN__MODEL__CLAUDE=opus
576
+ THREADKEEPER_SPAWN__MODEL__DIALECTIC_VALIDATOR=opus
491
577
  ```
492
578
 
579
+ Resolution per role: `SPAWN__LOOP__<role>` → `SPAWN__DEFAULT` → active CLI →
580
+ `claude`; `"auto"` (or unset) defers to the active CLI. Real environment
581
+ variables override the `.env`. Force host detection with
582
+ `THREADKEEPER_ACTIVE_CLI=claude`. See `.env.example` for the full knob list.
583
+
493
584
  Adapters without headless support (Claude Desktop, VS Code) can't be
494
585
  spawn targets — `spawn_status()` reports them as "no adapter" and any
495
586
  override pointing at them falls back to the next priority level.
@@ -534,6 +625,13 @@ them with `dry_run=False` to apply:
534
625
  a loop firing constantly while its outcomes stay flat, or a queue
535
626
  backing up. Complements the per-loop `*_status` tools (`mp_health`,
536
627
  `spawn_budget_status`, `shadow_review_status`).
628
+ - **`agent_status(json_output=False, refresh=True)`** — autonomous learning
629
+ loop status, shaped for UI clients. Shows every loop's enabled/running/ready
630
+ state, last pass, backlog, and active spawned-child RSS; running child agents
631
+ are included as detail rows in the JSON. The JSON also includes
632
+ `recent_results` for useful completed loop tasks, which the macOS menu-bar app
633
+ uses for notifications. The `tk-agent-status` console command and macOS
634
+ menu-bar app use the same underlying snapshot.
537
635
 
538
636
  ---
539
637
 
@@ -159,6 +159,36 @@ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
159
159
  (3 GB default). Slim children that need semantic search delegate to the
160
160
  parent via `search_via_parent` — no per-child copy of the embedding model.
161
161
 
162
+ `tk-agent-status` exposes autonomous learning loop status as structured JSON
163
+ or compact text for external monitors:
164
+
165
+ ```sh
166
+ tk-agent-status
167
+ tk-agent-status --json
168
+ ```
169
+
170
+ `apps/macos-agent-status/` contains a small macOS menu-bar app that polls this
171
+ command every 5 seconds and shows every autonomous learning loop: enabled/off,
172
+ running/idle/ready, last pass, backlog, and active child RSS when that loop has
173
+ spawned a worker. Active loops are sorted first (`running`, then `ready`), so
174
+ background work stays at the top of the panel. The app also requests macOS
175
+ notification permission and sends a notification when a newly completed
176
+ autonomous child task produces a useful result in `recent_results`; the first
177
+ poll only marks existing results as seen, so old completions do not spam
178
+ notifications. Probe backlog is due objective probes only, not every registered
179
+ probe, so a healthy cooldown shows `0 due probes` instead of looking stuck. On
180
+ macOS, `python -m threadkeeper.server` automatically installs and launches the menu-bar app
181
+ on MCP startup. Set `THREADKEEPER_MENUBAR_AUTO_LAUNCH=0` to disable that
182
+ behavior.
183
+
184
+ Manual fallback:
185
+
186
+ ```sh
187
+ cd apps/macos-agent-status
188
+ ./build.sh
189
+ open build/ThreadKeeperAgentStatus.app
190
+ ```
191
+
162
192
  ### Learning loops
163
193
 
164
194
  Five loops turn raw agent dialog into a curated, multi-CLI-mirrored
@@ -213,7 +243,9 @@ shows agents focused on their primary task rarely do).
213
243
  | 2 | shadow_review daemon | every 15 min (env knob) | recent `dialog_messages` window | SKILL.md, lessons.md |
214
244
  | 3 | extract daemon | every 10 min (env knob) | recent `dialog_messages` window | `extract_candidates` pending queue |
215
245
  | 4 | candidate-reviewer daemon | every 1 h (env knob) | pending candidates queue | SKILL.md (create/patch) / notes / verbatim / reject |
216
- | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md (advisory) or direct PATCH/PRUNE/CONSOLIDATE |
246
+ | 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md; Evolve applier applies the latest complete report |
247
+ | 6 | dialectic_miner daemon | configurable (env knob; 0=off) | recent `dialog_messages` — user replies + preceding-assistant context | `dialectic_observations` buffer |
248
+ | 7 | dialectic_validator daemon | configurable (env knob; 0=off) | buffered `dialectic_observations` | dialectic claims + evidence (support / contradict / supersede) via spawned opus child |
217
249
 
218
250
  All five write into the universal Skill format (`SKILL.md` under each
219
251
  known/configured skills root — `~/.claude/skills/`, `~/.codex/skills/`,
@@ -252,6 +284,10 @@ marked as spawned/background processes, so they cannot start their own shadow
252
284
  daemon even if a CLI drops the no-embeddings env. Idempotent through
253
285
  `events.kind='shadow_review_pass'`.
254
286
 
287
+ Before writing memory, the observer now checks existing lessons/skills and
288
+ prefers patching broad skills. Shadow-origin `lesson_append` is a compact
289
+ fallback only: oversized bodies and near-duplicate slugs are rejected.
290
+
255
291
  #### 3. Extract daemon
256
292
 
257
293
  Every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds (default off, 600 =
@@ -288,7 +324,10 @@ Hard limits: max 2 new skills per pass, `[PROTECTED]` (pinned +
288
324
  foreground-authored) skills off-limits. Closes the gap between
289
325
  heuristic harvest and SKILL.md materialization — previously pending
290
326
  candidates accumulated indefinitely waiting for an agent to call
291
- `accept_candidate()` manually.
327
+ `accept_candidate()` manually. The loop is machine-wide single-flight:
328
+ while one reviewer child is running, other foreground servers/ticks report
329
+ `candidate_review_running` instead of spawning another child for the same
330
+ queue.
292
331
 
293
332
  #### 5. Autonomous Curator
294
333
 
@@ -300,9 +339,57 @@ CONSOLIDATE / PRUNE recommendations. Pinned and foreground-authored
300
339
  entries are marked `[PROTECTED]` in the inventory so the curator
301
340
  never proposes destructive changes against them.
302
341
 
303
- Phase 1 is advisory-only (REPORT only); flip
304
- `THREADKEEPER_CURATOR_DESTRUCTIVE=1` once trust builds to let the
305
- child apply its own recommendations directly.
342
+ Curator itself stays advisory-only by default. The existing Evolve applier is
343
+ the apply worker: on its next pass it first looks for the latest complete
344
+ Curator report (`CURATOR_PASS_COMPLETE`) that has not been marked applied, then
345
+ spawns an `evolve_applier` child to apply only safe, still-current memory
346
+ maintenance through `lesson_append` / `lesson_remove` / `skill_manage`. It never
347
+ touches `[PROTECTED]`, foreground/user, pinned, or validated entries. Only after
348
+ the child finishes does it call `evolve_mark_curator_report_applied(...)`, which
349
+ prevents replaying the same report.
350
+
351
+ #### 6. Evolve applier — code evolution + curator report apply
352
+
353
+ The brief format is not fixed: any session can file a change to it with
354
+ `evolve_format(suggestion, rationale)`. The `evolve_reviewer` daemon triages
355
+ the queue and **promotes** the good ones — promoted suggestions surface in the
356
+ brief with a ★. Until now that's where it stopped: a human had to hand-edit
357
+ `render_brief` in `brief.py`.
358
+
359
+ `evolve_apply(evolve_id)` closes the loop. It spawns an `evolve_applier` child
360
+ (resolved through the normal spawn role/model config — recommend opus, it
361
+ writes code) that:
362
+
363
+ 1. edits `render_brief()` to implement the suggestion;
364
+ 2. adds/extends a **golden brief test** asserting both that the new
365
+ behavior/field appears *and* that the existing brief sections still render —
366
+ a format change can't silently break the brief;
367
+ 3. runs the full suite (`.venv/bin/python -m pytest -q`) until green;
368
+ 4. opens a **pull request** on a feature branch via `gh`, body quoting the
369
+ suggestion + rationale. The generated commit and PR title use the repo's
370
+ allowed Conventional Commit types (`feat:`/`fix:` etc.), never the internal
371
+ `evolve:` label.
372
+
373
+ **Autonomy stops at the PR gate.** The child never pushes or commits to
374
+ `main` (which has branch protection); a human reviews and merges. On a
375
+ successful PR the child calls `evolve_mark_applied(evolve_id, pr_url)`, which
376
+ sets `applied=1` so the suggestion stops resurfacing. Validation inside the
377
+ child (golden render_brief test + full suite green) is the objective gate the
378
+ loop otherwise lacks.
379
+
380
+ The same applier role also drains Curator reports. `evolve_apply_curator_report`
381
+ manually applies the latest complete report, or a specific report path. This
382
+ path does **not** edit code or open a PR; it uses memory MCP tools only and
383
+ marks the report applied with `evolve_mark_curator_report_applied(...)`.
384
+
385
+ Manual: `evolve_apply(#id)` (get ids from `evolve_review()`). Optional daemon:
386
+ set `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S>0` (default 0 = off) to periodically
387
+ apply the latest complete Curator report first, then implement the oldest
388
+ promoted+unapplied suggestion. Pin the agent/model with
389
+ `THREADKEEPER_SPAWN__LOOP__EVOLVE_APPLIER` /
390
+ `THREADKEEPER_SPAWN__MODEL__EVOLVE_APPLIER`. Single-flight (one applier child at
391
+ a time, enforced by a short-lived dispatch file lock plus running-task detection)
392
+ keeps code edits and memory maintenance from colliding.
306
393
 
307
394
  #### Honest take
308
395
 
@@ -391,7 +478,10 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
391
478
  | `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
392
479
  | `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
393
480
  | `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
481
+ | `THREADKEEPER_PROBE_INTERVAL_S` | 0 (off) | probe daemon tick (s); 1800 = 30 min recommended so finished probe answers are graded promptly |
482
+ | `THREADKEEPER_PROBE_COOLDOWN_S` | 604800 | per-category probe cooldown; 86400 = 1d recommended for active reliability tracking |
394
483
  | `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
484
+ | `THREADKEEPER_MENUBAR_AUTO_LAUNCH` | true | macOS: auto install/launch status menu-bar app on MCP startup |
395
485
  | `THREADKEEPER_MEMORY_GUARD_POLL_S` | 30 | server RSS guard tick (s); 0 disables |
396
486
  | `THREADKEEPER_MEMORY_GUARD_WARN_MB` | 1536 | notify/log when a server crosses this RSS |
397
487
  | `THREADKEEPER_MEMORY_GUARD_KILL_MB` | 3072 | SIGTERM server above this RSS; 0 disables killing |
@@ -408,9 +498,17 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
408
498
  | `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
409
499
  | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
410
500
  | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
411
-
412
- Persist them via `~/.claude/settings.json`'s `env` block (Claude Code) or
413
- the equivalent env section in each CLI's config. Hot-config reload is
501
+ | `THREADKEEPER_DIALECTIC_MINE_INTERVAL_S` | 0 (off) | dialectic_miner daemon tick (s); 0 disables mechanical observation capture |
502
+ | `THREADKEEPER_DIALECTIC_VALIDATE_INTERVAL_S` | 0 (off) | dialectic_validator daemon tick (s); 0 disables LLM-driven claim synthesis |
503
+ | `THREADKEEPER_DIALECTIC_VALIDATE_MIN` | 5 | min buffered observations before validator engages |
504
+ | `THREADKEEPER_DIALECTIC_VALIDATE_BATCH_SIZE` | 50 | max observations sent to one validator child; prevents oversized prompts and drains large queues incrementally |
505
+ | `THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S` | 0 (off) | evolve-reviewer daemon tick (s); triages the format-evolution queue (promote/dismiss) |
506
+ | `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S` | 0 (off) | evolve-applier daemon tick (s); applies latest complete Curator report first, then oldest promoted+unapplied suggestion behind a PR. Manual `evolve_apply` / `evolve_apply_curator_report` work regardless |
507
+ | `THREADKEEPER_DIALECTIC_MAX_NEW_CLAIMS` | 3 | max new dialectic claims the validator may create per pass |
508
+
509
+ Persist them in `~/.threadkeeper/.env` (copy from `.env.example`) — one file,
510
+ read via pydantic-settings; real environment variables still override it.
511
+ Hot-config reload is
414
512
  [tracked](https://github.com/po4erk91/thread-keeper/issues/2).
415
513
 
416
514
  ### Per-loop agent dispatch
@@ -421,36 +519,27 @@ Codex spawn, etc. Detection: process-tree walk at startup, cached for
421
519
  the server lifetime. The MCP tool `spawn_status()` shows the live
422
520
  resolution table.
423
521
 
424
- Override per role via `~/.threadkeeper/spawn.toml`:
425
-
426
- ```toml
427
- [default]
428
- agent = "auto" # "auto" = use active CLI (default)
429
-
430
- [loops]
431
- # Force specific roles to specific CLIs regardless of active host
432
- shadow_observer = "claude" # heaviest reasoning → keep on Claude
433
- curator = "codex" # weekly audit → Codex is fine
434
- candidate_reviewer = "auto" # follow active CLI
435
- archivist = "claude" # close_thread auto-review
436
- extract = "auto" # this one is local (no spawn)
437
-
438
- [models]
439
- # Optional per-CLI model pin — overrides each CLI's own default
440
- claude = "opus"
441
- codex = "gpt-5.4"
442
- gemini = "gemini-2.5-pro"
443
- ```
444
-
445
- Or via env (highest priority, overrides the TOML):
522
+ Override per role in `~/.threadkeeper/.env` (there is no longer a `spawn.toml` —
523
+ all config lives in the one `.env`). Spawn routing uses nested `__` keys; dict
524
+ keys are lowercased:
446
525
 
447
526
  ```bash
448
- export THREADKEEPER_SPAWN_DEFAULT=codex # global default
449
- export THREADKEEPER_SPAWN_LOOP_CURATOR=gemini # per-role
450
- export THREADKEEPER_SPAWN_MODEL_CLAUDE=opus # per-CLI model
451
- export THREADKEEPER_ACTIVE_CLI=claude # force detection
527
+ # default agent for roles with no explicit pin ("" / unset = use the active CLI)
528
+ THREADKEEPER_SPAWN__DEFAULT=claude
529
+ # per-role CLI: THREADKEEPER_SPAWN__LOOP__<ROLE>=<cli>
530
+ THREADKEEPER_SPAWN__LOOP__SHADOW_OBSERVER=claude # heaviest reasoning → keep on Claude
531
+ THREADKEEPER_SPAWN__LOOP__CURATOR=codex # weekly audit → Codex is fine
532
+ THREADKEEPER_SPAWN__LOOP__CANDIDATE_REVIEWER=auto # "auto" = follow active CLI
533
+ # model pin per CLI or per role: THREADKEEPER_SPAWN__MODEL__<KEY>=<model>
534
+ THREADKEEPER_SPAWN__MODEL__CLAUDE=opus
535
+ THREADKEEPER_SPAWN__MODEL__DIALECTIC_VALIDATOR=opus
452
536
  ```
453
537
 
538
+ Resolution per role: `SPAWN__LOOP__<role>` → `SPAWN__DEFAULT` → active CLI →
539
+ `claude`; `"auto"` (or unset) defers to the active CLI. Real environment
540
+ variables override the `.env`. Force host detection with
541
+ `THREADKEEPER_ACTIVE_CLI=claude`. See `.env.example` for the full knob list.
542
+
454
543
  Adapters without headless support (Claude Desktop, VS Code) can't be
455
544
  spawn targets — `spawn_status()` reports them as "no adapter" and any
456
545
  override pointing at them falls back to the next priority level.
@@ -495,6 +584,13 @@ them with `dry_run=False` to apply:
495
584
  a loop firing constantly while its outcomes stay flat, or a queue
496
585
  backing up. Complements the per-loop `*_status` tools (`mp_health`,
497
586
  `spawn_budget_status`, `shadow_review_status`).
587
+ - **`agent_status(json_output=False, refresh=True)`** — autonomous learning
588
+ loop status, shaped for UI clients. Shows every loop's enabled/running/ready
589
+ state, last pass, backlog, and active spawned-child RSS; running child agents
590
+ are included as detail rows in the JSON. The JSON also includes
591
+ `recent_results` for useful completed loop tasks, which the macOS menu-bar app
592
+ uses for notifications. The `tk-agent-status` console command and macOS
593
+ menu-bar app use the same underlying snapshot.
498
594
 
499
595
  ---
500
596
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "threadkeeper"
7
- version = "0.8.1"
7
+ version = "0.9.1"
8
8
  description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "thread-keeper contributors" }]
@@ -27,6 +27,8 @@ classifiers = [
27
27
  ]
28
28
  dependencies = [
29
29
  "mcp>=1.0.0",
30
+ "pydantic>=2",
31
+ "pydantic-settings>=2",
30
32
  ]
31
33
 
32
34
  [project.optional-dependencies]
@@ -69,6 +71,8 @@ thread-keeper-setup = "threadkeeper._setup:main"
69
71
  # Recompute stored embeddings with the active backend (e.g. after switching to
70
72
  # the ONNX default). Equivalent to `python -m threadkeeper.migrate_embeddings`.
71
73
  tk-migrate-embeddings = "threadkeeper.migrate_embeddings:main"
74
+ # JSON/text status feed for menu-bar widgets and terminal monitors.
75
+ tk-agent-status = "threadkeeper.agent_status:main"
72
76
 
73
77
  [tool.setuptools.packages.find]
74
78
  include = ["threadkeeper*"]
@@ -324,6 +324,10 @@ def test_codex_register_mcp_writes_toml(tmp_path, monkeypatch):
324
324
  assert '"/opt/python"' in body
325
325
  assert "[mcp_servers.thread-keeper.env]" in body
326
326
  assert '"/repo"' in body
327
+ assert "[mcp_servers.thread-keeper.tools.dialectic_claim]" in body
328
+ assert "[mcp_servers.thread-keeper.tools.dialectic_observation_resolve]" in body
329
+ assert "[mcp_servers.thread-keeper.tools.accept_candidate]" in body
330
+ assert 'approval_mode = "approve"' in body
327
331
 
328
332
 
329
333
  def test_codex_iter_messages_filters_developer_turns(tmp_path, monkeypatch):
@@ -350,6 +354,63 @@ def test_codex_iter_messages_filters_developer_turns(tmp_path, monkeypatch):
350
354
  assert msgs[1].content == "hello"
351
355
 
352
356
 
357
+ def test_codex_iter_messages_uses_forced_child_cid_from_spawn_preamble(
358
+ tmp_path, monkeypatch,
359
+ ):
360
+ pkg = _bootstrap(tmp_path, monkeypatch)
361
+ fp = tmp_path / "rollout-2026-06-11T10-00-00.jsonl"
362
+ forced_cid = "af389b3f-8e17-46b5-87f1-402769a74e58"
363
+ fp.write_text("\n".join([
364
+ json.dumps({
365
+ "timestamp": "2026-06-11T10:00:00Z",
366
+ "type": "session_meta",
367
+ "payload": {"id": "019eb5d0-6753-7c31-bce6-b887761090c6", "cwd": "/x"},
368
+ }),
369
+ json.dumps({
370
+ "timestamp": "2026-06-11T10:00:01Z",
371
+ "type": "response_item",
372
+ "payload": {
373
+ "type": "message",
374
+ "role": "user",
375
+ "id": "u-agents",
376
+ "content": [{"type": "input_text", "text": "# AGENTS.md instructions"}],
377
+ },
378
+ }),
379
+ json.dumps({
380
+ "timestamp": "2026-06-11T10:00:02Z",
381
+ "type": "response_item",
382
+ "payload": {
383
+ "type": "message",
384
+ "role": "user",
385
+ "id": "u-spawn",
386
+ "content": [{
387
+ "type": "input_text",
388
+ "text": (
389
+ "You were spawned in the background by parent conversation "
390
+ "8877cab4-1f45-4d05-9a1c-09c6ab28adf1. "
391
+ f"Your own cid is {forced_cid} (forced via --session-id "
392
+ "and THREADKEEPER_FORCE_CID env)."
393
+ ),
394
+ }],
395
+ },
396
+ }),
397
+ json.dumps({
398
+ "timestamp": "2026-06-11T10:00:03Z",
399
+ "type": "response_item",
400
+ "payload": {
401
+ "type": "message",
402
+ "role": "assistant",
403
+ "id": "a-1",
404
+ "content": [{"type": "output_text", "text": "processed"}],
405
+ },
406
+ }),
407
+ ]) + "\n")
408
+
409
+ msgs = list(pkg["codex"].iter_messages(fp))
410
+ assert [m.uuid for m in msgs] == ["u-agents", "u-spawn", "a-1"]
411
+ assert {m.session_id for m in msgs} == {forced_cid}
412
+
413
+
353
414
  # ---------------------------------------------------------------------
354
415
  # Gemini
355
416
  # ---------------------------------------------------------------------