threadkeeper 0.8.0__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/PKG-INFO +150 -37
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/README.md +147 -36
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/pyproject.toml +5 -1
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_adapters.py +61 -0
- threadkeeper-0.9.0/tests/test_agent_status.py +314 -0
- threadkeeper-0.9.0/tests/test_brief_footprint.py +135 -0
- threadkeeper-0.9.0/tests/test_config_settings.py +171 -0
- threadkeeper-0.9.0/tests/test_dashboard.py +123 -0
- threadkeeper-0.9.0/tests/test_dialectic_feed_tools.py +65 -0
- threadkeeper-0.9.0/tests/test_dialectic_miner.py +307 -0
- threadkeeper-0.9.0/tests/test_dialectic_observation_resolve.py +56 -0
- threadkeeper-0.9.0/tests/test_dialectic_recompute.py +121 -0
- threadkeeper-0.9.0/tests/test_dialectic_validator.py +621 -0
- threadkeeper-0.9.0/tests/test_evolve_applier.py +445 -0
- threadkeeper-0.9.0/tests/test_evolve_apply_2.py +105 -0
- threadkeeper-0.9.0/tests/test_evolve_apply_3.py +144 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_extract_daemon.py +165 -0
- threadkeeper-0.9.0/tests/test_extract_dedup.py +134 -0
- threadkeeper-0.9.0/tests/test_ingest_status.py +29 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_lessons.py +78 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_memory_guard.py +30 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_shadow_review.py +44 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_passive_tier.py +86 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skills.py +30 -2
- threadkeeper-0.9.0/tests/test_spawn_codex_stdin.py +102 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_config.py +118 -139
- threadkeeper-0.9.0/tests/test_spawn_wrap.py +250 -0
- threadkeeper-0.9.0/tests/test_thread_janitor.py +180 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/_setup.py +6 -4
- threadkeeper-0.9.0/threadkeeper/_spawn_wrap.py +128 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/codex.py +117 -5
- threadkeeper-0.9.0/threadkeeper/agent_status.py +754 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/brief.py +82 -31
- threadkeeper-0.9.0/threadkeeper/config.py +397 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/db.py +27 -0
- threadkeeper-0.9.0/threadkeeper/dialectic_miner.py +450 -0
- threadkeeper-0.9.0/threadkeeper/dialectic_validator.py +556 -0
- threadkeeper-0.9.0/threadkeeper/evolve_applier.py +709 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/identity.py +44 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/ingest.py +114 -1
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/lessons.py +20 -0
- threadkeeper-0.9.0/threadkeeper/menubar_app.py +244 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/server.py +7 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/shadow_review.py +50 -12
- threadkeeper-0.9.0/threadkeeper/spawn_config.py +95 -0
- threadkeeper-0.9.0/threadkeeper/thread_janitor.py +137 -0
- threadkeeper-0.9.0/threadkeeper/tools/agent_status.py +19 -0
- threadkeeper-0.9.0/threadkeeper/tools/dashboard.py +220 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/dialectic.py +67 -0
- threadkeeper-0.9.0/threadkeeper/tools/dialectic_feed.py +119 -0
- threadkeeper-0.9.0/threadkeeper/tools/evolve_applier.py +163 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/extract.py +45 -6
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/graph.py +1 -1
- threadkeeper-0.9.0/threadkeeper/tools/lessons.py +217 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/skills.py +24 -11
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/spawn.py +101 -20
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/threads.py +20 -4
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/PKG-INFO +150 -37
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/SOURCES.txt +28 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/entry_points.txt +1 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/requires.txt +2 -0
- threadkeeper-0.8.0/threadkeeper/config.py +0 -374
- threadkeeper-0.8.0/threadkeeper/spawn_config.py +0 -203
- threadkeeper-0.8.0/threadkeeper/tools/lessons.py +0 -110
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/LICENSE +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/setup.cfg +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_brief_sections.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_candidate_reviewer.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_core_memory.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_curator.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_delegated_search.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_dialectic.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_dialectic_tier.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_error_paths.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_evolve_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_i18n_multilang.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_identity.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_missed_spawns.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_nudges.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_onnx_embeddings.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_panel.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_probe_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_process_health.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_search_fts_punctuation.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_hint.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_tier.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_use_parser.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_skill_watcher.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_budget.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_hint.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_reap.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_spawn_slim.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_threads.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_tools_smoke.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_validate_threads.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/tests/test_vec_search.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/__init__.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/_mcp.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/__init__.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/_hook_helpers.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/base.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/claude_code.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/claude_desktop.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/copilot.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/gemini.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/adapters/vscode.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/candidate_reviewer.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/curator.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/embeddings.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/evolve_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/extract_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/helpers.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/i18n.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/memory_guard.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/migrate_embeddings.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/nudges.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/probe_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/process_health.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/review_prompts.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/search_proxy.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/skill_watcher.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/spawn_budget.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/__init__.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/candidate_reviewer.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/concepts.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/consolidate.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/core_memory.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/correlation.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/curator.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/dialog.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/distill.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/invariants.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/memory_guard.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/missed_spawns.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/panel.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/peers.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/pickup.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/probes.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/process_health.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/session.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/shadow_review.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/style.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper/tools/validate.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/dependency_links.txt +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.9.0}/threadkeeper.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: threadkeeper
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
|
|
5
5
|
Author: thread-keeper contributors
|
|
6
6
|
License: MIT
|
|
@@ -23,6 +23,8 @@ Requires-Python: >=3.11
|
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: mcp>=1.0.0
|
|
26
|
+
Requires-Dist: pydantic>=2
|
|
27
|
+
Requires-Dist: pydantic-settings>=2
|
|
26
28
|
Provides-Extra: semantic
|
|
27
29
|
Requires-Dist: fastembed>=0.3; extra == "semantic"
|
|
28
30
|
Requires-Dist: numpy>=1.24.0; extra == "semantic"
|
|
@@ -82,10 +84,12 @@ make it more than a memory store:
|
|
|
82
84
|
concurrent sessions signal each other across CLIs. Parent /
|
|
83
85
|
children / sibling agents become a coordinated swarm, not isolated
|
|
84
86
|
chats.
|
|
85
|
-
- **Self-improving skill library** —
|
|
87
|
+
- **Self-improving skill library** — autonomous background loops
|
|
86
88
|
(auto-review on thread close, shadow-review daemon, extract
|
|
87
|
-
harvester, candidate-reviewer, weekly Curator
|
|
88
|
-
|
|
89
|
+
harvester, candidate-reviewer, weekly Curator, and a thread-janitor
|
|
90
|
+
that auto-closes idle threads so abandoned work reaches the harvest
|
|
91
|
+
path — closing is reversible, a note reopens a closed thread)
|
|
92
|
+
materialize class-level skills as the agents work. Adapted to multi-CLI:
|
|
89
93
|
SKILL.md is the primary write target and gets mirrored to every
|
|
90
94
|
known/configured skills root simultaneously (`~/.claude/skills/`,
|
|
91
95
|
`~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
|
|
@@ -196,6 +200,36 @@ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
|
|
|
196
200
|
(3 GB default). Slim children that need semantic search delegate to the
|
|
197
201
|
parent via `search_via_parent` — no per-child copy of the embedding model.
|
|
198
202
|
|
|
203
|
+
`tk-agent-status` exposes autonomous learning loop status as structured JSON
|
|
204
|
+
or compact text for external monitors:
|
|
205
|
+
|
|
206
|
+
```sh
|
|
207
|
+
tk-agent-status
|
|
208
|
+
tk-agent-status --json
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
`apps/macos-agent-status/` contains a small macOS menu-bar app that polls this
|
|
212
|
+
command every 5 seconds and shows every autonomous learning loop: enabled/off,
|
|
213
|
+
running/idle/ready, last pass, backlog, and active child RSS when that loop has
|
|
214
|
+
spawned a worker. Active loops are sorted first (`running`, then `ready`), so
|
|
215
|
+
background work stays at the top of the panel. The app also requests macOS
|
|
216
|
+
notification permission and sends a notification when a newly completed
|
|
217
|
+
autonomous child task produces a useful result in `recent_results`; the first
|
|
218
|
+
poll only marks existing results as seen, so old completions do not spam
|
|
219
|
+
notifications. Probe backlog is due objective probes only, not every registered
|
|
220
|
+
probe, so a healthy cooldown shows `0 due probes` instead of looking stuck. On
|
|
221
|
+
macOS, `python -m threadkeeper.server` automatically installs and launches it
|
|
222
|
+
on MCP startup. Set `THREADKEEPER_MENUBAR_AUTO_LAUNCH=0` to disable that
|
|
223
|
+
behavior.
|
|
224
|
+
|
|
225
|
+
Manual fallback:
|
|
226
|
+
|
|
227
|
+
```sh
|
|
228
|
+
cd apps/macos-agent-status
|
|
229
|
+
./build.sh
|
|
230
|
+
open build/ThreadKeeperAgentStatus.app
|
|
231
|
+
```
|
|
232
|
+
|
|
199
233
|
### Learning loops
|
|
200
234
|
|
|
201
235
|
Five loops turn raw agent dialog into a curated, multi-CLI-mirrored
|
|
@@ -250,7 +284,9 @@ shows agents focused on their primary task rarely do).
|
|
|
250
284
|
| 2 | shadow_review daemon | every 15 min (env knob) | recent `dialog_messages` window | SKILL.md, lessons.md |
|
|
251
285
|
| 3 | extract daemon | every 10 min (env knob) | recent `dialog_messages` window | `extract_candidates` pending queue |
|
|
252
286
|
| 4 | candidate-reviewer daemon | every 1 h (env knob) | pending candidates queue | SKILL.md (create/patch) / notes / verbatim / reject |
|
|
253
|
-
| 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md
|
|
287
|
+
| 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md; Evolve applier applies the latest complete report |
|
|
288
|
+
| 6 | dialectic_miner daemon | configurable (env knob; 0=off) | recent `dialog_messages` — user replies + preceding-assistant context | `dialectic_observations` buffer |
|
|
289
|
+
| 7 | dialectic_validator daemon | configurable (env knob; 0=off) | buffered `dialectic_observations` | dialectic claims + evidence (support / contradict / supersede) via spawned opus child |
|
|
254
290
|
|
|
255
291
|
All five write into the universal Skill format (`SKILL.md` under each
|
|
256
292
|
known/configured skills root — `~/.claude/skills/`, `~/.codex/skills/`,
|
|
@@ -289,6 +325,10 @@ marked as spawned/background processes, so they cannot start their own shadow
|
|
|
289
325
|
daemon even if a CLI drops the no-embeddings env. Idempotent through
|
|
290
326
|
`events.kind='shadow_review_pass'`.
|
|
291
327
|
|
|
328
|
+
Before writing memory, the observer now checks existing lessons/skills and
|
|
329
|
+
prefers patching broad skills. Shadow-origin `lesson_append` is a compact
|
|
330
|
+
fallback only: oversized bodies and near-duplicate slugs are rejected.
|
|
331
|
+
|
|
292
332
|
#### 3. Extract daemon
|
|
293
333
|
|
|
294
334
|
Every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds (default off, 600 =
|
|
@@ -337,9 +377,57 @@ CONSOLIDATE / PRUNE recommendations. Pinned and foreground-authored
|
|
|
337
377
|
entries are marked `[PROTECTED]` in the inventory so the curator
|
|
338
378
|
never proposes destructive changes against them.
|
|
339
379
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
380
|
+
Curator itself stays advisory-only by default. The existing Evolve applier is
|
|
381
|
+
the apply worker: on its next pass it first looks for the latest complete
|
|
382
|
+
Curator report (`CURATOR_PASS_COMPLETE`) that has not been marked applied, then
|
|
383
|
+
spawns an `evolve_applier` child to apply only safe, still-current memory
|
|
384
|
+
maintenance through `lesson_append` / `lesson_remove` / `skill_manage`. It never
|
|
385
|
+
touches `[PROTECTED]`, foreground/user, pinned, or validated entries. Only after
|
|
386
|
+
the child finishes does it call `evolve_mark_curator_report_applied(...)`, which
|
|
387
|
+
prevents replaying the same report.
|
|
388
|
+
|
|
389
|
+
#### 6. Evolve applier — code evolution + curator report apply
|
|
390
|
+
|
|
391
|
+
The brief format is not fixed: any session can file a change to it with
|
|
392
|
+
`evolve_format(suggestion, rationale)`. The `evolve_reviewer` daemon triages
|
|
393
|
+
the queue and **promotes** the good ones — promoted suggestions surface in the
|
|
394
|
+
brief with a ★. Until now that's where it stopped: a human had to hand-edit
|
|
395
|
+
`render_brief` in `brief.py`.
|
|
396
|
+
|
|
397
|
+
`evolve_apply(evolve_id)` closes the loop. It spawns an `evolve_applier` child
|
|
398
|
+
(resolved through the normal spawn role/model config — recommend opus, it
|
|
399
|
+
writes code) that:
|
|
400
|
+
|
|
401
|
+
1. edits `render_brief()` to implement the suggestion;
|
|
402
|
+
2. adds/extends a **golden brief test** asserting both that the new
|
|
403
|
+
behavior/field appears *and* that the existing brief sections still render —
|
|
404
|
+
a format change can't silently break the brief;
|
|
405
|
+
3. runs the full suite (`.venv/bin/python -m pytest -q`) until green;
|
|
406
|
+
4. opens a **pull request** on a feature branch via `gh`, body quoting the
|
|
407
|
+
suggestion + rationale. The generated commit and PR title use the repo's
|
|
408
|
+
allowed Conventional Commit types (`feat:`/`fix:` etc.), never the internal
|
|
409
|
+
`evolve:` label.
|
|
410
|
+
|
|
411
|
+
**Autonomy is the PR gate, nothing more.** The child never pushes or commits to
|
|
412
|
+
`main` (which has branch protection); a human reviews and merges. On a
|
|
413
|
+
successful PR the child calls `evolve_mark_applied(evolve_id, pr_url)`, which
|
|
414
|
+
sets `applied=1` so the suggestion stops resurfacing. Validation inside the
|
|
415
|
+
child (golden render_brief test + full suite green) is the objective gate the
|
|
416
|
+
loop otherwise lacks.
|
|
417
|
+
|
|
418
|
+
The same applier role also drains Curator reports. `evolve_apply_curator_report`
|
|
419
|
+
manually applies the latest complete report, or a specific report path. This
|
|
420
|
+
path does **not** edit code or open a PR; it uses memory MCP tools only and
|
|
421
|
+
marks the report applied with `evolve_mark_curator_report_applied(...)`.
|
|
422
|
+
|
|
423
|
+
Manual: `evolve_apply(#id)` (get ids from `evolve_review()`). Optional daemon:
|
|
424
|
+
set `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S>0` (default 0 = off) to periodically
|
|
425
|
+
apply the latest complete Curator report first, then implement the oldest
|
|
426
|
+
promoted+unapplied suggestion. Pin the agent/model with
|
|
427
|
+
`THREADKEEPER_SPAWN__LOOP__EVOLVE_APPLIER` /
|
|
428
|
+
`THREADKEEPER_SPAWN__MODEL__EVOLVE_APPLIER`. Single-flight (one applier child at
|
|
429
|
+
a time, enforced by a short dispatch file lock plus running-task detection)
|
|
430
|
+
keeps code edits and memory maintenance from colliding.
|
|
343
431
|
|
|
344
432
|
#### Honest take
|
|
345
433
|
|
|
@@ -428,7 +516,10 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
|
|
|
428
516
|
| `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
|
|
429
517
|
| `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
|
|
430
518
|
| `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
|
|
519
|
+
| `THREADKEEPER_PROBE_INTERVAL_S` | 0 (off) | probe daemon tick (s); 1800 = 30 min recommended so finished probe answers are graded promptly |
|
|
520
|
+
| `THREADKEEPER_PROBE_COOLDOWN_S` | 604800 | per-category probe cooldown; 86400 = 1d recommended for active reliability tracking |
|
|
431
521
|
| `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
|
|
522
|
+
| `THREADKEEPER_MENUBAR_AUTO_LAUNCH` | true | macOS: auto install/launch status menu-bar app on MCP startup |
|
|
432
523
|
| `THREADKEEPER_MEMORY_GUARD_POLL_S` | 30 | server RSS guard tick (s); 0 disables |
|
|
433
524
|
| `THREADKEEPER_MEMORY_GUARD_WARN_MB` | 1536 | notify/log when a server crosses this RSS |
|
|
434
525
|
| `THREADKEEPER_MEMORY_GUARD_KILL_MB` | 3072 | SIGTERM server above this RSS; 0 disables killing |
|
|
@@ -445,9 +536,17 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
|
|
|
445
536
|
| `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
|
|
446
537
|
| `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
|
|
447
538
|
| `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
539
|
+
| `THREADKEEPER_DIALECTIC_MINE_INTERVAL_S` | 0 (off) | dialectic_miner daemon tick (s); 0 disables mechanical observation capture |
|
|
540
|
+
| `THREADKEEPER_DIALECTIC_VALIDATE_INTERVAL_S` | 0 (off) | dialectic_validator daemon tick (s); 0 disables LLM-driven claim synthesis |
|
|
541
|
+
| `THREADKEEPER_DIALECTIC_VALIDATE_MIN` | 5 | min buffered observations before validator engages |
|
|
542
|
+
| `THREADKEEPER_DIALECTIC_VALIDATE_BATCH_SIZE` | 50 | max observations sent to one validator child; prevents oversized prompts and drains large queues incrementally |
|
|
543
|
+
| `THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S` | 0 (off) | evolve-reviewer daemon tick (s); triages the format-evolution queue (promote/dismiss) |
|
|
544
|
+
| `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S` | 0 (off) | evolve-applier daemon tick (s); applies latest complete Curator report first, then oldest promoted+unapplied suggestion behind a PR. Manual `evolve_apply` / `evolve_apply_curator_report` work regardless |
|
|
545
|
+
| `THREADKEEPER_DIALECTIC_MAX_NEW_CLAIMS` | 3 | max new dialectic claims the validator may create per pass |
|
|
546
|
+
|
|
547
|
+
Persist them in `~/.threadkeeper/.env` (copy from `.env.example`) — one file,
|
|
548
|
+
read via pydantic-settings; real environment variables still override it.
|
|
549
|
+
Hot-config reload is
|
|
451
550
|
[tracked](https://github.com/po4erk91/thread-keeper/issues/2).
|
|
452
551
|
|
|
453
552
|
### Per-loop agent dispatch
|
|
@@ -458,36 +557,27 @@ Codex spawn, etc. Detection: process-tree walk at startup, cached for
|
|
|
458
557
|
the server lifetime. The MCP tool `spawn_status()` shows the live
|
|
459
558
|
resolution table.
|
|
460
559
|
|
|
461
|
-
Override per role
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
[default]
|
|
465
|
-
agent = "auto" # "auto" = use active CLI (default)
|
|
466
|
-
|
|
467
|
-
[loops]
|
|
468
|
-
# Force specific roles to specific CLIs regardless of active host
|
|
469
|
-
shadow_observer = "claude" # heaviest reasoning → keep on Claude
|
|
470
|
-
curator = "codex" # weekly audit → Codex is fine
|
|
471
|
-
candidate_reviewer = "auto" # follow active CLI
|
|
472
|
-
archivist = "claude" # close_thread auto-review
|
|
473
|
-
extract = "auto" # this one is local (no spawn)
|
|
474
|
-
|
|
475
|
-
[models]
|
|
476
|
-
# Optional per-CLI model pin — overrides each CLI's own default
|
|
477
|
-
claude = "opus"
|
|
478
|
-
codex = "gpt-5.4"
|
|
479
|
-
gemini = "gemini-2.5-pro"
|
|
480
|
-
```
|
|
481
|
-
|
|
482
|
-
Or via env (highest priority, overrides the TOML):
|
|
560
|
+
Override per role in `~/.threadkeeper/.env` (there is no longer a `spawn.toml` —
|
|
561
|
+
all config lives in the one `.env`). Spawn routing uses nested `__` keys; dict
|
|
562
|
+
keys are lowercased:
|
|
483
563
|
|
|
484
564
|
```bash
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
565
|
+
# default agent for roles with no explicit pin ("" / unset = use the active CLI)
|
|
566
|
+
THREADKEEPER_SPAWN__DEFAULT=claude
|
|
567
|
+
# per-role CLI: THREADKEEPER_SPAWN__LOOP__<ROLE>=<cli>
|
|
568
|
+
THREADKEEPER_SPAWN__LOOP__SHADOW_OBSERVER=claude # heaviest reasoning → keep on Claude
|
|
569
|
+
THREADKEEPER_SPAWN__LOOP__CURATOR=codex # weekly audit → Codex is fine
|
|
570
|
+
THREADKEEPER_SPAWN__LOOP__CANDIDATE_REVIEWER=auto # "auto" = follow active CLI
|
|
571
|
+
# model pin per CLI or per role: THREADKEEPER_SPAWN__MODEL__<KEY>=<model>
|
|
572
|
+
THREADKEEPER_SPAWN__MODEL__CLAUDE=opus
|
|
573
|
+
THREADKEEPER_SPAWN__MODEL__DIALECTIC_VALIDATOR=opus
|
|
489
574
|
```
|
|
490
575
|
|
|
576
|
+
Resolution per role: `SPAWN__LOOP__<role>` → `SPAWN__DEFAULT` → active CLI →
|
|
577
|
+
`claude`; `"auto"` (or unset) defers to the active CLI. Real environment
|
|
578
|
+
variables override the `.env`. Force host detection with
|
|
579
|
+
`THREADKEEPER_ACTIVE_CLI=claude`. See `.env.example` for the full knob list.
|
|
580
|
+
|
|
491
581
|
Adapters without headless support (Claude Desktop, VS Code) can't be
|
|
492
582
|
spawn targets — `spawn_status()` reports them as "no adapter" and any
|
|
493
583
|
override pointing at them falls back to the next priority level.
|
|
@@ -519,6 +609,29 @@ them with `dry_run=False` to apply:
|
|
|
519
609
|
|
|
520
610
|
---
|
|
521
611
|
|
|
612
|
+
## Telemetry
|
|
613
|
+
|
|
614
|
+
- **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
|
|
615
|
+
system, read-only. Three sections: **stores** (threads by state,
|
|
616
|
+
notes/dialog/distill/concepts counts, skills + claims by tier,
|
|
617
|
+
extract-candidate and evolve queues, probe/task counts), **loops**
|
|
618
|
+
(how many times each autonomous daemon fired in the window vs 30 days,
|
|
619
|
+
plus last-fire age), and **outcomes** (what those loops actually
|
|
620
|
+
produced — skills materialized, tier promotions, candidate
|
|
621
|
+
accept-vs-reject rate). Surfaces the gaps the point-tools can't:
|
|
622
|
+
a loop firing constantly while its outcomes stay flat, or a queue
|
|
623
|
+
backing up. Complements the per-loop `*_status` tools (`mp_health`,
|
|
624
|
+
`spawn_budget_status`, `shadow_review_status`).
|
|
625
|
+
- **`agent_status(json_output=False, refresh=True)`** — autonomous learning
|
|
626
|
+
loop status, shaped for UI clients. Shows every loop's enabled/running/ready
|
|
627
|
+
state, last pass, backlog, and active spawned-child RSS; running child agents
|
|
628
|
+
are included as detail rows in the JSON. The JSON also includes
|
|
629
|
+
`recent_results` for useful completed loop tasks, which the macOS menu-bar app
|
|
630
|
+
uses for notifications. The `tk-agent-status` console command and macOS
|
|
631
|
+
menu-bar app use the same underlying snapshot.
|
|
632
|
+
|
|
633
|
+
---
|
|
634
|
+
|
|
522
635
|
## Storage
|
|
523
636
|
|
|
524
637
|
`~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
|
|
@@ -43,10 +43,12 @@ make it more than a memory store:
|
|
|
43
43
|
concurrent sessions signal each other across CLIs. Parent /
|
|
44
44
|
children / sibling agents become a coordinated swarm, not isolated
|
|
45
45
|
chats.
|
|
46
|
-
- **Self-improving skill library** —
|
|
46
|
+
- **Self-improving skill library** — autonomous background loops
|
|
47
47
|
(auto-review on thread close, shadow-review daemon, extract
|
|
48
|
-
harvester, candidate-reviewer, weekly Curator
|
|
49
|
-
|
|
48
|
+
harvester, candidate-reviewer, weekly Curator, and a thread-janitor
|
|
49
|
+
that auto-closes idle threads so abandoned work reaches the harvest
|
|
50
|
+
path — closing is reversible, a note reopens a closed thread)
|
|
51
|
+
materialize class-level skills as the agents work. Adapted to multi-CLI:
|
|
50
52
|
SKILL.md is the primary write target and gets mirrored to every
|
|
51
53
|
known/configured skills root simultaneously (`~/.claude/skills/`,
|
|
52
54
|
`~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
|
|
@@ -157,6 +159,36 @@ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
|
|
|
157
159
|
(3 GB default). Slim children that need semantic search delegate to the
|
|
158
160
|
parent via `search_via_parent` — no per-child copy of the embedding model.
|
|
159
161
|
|
|
162
|
+
`tk-agent-status` exposes autonomous learning loop status as structured JSON
|
|
163
|
+
or compact text for external monitors:
|
|
164
|
+
|
|
165
|
+
```sh
|
|
166
|
+
tk-agent-status
|
|
167
|
+
tk-agent-status --json
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
`apps/macos-agent-status/` contains a small macOS menu-bar app that polls this
|
|
171
|
+
command every 5 seconds and shows every autonomous learning loop: enabled/off,
|
|
172
|
+
running/idle/ready, last pass, backlog, and active child RSS when that loop has
|
|
173
|
+
spawned a worker. Active loops are sorted first (`running`, then `ready`), so
|
|
174
|
+
background work stays at the top of the panel. The app also requests macOS
|
|
175
|
+
notification permission and sends a notification when a newly completed
|
|
176
|
+
autonomous child task produces a useful result in `recent_results`; the first
|
|
177
|
+
poll only marks existing results as seen, so old completions do not spam
|
|
178
|
+
notifications. Probe backlog is due objective probes only, not every registered
|
|
179
|
+
probe, so a healthy cooldown shows `0 due probes` instead of looking stuck. On
|
|
180
|
+
macOS, `python -m threadkeeper.server` automatically installs and launches it
|
|
181
|
+
on MCP startup. Set `THREADKEEPER_MENUBAR_AUTO_LAUNCH=0` to disable that
|
|
182
|
+
behavior.
|
|
183
|
+
|
|
184
|
+
Manual fallback:
|
|
185
|
+
|
|
186
|
+
```sh
|
|
187
|
+
cd apps/macos-agent-status
|
|
188
|
+
./build.sh
|
|
189
|
+
open build/ThreadKeeperAgentStatus.app
|
|
190
|
+
```
|
|
191
|
+
|
|
160
192
|
### Learning loops
|
|
161
193
|
|
|
162
194
|
Five loops turn raw agent dialog into a curated, multi-CLI-mirrored
|
|
@@ -211,7 +243,9 @@ shows agents focused on their primary task rarely do).
|
|
|
211
243
|
| 2 | shadow_review daemon | every 15 min (env knob) | recent `dialog_messages` window | SKILL.md, lessons.md |
|
|
212
244
|
| 3 | extract daemon | every 10 min (env knob) | recent `dialog_messages` window | `extract_candidates` pending queue |
|
|
213
245
|
| 4 | candidate-reviewer daemon | every 1 h (env knob) | pending candidates queue | SKILL.md (create/patch) / notes / verbatim / reject |
|
|
214
|
-
| 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md
|
|
246
|
+
| 5 | Curator daemon | every 7 days (env knob) | every existing lesson + recently-touched skill | REPORT-`<date>`.md; Evolve applier applies the latest complete report |
|
|
247
|
+
| 6 | dialectic_miner daemon | configurable (env knob; 0=off) | recent `dialog_messages` — user replies + preceding-assistant context | `dialectic_observations` buffer |
|
|
248
|
+
| 7 | dialectic_validator daemon | configurable (env knob; 0=off) | buffered `dialectic_observations` | dialectic claims + evidence (support / contradict / supersede) via spawned opus child |
|
|
215
249
|
|
|
216
250
|
All five write into the universal Skill format (`SKILL.md` under each
|
|
217
251
|
known/configured skills root — `~/.claude/skills/`, `~/.codex/skills/`,
|
|
@@ -250,6 +284,10 @@ marked as spawned/background processes, so they cannot start their own shadow
|
|
|
250
284
|
daemon even if a CLI drops the no-embeddings env. Idempotent through
|
|
251
285
|
`events.kind='shadow_review_pass'`.
|
|
252
286
|
|
|
287
|
+
Before writing memory, the observer now checks existing lessons/skills and
|
|
288
|
+
prefers patching broad skills. Shadow-origin `lesson_append` is a compact
|
|
289
|
+
fallback only: oversized bodies and near-duplicate slugs are rejected.
|
|
290
|
+
|
|
253
291
|
#### 3. Extract daemon
|
|
254
292
|
|
|
255
293
|
Every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds (default off, 600 =
|
|
@@ -298,9 +336,57 @@ CONSOLIDATE / PRUNE recommendations. Pinned and foreground-authored
|
|
|
298
336
|
entries are marked `[PROTECTED]` in the inventory so the curator
|
|
299
337
|
never proposes destructive changes against them.
|
|
300
338
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
339
|
+
Curator itself stays advisory-only by default. The existing Evolve applier is
|
|
340
|
+
the apply worker: on its next pass it first looks for the latest complete
|
|
341
|
+
Curator report (`CURATOR_PASS_COMPLETE`) that has not been marked applied, then
|
|
342
|
+
spawns an `evolve_applier` child to apply only safe, still-current memory
|
|
343
|
+
maintenance through `lesson_append` / `lesson_remove` / `skill_manage`. It never
|
|
344
|
+
touches `[PROTECTED]`, foreground/user, pinned, or validated entries. Only after
|
|
345
|
+
the child finishes does it call `evolve_mark_curator_report_applied(...)`, which
|
|
346
|
+
prevents replaying the same report.
|
|
347
|
+
|
|
348
|
+
#### 6. Evolve applier — code evolution + curator report apply
|
|
349
|
+
|
|
350
|
+
The brief format is not fixed: any session can file a change to it with
|
|
351
|
+
`evolve_format(suggestion, rationale)`. The `evolve_reviewer` daemon triages
|
|
352
|
+
the queue and **promotes** the good ones — promoted suggestions surface in the
|
|
353
|
+
brief with a ★. Until now that's where it stopped: a human had to hand-edit
|
|
354
|
+
`render_brief` in `brief.py`.
|
|
355
|
+
|
|
356
|
+
`evolve_apply(evolve_id)` closes the loop. It spawns an `evolve_applier` child
|
|
357
|
+
(resolved through the normal spawn role/model config — recommend opus, it
|
|
358
|
+
writes code) that:
|
|
359
|
+
|
|
360
|
+
1. edits `render_brief()` to implement the suggestion;
|
|
361
|
+
2. adds/extends a **golden brief test** asserting both that the new
|
|
362
|
+
behavior/field appears *and* that the existing brief sections still render —
|
|
363
|
+
a format change can't silently break the brief;
|
|
364
|
+
3. runs the full suite (`.venv/bin/python -m pytest -q`) until green;
|
|
365
|
+
4. opens a **pull request** on a feature branch via `gh`, body quoting the
|
|
366
|
+
suggestion + rationale. The generated commit and PR title use the repo's
|
|
367
|
+
allowed Conventional Commit types (`feat:`/`fix:` etc.), never the internal
|
|
368
|
+
`evolve:` label.
|
|
369
|
+
|
|
370
|
+
**Autonomy is the PR gate, nothing more.** The child never pushes or commits to
|
|
371
|
+
`main` (which has branch protection); a human reviews and merges. On a
|
|
372
|
+
successful PR the child calls `evolve_mark_applied(evolve_id, pr_url)`, which
|
|
373
|
+
sets `applied=1` so the suggestion stops resurfacing. Validation inside the
|
|
374
|
+
child (golden render_brief test + full suite green) is the objective gate the
|
|
375
|
+
loop otherwise lacks.
|
|
376
|
+
|
|
377
|
+
The same applier role also drains Curator reports. `evolve_apply_curator_report`
|
|
378
|
+
manually applies the latest complete report, or a specific report path. This
|
|
379
|
+
path does **not** edit code or open a PR; it uses memory MCP tools only and
|
|
380
|
+
marks the report applied with `evolve_mark_curator_report_applied(...)`.
|
|
381
|
+
|
|
382
|
+
Manual: `evolve_apply(#id)` (get ids from `evolve_review()`). Optional daemon:
|
|
383
|
+
set `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S>0` (default 0 = off) to periodically
|
|
384
|
+
apply the latest complete Curator report first, then implement the oldest
|
|
385
|
+
promoted+unapplied suggestion. Pin the agent/model with
|
|
386
|
+
`THREADKEEPER_SPAWN__LOOP__EVOLVE_APPLIER` /
|
|
387
|
+
`THREADKEEPER_SPAWN__MODEL__EVOLVE_APPLIER`. Single-flight (one applier child at
|
|
388
|
+
a time, enforced by a short dispatch file lock plus running-task detection)
|
|
389
|
+
keeps code edits and memory maintenance from colliding.
|
|
304
390
|
|
|
305
391
|
#### Honest take
|
|
306
392
|
|
|
@@ -389,7 +475,10 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
|
|
|
389
475
|
| `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
|
|
390
476
|
| `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
|
|
391
477
|
| `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
|
|
478
|
+
| `THREADKEEPER_PROBE_INTERVAL_S` | 0 (off) | probe daemon tick (s); 1800 = 30 min recommended so finished probe answers are graded promptly |
|
|
479
|
+
| `THREADKEEPER_PROBE_COOLDOWN_S` | 604800 | per-category probe cooldown; 86400 = 1d recommended for active reliability tracking |
|
|
392
480
|
| `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
|
|
481
|
+
| `THREADKEEPER_MENUBAR_AUTO_LAUNCH` | true | macOS: auto install/launch status menu-bar app on MCP startup |
|
|
393
482
|
| `THREADKEEPER_MEMORY_GUARD_POLL_S` | 30 | server RSS guard tick (s); 0 disables |
|
|
394
483
|
| `THREADKEEPER_MEMORY_GUARD_WARN_MB` | 1536 | notify/log when a server crosses this RSS |
|
|
395
484
|
| `THREADKEEPER_MEMORY_GUARD_KILL_MB` | 3072 | SIGTERM server above this RSS; 0 disables killing |
|
|
@@ -406,9 +495,17 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
|
|
|
406
495
|
| `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
|
|
407
496
|
| `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
|
|
408
497
|
| `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
498
|
+
| `THREADKEEPER_DIALECTIC_MINE_INTERVAL_S` | 0 (off) | dialectic_miner daemon tick (s); 0 disables mechanical observation capture |
|
|
499
|
+
| `THREADKEEPER_DIALECTIC_VALIDATE_INTERVAL_S` | 0 (off) | dialectic_validator daemon tick (s); 0 disables LLM-driven claim synthesis |
|
|
500
|
+
| `THREADKEEPER_DIALECTIC_VALIDATE_MIN` | 5 | min buffered observations before validator engages |
|
|
501
|
+
| `THREADKEEPER_DIALECTIC_VALIDATE_BATCH_SIZE` | 50 | max observations sent to one validator child; prevents oversized prompts and drains large queues incrementally |
|
|
502
|
+
| `THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S` | 0 (off) | evolve-reviewer daemon tick (s); triages the format-evolution queue (promote/dismiss) |
|
|
503
|
+
| `THREADKEEPER_EVOLVE_APPLY_INTERVAL_S` | 0 (off) | evolve-applier daemon tick (s); applies latest complete Curator report first, then oldest promoted+unapplied suggestion behind a PR. Manual `evolve_apply` / `evolve_apply_curator_report` work regardless |
|
|
504
|
+
| `THREADKEEPER_DIALECTIC_MAX_NEW_CLAIMS` | 3 | max new dialectic claims the validator may create per pass |
|
|
505
|
+
|
|
506
|
+
Persist them in `~/.threadkeeper/.env` (copy from `.env.example`) — one file,
|
|
507
|
+
read via pydantic-settings; real environment variables still override it.
|
|
508
|
+
Hot-config reload is
|
|
412
509
|
[tracked](https://github.com/po4erk91/thread-keeper/issues/2).
|
|
413
510
|
|
|
414
511
|
### Per-loop agent dispatch
|
|
@@ -419,36 +516,27 @@ Codex spawn, etc. Detection: process-tree walk at startup, cached for
|
|
|
419
516
|
the server lifetime. The MCP tool `spawn_status()` shows the live
|
|
420
517
|
resolution table.
|
|
421
518
|
|
|
422
|
-
Override per role
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
[default]
|
|
426
|
-
agent = "auto" # "auto" = use active CLI (default)
|
|
427
|
-
|
|
428
|
-
[loops]
|
|
429
|
-
# Force specific roles to specific CLIs regardless of active host
|
|
430
|
-
shadow_observer = "claude" # heaviest reasoning → keep on Claude
|
|
431
|
-
curator = "codex" # weekly audit → Codex is fine
|
|
432
|
-
candidate_reviewer = "auto" # follow active CLI
|
|
433
|
-
archivist = "claude" # close_thread auto-review
|
|
434
|
-
extract = "auto" # this one is local (no spawn)
|
|
435
|
-
|
|
436
|
-
[models]
|
|
437
|
-
# Optional per-CLI model pin — overrides each CLI's own default
|
|
438
|
-
claude = "opus"
|
|
439
|
-
codex = "gpt-5.4"
|
|
440
|
-
gemini = "gemini-2.5-pro"
|
|
441
|
-
```
|
|
442
|
-
|
|
443
|
-
Or via env (highest priority, overrides the TOML):
|
|
519
|
+
Override per role in `~/.threadkeeper/.env` (there is no longer a `spawn.toml` —
|
|
520
|
+
all config lives in the one `.env`). Spawn routing uses nested `__` keys; dict
|
|
521
|
+
keys are lowercased:
|
|
444
522
|
|
|
445
523
|
```bash
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
524
|
+
# default agent for roles with no explicit pin ("" / unset = use the active CLI)
|
|
525
|
+
THREADKEEPER_SPAWN__DEFAULT=claude
|
|
526
|
+
# per-role CLI: THREADKEEPER_SPAWN__LOOP__<ROLE>=<cli>
|
|
527
|
+
THREADKEEPER_SPAWN__LOOP__SHADOW_OBSERVER=claude # heaviest reasoning → keep on Claude
|
|
528
|
+
THREADKEEPER_SPAWN__LOOP__CURATOR=codex # weekly audit → Codex is fine
|
|
529
|
+
THREADKEEPER_SPAWN__LOOP__CANDIDATE_REVIEWER=auto # "auto" = follow active CLI
|
|
530
|
+
# model pin per CLI or per role: THREADKEEPER_SPAWN__MODEL__<KEY>=<model>
|
|
531
|
+
THREADKEEPER_SPAWN__MODEL__CLAUDE=opus
|
|
532
|
+
THREADKEEPER_SPAWN__MODEL__DIALECTIC_VALIDATOR=opus
|
|
450
533
|
```
|
|
451
534
|
|
|
535
|
+
Resolution per role: `SPAWN__LOOP__<role>` → `SPAWN__DEFAULT` → active CLI →
|
|
536
|
+
`claude`; `"auto"` (or unset) defers to the active CLI. Real environment
|
|
537
|
+
variables override the `.env`. Force host detection with
|
|
538
|
+
`THREADKEEPER_ACTIVE_CLI=claude`. See `.env.example` for the full knob list.
|
|
539
|
+
|
|
452
540
|
Adapters without headless support (Claude Desktop, VS Code) can't be
|
|
453
541
|
spawn targets — `spawn_status()` reports them as "no adapter" and any
|
|
454
542
|
override pointing at them falls back to the next priority level.
|
|
@@ -480,6 +568,29 @@ them with `dry_run=False` to apply:
|
|
|
480
568
|
|
|
481
569
|
---
|
|
482
570
|
|
|
571
|
+
## Telemetry
|
|
572
|
+
|
|
573
|
+
- **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
|
|
574
|
+
system, read-only. Three sections: **stores** (threads by state,
|
|
575
|
+
notes/dialog/distill/concepts counts, skills + claims by tier,
|
|
576
|
+
extract-candidate and evolve queues, probe/task counts), **loops**
|
|
577
|
+
(how many times each autonomous daemon fired in the window vs 30 days,
|
|
578
|
+
plus last-fire age), and **outcomes** (what those loops actually
|
|
579
|
+
produced — skills materialized, tier promotions, candidate
|
|
580
|
+
accept-vs-reject rate). Surfaces the gaps the point-tools can't:
|
|
581
|
+
a loop firing constantly while its outcomes stay flat, or a queue
|
|
582
|
+
backing up. Complements the per-loop `*_status` tools (`mp_health`,
|
|
583
|
+
`spawn_budget_status`, `shadow_review_status`).
|
|
584
|
+
- **`agent_status(json_output=False, refresh=True)`** — autonomous learning
|
|
585
|
+
loop status, shaped for UI clients. Shows every loop's enabled/running/ready
|
|
586
|
+
state, last pass, backlog, and active spawned-child RSS; running child agents
|
|
587
|
+
are included as detail rows in the JSON. The JSON also includes
|
|
588
|
+
`recent_results` for useful completed loop tasks, which the macOS menu-bar app
|
|
589
|
+
uses for notifications. The `tk-agent-status` console command and macOS
|
|
590
|
+
menu-bar app use the same underlying snapshot.
|
|
591
|
+
|
|
592
|
+
---
|
|
593
|
+
|
|
483
594
|
## Storage
|
|
484
595
|
|
|
485
596
|
`~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "threadkeeper"
|
|
7
|
-
version = "0.8.0"
|
|
7
|
+
version = "0.9.0"
|
|
8
8
|
description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
authors = [{ name = "thread-keeper contributors" }]
|
|
@@ -27,6 +27,8 @@ classifiers = [
|
|
|
27
27
|
]
|
|
28
28
|
dependencies = [
|
|
29
29
|
"mcp>=1.0.0",
|
|
30
|
+
"pydantic>=2",
|
|
31
|
+
"pydantic-settings>=2",
|
|
30
32
|
]
|
|
31
33
|
|
|
32
34
|
[project.optional-dependencies]
|
|
@@ -69,6 +71,8 @@ thread-keeper-setup = "threadkeeper._setup:main"
|
|
|
69
71
|
# Recompute stored embeddings with the active backend (e.g. after switching to
|
|
70
72
|
# the ONNX default). Equivalent to `python -m threadkeeper.migrate_embeddings`.
|
|
71
73
|
tk-migrate-embeddings = "threadkeeper.migrate_embeddings:main"
|
|
74
|
+
# JSON/text status feed for menu-bar widgets and terminal monitors.
|
|
75
|
+
tk-agent-status = "threadkeeper.agent_status:main"
|
|
72
76
|
|
|
73
77
|
[tool.setuptools.packages.find]
|
|
74
78
|
include = ["threadkeeper*"]
|
|
@@ -324,6 +324,10 @@ def test_codex_register_mcp_writes_toml(tmp_path, monkeypatch):
|
|
|
324
324
|
assert '"/opt/python"' in body
|
|
325
325
|
assert "[mcp_servers.thread-keeper.env]" in body
|
|
326
326
|
assert '"/repo"' in body
|
|
327
|
+
assert "[mcp_servers.thread-keeper.tools.dialectic_claim]" in body
|
|
328
|
+
assert "[mcp_servers.thread-keeper.tools.dialectic_observation_resolve]" in body
|
|
329
|
+
assert "[mcp_servers.thread-keeper.tools.accept_candidate]" in body
|
|
330
|
+
assert 'approval_mode = "approve"' in body
|
|
327
331
|
|
|
328
332
|
|
|
329
333
|
def test_codex_iter_messages_filters_developer_turns(tmp_path, monkeypatch):
|
|
@@ -350,6 +354,63 @@ def test_codex_iter_messages_filters_developer_turns(tmp_path, monkeypatch):
|
|
|
350
354
|
assert msgs[1].content == "hello"
|
|
351
355
|
|
|
352
356
|
|
|
357
|
+
def test_codex_iter_messages_uses_forced_child_cid_from_spawn_preamble(
|
|
358
|
+
tmp_path, monkeypatch,
|
|
359
|
+
):
|
|
360
|
+
pkg = _bootstrap(tmp_path, monkeypatch)
|
|
361
|
+
fp = tmp_path / "rollout-2026-06-11T10-00-00.jsonl"
|
|
362
|
+
forced_cid = "af389b3f-8e17-46b5-87f1-402769a74e58"
|
|
363
|
+
fp.write_text("\n".join([
|
|
364
|
+
json.dumps({
|
|
365
|
+
"timestamp": "2026-06-11T10:00:00Z",
|
|
366
|
+
"type": "session_meta",
|
|
367
|
+
"payload": {"id": "019eb5d0-6753-7c31-bce6-b887761090c6", "cwd": "/x"},
|
|
368
|
+
}),
|
|
369
|
+
json.dumps({
|
|
370
|
+
"timestamp": "2026-06-11T10:00:01Z",
|
|
371
|
+
"type": "response_item",
|
|
372
|
+
"payload": {
|
|
373
|
+
"type": "message",
|
|
374
|
+
"role": "user",
|
|
375
|
+
"id": "u-agents",
|
|
376
|
+
"content": [{"type": "input_text", "text": "# AGENTS.md instructions"}],
|
|
377
|
+
},
|
|
378
|
+
}),
|
|
379
|
+
json.dumps({
|
|
380
|
+
"timestamp": "2026-06-11T10:00:02Z",
|
|
381
|
+
"type": "response_item",
|
|
382
|
+
"payload": {
|
|
383
|
+
"type": "message",
|
|
384
|
+
"role": "user",
|
|
385
|
+
"id": "u-spawn",
|
|
386
|
+
"content": [{
|
|
387
|
+
"type": "input_text",
|
|
388
|
+
"text": (
|
|
389
|
+
"You were spawned in the background by parent conversation "
|
|
390
|
+
"8877cab4-1f45-4d05-9a1c-09c6ab28adf1. "
|
|
391
|
+
f"Your own cid is {forced_cid} (forced via --session-id "
|
|
392
|
+
"and THREADKEEPER_FORCE_CID env)."
|
|
393
|
+
),
|
|
394
|
+
}],
|
|
395
|
+
},
|
|
396
|
+
}),
|
|
397
|
+
json.dumps({
|
|
398
|
+
"timestamp": "2026-06-11T10:00:03Z",
|
|
399
|
+
"type": "response_item",
|
|
400
|
+
"payload": {
|
|
401
|
+
"type": "message",
|
|
402
|
+
"role": "assistant",
|
|
403
|
+
"id": "a-1",
|
|
404
|
+
"content": [{"type": "output_text", "text": "processed"}],
|
|
405
|
+
},
|
|
406
|
+
}),
|
|
407
|
+
]) + "\n")
|
|
408
|
+
|
|
409
|
+
msgs = list(pkg["codex"].iter_messages(fp))
|
|
410
|
+
assert [m.uuid for m in msgs] == ["u-agents", "u-spawn", "a-1"]
|
|
411
|
+
assert {m.session_id for m in msgs} == {forced_cid}
|
|
412
|
+
|
|
413
|
+
|
|
353
414
|
# ---------------------------------------------------------------------
|
|
354
415
|
# Gemini
|
|
355
416
|
# ---------------------------------------------------------------------
|