threadkeeper 0.7.0__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {threadkeeper-0.7.0/threadkeeper.egg-info → threadkeeper-0.8.1}/PKG-INFO +22 -4
  2. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/README.md +21 -3
  3. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/pyproject.toml +1 -1
  4. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_curator.py +82 -0
  5. threadkeeper-0.8.1/tests/test_dashboard.py +123 -0
  6. threadkeeper-0.8.1/tests/test_evolve_daemon.py +187 -0
  7. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_extract_daemon.py +55 -0
  8. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_nudges.py +13 -0
  9. threadkeeper-0.8.1/tests/test_panel.py +188 -0
  10. threadkeeper-0.8.1/tests/test_probe_daemon.py +211 -0
  11. threadkeeper-0.8.1/tests/test_search_fts_punctuation.py +67 -0
  12. threadkeeper-0.8.1/tests/test_skill_passive_tier.py +117 -0
  13. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_skills.py +16 -0
  14. threadkeeper-0.8.1/tests/test_spawn_reap.py +80 -0
  15. threadkeeper-0.8.1/tests/test_thread_janitor.py +180 -0
  16. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/brief.py +21 -7
  17. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/config.py +79 -0
  18. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/curator.py +61 -1
  19. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/db.py +9 -0
  20. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/embeddings.py +5 -1
  21. threadkeeper-0.8.1/threadkeeper/evolve_daemon.py +233 -0
  22. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/helpers.py +21 -0
  23. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/identity.py +15 -0
  24. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/ingest.py +68 -35
  25. threadkeeper-0.8.1/threadkeeper/probe_daemon.py +276 -0
  26. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/server.py +2 -0
  27. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/shadow_review.py +2 -0
  28. threadkeeper-0.8.1/threadkeeper/thread_janitor.py +137 -0
  29. threadkeeper-0.8.1/threadkeeper/tools/dashboard.py +220 -0
  30. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/dialectic.py +20 -2
  31. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/extract.py +11 -0
  32. threadkeeper-0.8.1/threadkeeper/tools/panel.py +195 -0
  33. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/spawn.py +59 -5
  34. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/threads.py +57 -5
  35. {threadkeeper-0.7.0 → threadkeeper-0.8.1/threadkeeper.egg-info}/PKG-INFO +22 -4
  36. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/SOURCES.txt +13 -0
  37. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/LICENSE +0 -0
  38. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/setup.cfg +0 -0
  39. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_adapters.py +0 -0
  40. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_brief_sections.py +0 -0
  41. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_candidate_reviewer.py +0 -0
  42. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_core_memory.py +0 -0
  43. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_delegated_search.py +0 -0
  44. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_dialectic.py +0 -0
  45. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_dialectic_tier.py +0 -0
  46. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_error_paths.py +0 -0
  47. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_i18n_multilang.py +0 -0
  48. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_identity.py +0 -0
  49. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_lessons.py +0 -0
  50. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_memory_guard.py +0 -0
  51. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_missed_spawns.py +0 -0
  52. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_onnx_embeddings.py +0 -0
  53. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_process_health.py +0 -0
  54. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_shadow_review.py +0 -0
  55. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_skill_hint.py +0 -0
  56. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_skill_tier.py +0 -0
  57. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_skill_use_parser.py +0 -0
  58. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_skill_watcher.py +0 -0
  59. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_spawn_budget.py +0 -0
  60. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_spawn_config.py +0 -0
  61. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_spawn_hint.py +0 -0
  62. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_spawn_slim.py +0 -0
  63. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_threads.py +0 -0
  64. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_tools_smoke.py +0 -0
  65. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_validate_threads.py +0 -0
  66. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/tests/test_vec_search.py +0 -0
  67. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/__init__.py +0 -0
  68. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/_mcp.py +0 -0
  69. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/_setup.py +0 -0
  70. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/__init__.py +0 -0
  71. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/_hook_helpers.py +0 -0
  72. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/base.py +0 -0
  73. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/claude_code.py +0 -0
  74. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/claude_desktop.py +0 -0
  75. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/codex.py +0 -0
  76. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/copilot.py +0 -0
  77. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/gemini.py +0 -0
  78. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/adapters/vscode.py +0 -0
  79. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/candidate_reviewer.py +0 -0
  80. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/extract_daemon.py +0 -0
  81. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/i18n.py +0 -0
  82. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/lessons.py +0 -0
  83. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/memory_guard.py +0 -0
  84. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/migrate_embeddings.py +0 -0
  85. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/nudges.py +0 -0
  86. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/process_health.py +0 -0
  87. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/review_prompts.py +0 -0
  88. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/search_proxy.py +0 -0
  89. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/skill_watcher.py +0 -0
  90. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/spawn_budget.py +0 -0
  91. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/spawn_config.py +0 -0
  92. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/__init__.py +0 -0
  93. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/candidate_reviewer.py +0 -0
  94. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/concepts.py +0 -0
  95. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/consolidate.py +0 -0
  96. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/core_memory.py +0 -0
  97. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/correlation.py +0 -0
  98. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/curator.py +0 -0
  99. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/dialog.py +0 -0
  100. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/distill.py +0 -0
  101. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/graph.py +0 -0
  102. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/invariants.py +0 -0
  103. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/lessons.py +0 -0
  104. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/memory_guard.py +0 -0
  105. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/missed_spawns.py +0 -0
  106. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/peers.py +0 -0
  107. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/pickup.py +0 -0
  108. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/probes.py +0 -0
  109. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/process_health.py +0 -0
  110. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/session.py +0 -0
  111. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/shadow_review.py +0 -0
  112. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/skills.py +0 -0
  113. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/style.py +0 -0
  114. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper/tools/validate.py +0 -0
  115. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/dependency_links.txt +0 -0
  116. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/entry_points.txt +0 -0
  117. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/requires.txt +0 -0
  118. {threadkeeper-0.7.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: threadkeeper
3
- Version: 0.7.0
3
+ Version: 0.8.1
4
4
  Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
5
5
  Author: thread-keeper contributors
6
6
  License: MIT
@@ -82,10 +82,12 @@ make it more than a memory store:
82
82
  concurrent sessions signal each other across CLIs. Parent /
83
83
  children / sibling agents become a coordinated swarm, not isolated
84
84
  chats.
85
- - **Self-improving skill library** — five autonomous background loops
85
+ - **Self-improving skill library** — autonomous background loops
86
86
  (auto-review on thread close, shadow-review daemon, extract
87
- harvester, candidate-reviewer, weekly Curator) materialize
88
- class-level skills as the agents work. Adapted to multi-CLI:
87
+ harvester, candidate-reviewer, weekly Curator, and a thread-janitor
88
+ that auto-closes idle threads so abandoned work reaches the harvest
89
+ path — closing is reversible, a note reopens a closed thread)
90
+ materialize class-level skills as the agents work. Adapted to multi-CLI:
89
91
  SKILL.md is the primary write target and gets mirrored to every
90
92
  known/configured skills root simultaneously (`~/.claude/skills/`,
91
93
  `~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
@@ -519,6 +521,22 @@ them with `dry_run=False` to apply:
519
521
 
520
522
  ---
521
523
 
524
+ ## Telemetry
525
+
526
+ - **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
527
+ system, read-only. Three sections: **stores** (threads by state,
528
+ notes/dialog/distill/concepts counts, skills + claims by tier,
529
+ extract-candidate and evolve queues, probe/task counts), **loops**
530
+ (how many times each autonomous daemon fired in the window vs 30 days,
531
+ plus last-fire age), and **outcomes** (what those loops actually
532
+ produced — skills materialized, tier promotions, candidate
533
+ accept-vs-reject rate). Surfaces the gaps the point-tools can't:
534
+ a loop firing constantly while its outcomes stay flat, or a queue
535
+ backing up. Complements the per-loop `*_status` tools (`mp_health`,
536
+ `spawn_budget_status`, `shadow_review_status`).
537
+
538
+ ---
539
+
522
540
  ## Storage
523
541
 
524
542
  `~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
@@ -43,10 +43,12 @@ make it more than a memory store:
43
43
  concurrent sessions signal each other across CLIs. Parent /
44
44
  children / sibling agents become a coordinated swarm, not isolated
45
45
  chats.
46
- - **Self-improving skill library** — five autonomous background loops
46
+ - **Self-improving skill library** — autonomous background loops
47
47
  (auto-review on thread close, shadow-review daemon, extract
48
- harvester, candidate-reviewer, weekly Curator) materialize
49
- class-level skills as the agents work. Adapted to multi-CLI:
48
+ harvester, candidate-reviewer, weekly Curator, and a thread-janitor
49
+ that auto-closes idle threads so abandoned work reaches the harvest
50
+ path — closing is reversible, a note reopens a closed thread)
51
+ materialize class-level skills as the agents work. Adapted to multi-CLI:
50
52
  SKILL.md is the primary write target and gets mirrored to every
51
53
  known/configured skills root simultaneously (`~/.claude/skills/`,
52
54
  `~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
@@ -480,6 +482,22 @@ them with `dry_run=False` to apply:
480
482
 
481
483
  ---
482
484
 
485
+ ## Telemetry
486
+
487
+ - **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
488
+ system, read-only. Three sections: **stores** (threads by state,
489
+ notes/dialog/distill/concepts counts, skills + claims by tier,
490
+ extract-candidate and evolve queues, probe/task counts), **loops**
491
+ (how many times each autonomous daemon fired in the window vs 30 days,
492
+ plus last-fire age), and **outcomes** (what those loops actually
493
+ produced — skills materialized, tier promotions, candidate
494
+ accept-vs-reject rate). Surfaces the gaps the point-tools can't:
495
+ a loop firing constantly while its outcomes stay flat, or a queue
496
+ backing up. Complements the per-loop `*_status` tools (`mp_health`,
497
+ `spawn_budget_status`, `shadow_review_status`).
498
+
499
+ ---
500
+
483
501
  ## Storage
484
502
 
485
503
  `~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "threadkeeper"
7
- version = "0.7.0"
7
+ version = "0.8.1"
8
8
  description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "thread-keeper contributors" }]
@@ -321,3 +321,85 @@ def test_advisory_mode_default_excludes_destructive_tools(
321
321
  assert "lesson_append" not in allowed
322
322
  assert "ADVISORY MODE" in kw["prompt"]
323
323
  assert "DESTRUCTIVE MODE ENABLED" not in kw["prompt"]
324
+
325
+
326
+ # ──────────────────────────────────────────────────────────────────────
327
+ # Concepts review (F1) — curator also audits the concepts store
328
+ # ──────────────────────────────────────────────────────────────────────
329
+
330
+ def _add_concept(conn, cid, desc, confidence="medium",
331
+ registered_at=None, last_evidence_at=None):
332
+ now = int(time.time())
333
+ conn.execute(
334
+ "INSERT INTO concepts (id, description, confidence, registered_at, "
335
+ "last_evidence_at) VALUES (?,?,?,?,?)",
336
+ (cid, desc, confidence, registered_at or now, last_evidence_at),
337
+ )
338
+ conn.commit()
339
+
340
+
341
+ def test_collect_concepts_empty(tmp_path, monkeypatch):
342
+ pkg = _bootstrap(tmp_path, monkeypatch)
343
+ conn = pkg["db"].get_db()
344
+ text, n = pkg["curator"]._collect_concepts(conn)
345
+ assert n == 0
346
+ assert text == ""
347
+
348
+
349
+ def test_collect_concepts_lists_with_age(tmp_path, monkeypatch):
350
+ pkg = _bootstrap(tmp_path, monkeypatch)
351
+ conn = pkg["db"].get_db()
352
+ now = int(time.time())
353
+ _add_concept(conn, "Cfresh", "fresh high-conf idea",
354
+ confidence="high", last_evidence_at=now - 86400) # 1d
355
+ _add_concept(conn, "Cstale", "stale low-conf idea",
356
+ confidence="low",
357
+ registered_at=now - 40 * 86400,
358
+ last_evidence_at=None) # never corroborated, 40d old
359
+ text, n = pkg["curator"]._collect_concepts(conn)
360
+ assert n == 2
361
+ assert "Cfresh" in text and "Cstale" in text
362
+ assert "CONCEPTS (n=2)" in text
363
+ # stale concept (no last_evidence, registered 40d ago) shows ~40d age
364
+ assert "40d_ago" in text
365
+ # oldest-first ordering: stale concept appears before fresh one
366
+ assert text.index("Cstale") < text.index("Cfresh")
367
+
368
+
369
+ def test_run_curator_pass_includes_concepts_in_inventory(
370
+ tmp_path, monkeypatch,
371
+ ):
372
+ pkg = _bootstrap(tmp_path, monkeypatch, min_lessons="2")
373
+ pkg["lessons"].append_lesson(title="a", body="b1", source="shadow")
374
+ pkg["lessons"].append_lesson(title="b", body="b2", source="shadow")
375
+ conn = pkg["db"].get_db()
376
+ _add_concept(conn, "Cabc", "asymmetric in-band reactivity",
377
+ confidence="high")
378
+
379
+ import threadkeeper.tools.spawn as spawn_mod
380
+ captured: list[dict] = []
381
+ monkeypatch.setattr(
382
+ spawn_mod, "spawn",
383
+ lambda **kw: captured.append(kw) or "spawn task_id=fake pid=0",
384
+ )
385
+ pkg["curator"].run_curator_pass(force=True)
386
+ prompt = captured[0]["prompt"]
387
+ assert "CONCEPTS (n=1)" in prompt
388
+ assert "Cabc" in prompt
389
+ assert "asymmetric in-band reactivity" in prompt
390
+
391
+
392
+ def test_concepts_alone_do_not_trigger_pass(tmp_path, monkeypatch):
393
+ """Concepts enrich the review but don't lower the lesson threshold —
394
+ a pass still requires CURATOR_MIN_LESSONS lessons."""
395
+ pkg = _bootstrap(tmp_path, monkeypatch, min_lessons="3")
396
+ conn = pkg["db"].get_db()
397
+ _add_concept(conn, "Conly", "a lone concept", confidence="high")
398
+
399
+ import threadkeeper.tools.spawn as spawn_mod
400
+ called = []
401
+ monkeypatch.setattr(spawn_mod, "spawn",
402
+ lambda **kw: called.append(kw) or "x")
403
+ out = pkg["curator"].run_curator_pass(force=True)
404
+ assert out.startswith("below_threshold")
405
+ assert called == []
@@ -0,0 +1,123 @@
1
+ """mp_dashboard — aggregate rollup tool.
2
+
3
+ Verifies the tool renders all sections, counts seeded stores, reflects
4
+ loop-pass + outcome events, and degrades without crashing on an empty DB.
5
+
6
+ NOTE on isolation: assertions are DELTA-based, never absolute counts. The
7
+ suite's `test_tools_smoke.py` does a `del sys.modules` + package re-import
8
+ + every-tool invocation at COLLECTION time in the parent process, which
9
+ `os.environ.setdefault`-pins a DB path and seeds rows. So "exactly N
10
+ threads" is not guaranteed across the full suite even with `fresh_mp`'s
11
+ tmp DB — we assert that the dashboard reflects the rows THIS test adds
12
+ (before/after delta), which is the real contract anyway.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ import time
18
+
19
+
20
+ def _tool(pkg, name):
21
+ return pkg["mcp"]._tool_manager._tools[name].fn
22
+
23
+
24
+ def _count(out: str, key: str) -> int:
25
+ """Pull `<key>=N` from the dashboard text. Absence means zero: when a
26
+ store is empty the grouped `threads:` line collapses to `threads: 0`
27
+ (no `active=` token), and that genuinely means 0 active threads — so a
28
+ missing key reads as 0, which keeps before/after deltas correct."""
29
+ m = re.search(rf"\b{re.escape(key)}=(\d+)", out)
30
+ return int(m.group(1)) if m else 0
31
+
32
+
33
+ def _active_count(out: str) -> int:
34
+ return _count(out, "active")
35
+
36
+
37
+ def _notes_count(out: str) -> int:
38
+ return _count(out, "notes")
39
+
40
+
41
+ def _concepts_count(out: str) -> int:
42
+ return _count(out, "concepts")
43
+
44
+
45
+ def test_dashboard_registered(fresh_mp):
46
+ assert "mp_dashboard" in fresh_mp["mcp"]._tool_manager._tools
47
+
48
+
49
+ def test_dashboard_empty_db_no_crash(fresh_mp):
50
+ out = _tool(fresh_mp, "mp_dashboard")()
51
+ for section in ("dashboard", "stores", "loops", "outcomes", "reliability"):
52
+ assert section in out, (section, out)
53
+
54
+
55
+ def test_dashboard_counts_stores_delta(fresh_mp):
56
+ dash = _tool(fresh_mp, "mp_dashboard")
57
+ before = dash()
58
+ a0, n0, c0 = (_active_count(before), _notes_count(before),
59
+ _concepts_count(before))
60
+
61
+ open_thread = _tool(fresh_mp, "open_thread")
62
+ note = _tool(fresh_mp, "note")
63
+ t1 = open_thread(question="alpha")
64
+ open_thread(question="beta")
65
+ note(thread_id=t1, content="a note here", kind="insight")
66
+ note(thread_id=t1, content="another move", kind="move")
67
+ _tool(fresh_mp, "register_concept")(description="a concept by example",
68
+ confidence="low")
69
+
70
+ after = dash()
71
+ assert _active_count(after) - a0 == 2, (a0, after)
72
+ assert _notes_count(after) - n0 == 2, (n0, after)
73
+ assert _concepts_count(after) - c0 == 1, (c0, after)
74
+
75
+
76
+ def _shadow_win(out: str) -> int:
77
+ m = re.search(r"shadow\s+(\d+) / \d+", out)
78
+ return int(m.group(1)) if m else 0
79
+
80
+
81
+ def test_dashboard_reflects_loop_and_outcome_events(fresh_mp):
82
+ # Delta measured THROUGH the tool itself (before vs after), so both reads
83
+ # go through the identical DB-resolution path — immune to whatever DB a
84
+ # contaminated parent env pinned. Insert the loop/outcome events the
85
+ # daemons would write, then confirm the dashboard's own count rises by 3.
86
+ conn = fresh_mp["db"].get_db()
87
+ now = int(time.time())
88
+ before = _shadow_win(_tool(fresh_mp, "mp_dashboard")(window_days=7))
89
+ for _ in range(3):
90
+ conn.execute(
91
+ "INSERT INTO events (session_id, kind, target, summary, created_at) "
92
+ "VALUES ('s', 'shadow_review_pass', ?, '', ?)", (str(now), now))
93
+ conn.execute(
94
+ "INSERT INTO events (session_id, kind, target, summary, created_at) "
95
+ "VALUES ('s', 'skill_materialized', 'Tx', 'path', ?)", (now,))
96
+ conn.commit()
97
+ after_out = _tool(fresh_mp, "mp_dashboard")(window_days=7)
98
+ assert _shadow_win(after_out) - before == 3, (before, after_out)
99
+ assert "skill_materialized" in after_out, after_out
100
+
101
+
102
+ def test_dashboard_accept_rate(fresh_mp):
103
+ conn = fresh_mp["db"].get_db()
104
+ now = int(time.time())
105
+ # Snapshot existing decisions so the ratio assertion is exact regardless
106
+ # of pre-seeded rows.
107
+ acc0 = conn.execute(
108
+ "SELECT COUNT(*) FROM events WHERE kind LIKE 'accept_candidate%'"
109
+ ).fetchone()[0]
110
+ rej0 = conn.execute(
111
+ "SELECT COUNT(*) FROM events WHERE kind='reject_candidate'"
112
+ ).fetchone()[0]
113
+ conn.execute(
114
+ "INSERT INTO events (session_id, kind, target, created_at) "
115
+ "VALUES ('s','accept_candidate:note','1',?)", (now,))
116
+ for _ in range(3):
117
+ conn.execute(
118
+ "INSERT INTO events (session_id, kind, target, created_at) "
119
+ "VALUES ('s','reject_candidate','x',?)", (now,))
120
+ conn.commit()
121
+ out = _tool(fresh_mp, "mp_dashboard")()
122
+ acc, dec = acc0 + 1, acc0 + 1 + rej0 + 3
123
+ assert f"candidate_accept_rate {acc}/{dec}" in out, (acc0, rej0, out)
@@ -0,0 +1,187 @@
1
+ """Evolve reviewer daemon — autonomous triage of the format-evolution queue.
2
+
3
+ The daemon never APPLIES a suggestion (that edits format/code). It spawns a
4
+ child that calls evolve_decide(promote|dismiss) to keep the queue honest.
5
+ Tests exercise the pure logic + dispatch with spawn monkeypatched; no real
6
+ child is launched.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import sys
11
+ import time
12
+ from pathlib import Path
13
+
14
+
15
+ _FAKE_CID = "dddd4444-5555-6666-7777-888899990000"
16
+
17
+
18
+ def _bootstrap(tmp_path, monkeypatch, interval="0", review_min="2"):
19
+ env = {
20
+ "THREADKEEPER_DB": str(tmp_path / "db.sqlite"),
21
+ "CLAUDE_PROJECTS_DIR": str(tmp_path / "fake_claude_projects"),
22
+ "THREADKEEPER_INGEST_INTERVAL_S": "0",
23
+ "THREADKEEPER_INGEST_CAP": "0",
24
+ "THREADKEEPER_SKILL_WATCH_INTERVAL_S": "0",
25
+ "THREADKEEPER_SPAWN_BUDGET_POLL_S": "0",
26
+ "THREADKEEPER_SEARCH_PROXY_POLL_S": "0",
27
+ "THREADKEEPER_MEMORY_GUARD_POLL_S": "0",
28
+ "THREADKEEPER_SHADOW_REVIEW_INTERVAL_S": "0",
29
+ "THREADKEEPER_CURATOR_INTERVAL_S": "0",
30
+ "THREADKEEPER_EXTRACT_INTERVAL_S": "0",
31
+ "THREADKEEPER_CANDIDATE_REVIEW_INTERVAL_S": "0",
32
+ "THREADKEEPER_PROBE_INTERVAL_S": "0",
33
+ "THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S": interval,
34
+ "THREADKEEPER_EVOLVE_REVIEW_MIN": review_min,
35
+ "THREADKEEPER_TASK_LOG_DIR": str(tmp_path / "tasks"),
36
+ "THREADKEEPER_CLIENT": "pytest",
37
+ "THREADKEEPER_FORCE_CID": _FAKE_CID,
38
+ "THREADKEEPER_NO_EMBEDDINGS": "1",
39
+ }
40
+ for k, v in env.items():
41
+ monkeypatch.setenv(k, v)
42
+ Path(env["CLAUDE_PROJECTS_DIR"]).mkdir(parents=True, exist_ok=True)
43
+ for name in [m for m in list(sys.modules) if m.startswith("threadkeeper")]:
44
+ del sys.modules[name]
45
+ import threadkeeper.server # noqa: F401
46
+ from threadkeeper import _mcp, db, evolve_daemon, identity
47
+ return {"mcp": _mcp.mcp, "db": db, "ed": evolve_daemon, "identity": identity}
48
+
49
+
50
+ def _tool(pkg, name):
51
+ return pkg["mcp"]._tool_manager._tools[name].fn
52
+
53
+
54
+ def _add_evolve(conn, suggestion, rationale=None, applied=0, status="pending"):
55
+ conn.execute(
56
+ "INSERT INTO evolve (suggestion, rationale, applied, status, created_at) "
57
+ "VALUES (?,?,?,?,?)",
58
+ (suggestion, rationale, applied, status, int(time.time())),
59
+ )
60
+ conn.commit()
61
+
62
+
63
+ # ── pending selection ──────────────────────────────────────────────────
64
+
65
+ def test_pending_excludes_applied_and_decided(tmp_path, monkeypatch):
66
+ pkg = _bootstrap(tmp_path, monkeypatch)
67
+ conn = pkg["db"].get_db()
68
+ _add_evolve(conn, "pending one")
69
+ _add_evolve(conn, "already applied", applied=1)
70
+ _add_evolve(conn, "already dismissed", status="dismissed")
71
+ _add_evolve(conn, "already promoted", status="promoted")
72
+ pend = pkg["ed"]._pending(conn)
73
+ sugg = [r["suggestion"] for r in pend]
74
+ assert sugg == ["pending one"]
75
+
76
+
77
+ # ── evolve_decide tool ─────────────────────────────────────────────────
78
+
79
+ def test_evolve_decide_promote(tmp_path, monkeypatch):
80
+ pkg = _bootstrap(tmp_path, monkeypatch)
81
+ conn = pkg["db"].get_db()
82
+ _add_evolve(conn, "make briefs shorter")
83
+ eid = conn.execute("SELECT id FROM evolve").fetchone()["id"]
84
+ out = _tool(pkg, "evolve_decide")(evolve_id=eid, decision="promote",
85
+ reason="clear win")
86
+ assert "status=promoted" in out
87
+ row = conn.execute("SELECT status, review_reason, reviewed_at FROM evolve "
88
+ "WHERE id=?", (eid,)).fetchone()
89
+ assert row["status"] == "promoted"
90
+ assert row["review_reason"] == "clear win"
91
+ assert row["reviewed_at"] is not None
92
+
93
+
94
+ def test_evolve_decide_dismiss_and_bad_inputs(tmp_path, monkeypatch):
95
+ pkg = _bootstrap(tmp_path, monkeypatch)
96
+ conn = pkg["db"].get_db()
97
+ _add_evolve(conn, "dup suggestion")
98
+ eid = conn.execute("SELECT id FROM evolve").fetchone()["id"]
99
+ assert "status=dismissed" in _tool(pkg, "evolve_decide")(
100
+ evolve_id=eid, decision="dismiss", reason="duplicate of #1")
101
+ assert _tool(pkg, "evolve_decide")(
102
+ evolve_id=eid, decision="banana").startswith("ERR bad_decision")
103
+ assert _tool(pkg, "evolve_decide")(
104
+ evolve_id=9999, decision="promote").startswith("ERR evolve_not_found")
105
+
106
+
107
+ # ── run_evolve_pass dispatch ────────────────────────────────────────────
108
+
109
+ def test_run_evolve_pass_disabled(tmp_path, monkeypatch):
110
+ pkg = _bootstrap(tmp_path, monkeypatch)
111
+ assert pkg["ed"].run_evolve_pass() == "disabled"
112
+
113
+
114
+ def test_run_evolve_pass_no_pending(tmp_path, monkeypatch):
115
+ pkg = _bootstrap(tmp_path, monkeypatch)
116
+ assert pkg["ed"].run_evolve_pass(force=True) == "no_pending"
117
+
118
+
119
+ def test_run_evolve_pass_below_min(tmp_path, monkeypatch):
120
+ pkg = _bootstrap(tmp_path, monkeypatch, review_min="2")
121
+ conn = pkg["db"].get_db()
122
+ _add_evolve(conn, "only one")
123
+ assert pkg["ed"].run_evolve_pass(force=True) == "below_min n=1"
124
+
125
+
126
+ def test_run_evolve_pass_spawns_reviewer(tmp_path, monkeypatch):
127
+ pkg = _bootstrap(tmp_path, monkeypatch, review_min="2")
128
+ conn = pkg["db"].get_db()
129
+ _add_evolve(conn, "suggestion alpha", rationale="friction A")
130
+ _add_evolve(conn, "suggestion beta")
131
+ calls = {}
132
+ import threadkeeper.tools.spawn as spawn_mod
133
+ monkeypatch.setattr(spawn_mod, "spawn",
134
+ lambda **kw: calls.update(kw) or "ok task=tk_ev pid=1")
135
+ out = pkg["ed"].run_evolve_pass(force=True)
136
+ assert out.startswith("spawned n=2")
137
+ # both suggestions reached the child prompt
138
+ assert "suggestion alpha" in calls["prompt"]
139
+ assert "suggestion beta" in calls["prompt"]
140
+ assert "friction A" in calls["prompt"]
141
+ assert calls["write_origin"] == "evolve"
142
+ assert calls["role"] == "evolve_reviewer"
143
+ # narrow tool surface: triage only, never applies
144
+ assert "evolve_decide" in calls["extra_allowed_tools"]
145
+ assert "skill_manage" not in calls["extra_allowed_tools"]
146
+ assert pkg["ed"]._last_evolve_ts(conn) > 0
147
+
148
+
149
+ def test_run_evolve_pass_single_flight(tmp_path, monkeypatch):
150
+ pkg = _bootstrap(tmp_path, monkeypatch, review_min="1")
151
+ conn = pkg["db"].get_db()
152
+ _add_evolve(conn, "s1")
153
+ import os
154
+ conn.execute(
155
+ "INSERT INTO tasks (id, pid, cwd, prompt, started_at) "
156
+ "VALUES (?,?,?,?,?)",
157
+ ("tk_evr", os.getpid(), "/tmp",
158
+ "You are an EVOLVE REVIEWER triaging the queue.", int(time.time())),
159
+ )
160
+ conn.commit()
161
+
162
+ def _boom(**kw):
163
+ raise AssertionError("must not spawn while a reviewer runs")
164
+ import threadkeeper.tools.spawn as spawn_mod
165
+ monkeypatch.setattr(spawn_mod, "spawn", _boom)
166
+ assert "reviewer_running" in pkg["ed"].run_evolve_pass(force=True)
167
+
168
+
169
+ # ── brief surfaces promoted ★ first, drops dismissed ───────────────────
170
+
171
+ def test_brief_evolve_promoted_marked_dismissed_hidden(tmp_path, monkeypatch):
172
+ pkg = _bootstrap(tmp_path, monkeypatch)
173
+ conn = pkg["db"].get_db()
174
+ _add_evolve(conn, "promoted one", status="promoted")
175
+ _add_evolve(conn, "pending one", status="pending")
176
+ _add_evolve(conn, "dismissed one", status="dismissed")
177
+ from threadkeeper.brief import render_brief
178
+ text = render_brief(conn)
179
+ # suggestion text is wrapped by q(); assert on the ★ marker + substring
180
+ assert "★" in text
181
+ assert "promoted one" in text
182
+ assert "pending one" in text
183
+ assert "dismissed one" not in text
184
+ # the ★ marker attaches to the promoted suggestion, not the pending one
185
+ assert text.index("★") < text.index("promoted one")
186
+ # promoted sorts before pending
187
+ assert text.index("promoted one") < text.index("pending one")
@@ -347,6 +347,61 @@ def test_extract_filters_shadow_observer_sessions(tmp_path, monkeypatch):
347
347
  assert not any(r["source_cid"] == "shadow-sess" for r in rows)
348
348
 
349
349
 
350
+ def test_extract_filters_spawned_child_sessions(tmp_path, monkeypatch):
351
+ """A session whose cid is a tasks.spawned_cid is one of OUR spawned
352
+ children (curator, panel voter, ad-hoc research agent, ...). Its dialog
353
+ is system-injected task framing + work artifacts, never user intent —
354
+ exclude it wholesale, regardless of how its prompt opens. This catches
355
+ the noise the prompt-prefix list misses: real rejects included children
356
+ opening with 'You are auditing…', 'You are analyzing whether…',
357
+ 'Use the Write tool to…' — none matched _INTERNAL_PROMPT_PREFIXES, so
358
+ 66/107 historical rejects were exactly this class."""
359
+ pkg = _bootstrap(tmp_path, monkeypatch)
360
+ conn = pkg["db"].get_db()
361
+ now = int(time.time())
362
+ child_cid = "child-cid-xyz"
363
+ # Register the child in tasks (parent spawned it). Prompt text is
364
+ # deliberately NOT in any prefix list — the link is what identifies it.
365
+ conn.execute(
366
+ "INSERT INTO tasks (id, pid, parent_cid, spawned_cid, cwd, prompt, "
367
+ "started_at) VALUES ('tk_x', 0, 'parent-cid', ?, '/x', "
368
+ "'You are auditing a slice of lessons. Analyze each one.', ?)",
369
+ (child_cid, now - 200),
370
+ )
371
+ # The child emits substantive-looking dialog that WOULD trip H1/H2/H3.
372
+ _seed_dialog(
373
+ conn, "user",
374
+ "I want you to record the decision: always reset the network "
375
+ "before WDA start, every single run.",
376
+ now - 90, session_id=child_cid,
377
+ )
378
+ _seed_dialog(
379
+ conn, "assistant",
380
+ "## Findings\n\nWe want the pipeline to always dedup first.\n"
381
+ "Therefore the rule is: dedup before enrich. In conclusion, that "
382
+ "is the durable pattern here for every future run of this job.",
383
+ now - 85, session_id=child_cid,
384
+ )
385
+ # A genuine foreground user session — must still be picked up.
386
+ _seed_dialog(
387
+ conn, "user",
388
+ "I want you to record decision notes automatically without "
389
+ "waiting for the agent to remember each time.",
390
+ now - 60, session_id="real-sess",
391
+ )
392
+ conn.commit()
393
+
394
+ out = pkg["extract_daemon"].run_extract_pass(force=True)
395
+ assert "ok" in out
396
+ rows = conn.execute(
397
+ "SELECT source_cid FROM extract_candidates WHERE status='pending'"
398
+ ).fetchall()
399
+ assert any(r["source_cid"] == "real-sess" for r in rows), \
400
+ "real user session should still yield candidates"
401
+ assert not any(r["source_cid"] == child_cid for r in rows), \
402
+ "spawned-child session must be fully excluded"
403
+
404
+
350
405
  # ──────────────────────────────────────────────────────────────────────
351
406
  # Daemon lifecycle
352
407
  # ──────────────────────────────────────────────────────────────────────
@@ -37,6 +37,19 @@ def _bootstrap_with_env(tmp_path, monkeypatch,
37
37
  "CLAUDE_PROJECTS_DIR": str(tmp_path / "fake_claude_projects"),
38
38
  "THREADKEEPER_INGEST_INTERVAL_S": "0",
39
39
  "THREADKEEPER_INGEST_CAP": "0",
40
+ # Zero every background-daemon interval so no daemon thread fires a
41
+ # pass mid-test and emits a counted `spawn` event that races the
42
+ # nudge-counter assertions. Inherited from the real shell env
43
+ # otherwise (a dev box with probe/evolve daemons enabled in
44
+ # settings.json leaks the interval into pytest).
45
+ "THREADKEEPER_PROBE_INTERVAL_S": "0",
46
+ "THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S": "0",
47
+ "THREADKEEPER_SHADOW_REVIEW_INTERVAL_S": "0",
48
+ "THREADKEEPER_CURATOR_INTERVAL_S": "0",
49
+ "THREADKEEPER_EXTRACT_INTERVAL_S": "0",
50
+ "THREADKEEPER_CANDIDATE_REVIEW_INTERVAL_S": "0",
51
+ "THREADKEEPER_SPAWN_BUDGET_POLL_S": "0",
52
+ "THREADKEEPER_MEMORY_GUARD_POLL_S": "0",
40
53
  "THREADKEEPER_TASK_LOG_DIR": str(tmp_path / "tasks"),
41
54
  "THREADKEEPER_CLIENT": "pytest",
42
55
  "THREADKEEPER_MEMORY_NUDGE_INTERVAL": str(memory_interval),