threadkeeper 0.8.0__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/PKG-INFO +22 -4
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/README.md +21 -3
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/pyproject.toml +1 -1
- threadkeeper-0.8.1/tests/test_dashboard.py +123 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_extract_daemon.py +55 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_skills.py +16 -0
- threadkeeper-0.8.1/tests/test_thread_janitor.py +180 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/config.py +19 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/identity.py +5 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/server.py +1 -0
- threadkeeper-0.8.1/threadkeeper/thread_janitor.py +137 -0
- threadkeeper-0.8.1/threadkeeper/tools/dashboard.py +220 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/extract.py +11 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/threads.py +9 -2
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/PKG-INFO +22 -4
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/SOURCES.txt +4 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/LICENSE +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/setup.cfg +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_adapters.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_brief_sections.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_candidate_reviewer.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_core_memory.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_curator.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_delegated_search.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_dialectic.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_dialectic_tier.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_error_paths.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_evolve_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_i18n_multilang.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_identity.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_lessons.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_memory_guard.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_missed_spawns.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_nudges.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_onnx_embeddings.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_panel.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_probe_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_process_health.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_search_fts_punctuation.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_shadow_review.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_skill_hint.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_skill_passive_tier.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_skill_tier.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_skill_use_parser.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_skill_watcher.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_spawn_budget.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_spawn_config.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_spawn_hint.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_spawn_reap.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_spawn_slim.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_threads.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_tools_smoke.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_validate_threads.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/tests/test_vec_search.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/__init__.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/_mcp.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/_setup.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/__init__.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/_hook_helpers.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/base.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/claude_code.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/claude_desktop.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/codex.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/copilot.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/gemini.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/adapters/vscode.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/brief.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/candidate_reviewer.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/curator.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/db.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/embeddings.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/evolve_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/extract_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/helpers.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/i18n.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/ingest.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/lessons.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/memory_guard.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/migrate_embeddings.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/nudges.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/probe_daemon.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/process_health.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/review_prompts.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/search_proxy.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/shadow_review.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/skill_watcher.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/spawn_budget.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/spawn_config.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/__init__.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/candidate_reviewer.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/concepts.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/consolidate.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/core_memory.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/correlation.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/curator.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/dialectic.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/dialog.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/distill.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/graph.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/invariants.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/lessons.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/memory_guard.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/missed_spawns.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/panel.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/peers.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/pickup.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/probes.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/process_health.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/session.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/shadow_review.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/skills.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/spawn.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/style.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper/tools/validate.py +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/dependency_links.txt +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/entry_points.txt +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/requires.txt +0 -0
- {threadkeeper-0.8.0 → threadkeeper-0.8.1}/threadkeeper.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: threadkeeper
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
|
|
5
5
|
Author: thread-keeper contributors
|
|
6
6
|
License: MIT
|
|
@@ -82,10 +82,12 @@ make it more than a memory store:
|
|
|
82
82
|
concurrent sessions signal each other across CLIs. Parent /
|
|
83
83
|
children / sibling agents become a coordinated swarm, not isolated
|
|
84
84
|
chats.
|
|
85
|
-
- **Self-improving skill library** —
|
|
85
|
+
- **Self-improving skill library** — autonomous background loops
|
|
86
86
|
(auto-review on thread close, shadow-review daemon, extract
|
|
87
|
-
harvester, candidate-reviewer, weekly Curator
|
|
88
|
-
|
|
87
|
+
harvester, candidate-reviewer, weekly Curator, and a thread-janitor
|
|
88
|
+
that auto-closes idle threads so abandoned work reaches the harvest
|
|
89
|
+
path — closing is reversible, a note reopens a closed thread)
|
|
90
|
+
materialize class-level skills as the agents work. Adapted to multi-CLI:
|
|
89
91
|
SKILL.md is the primary write target and gets mirrored to every
|
|
90
92
|
known/configured skills root simultaneously (`~/.claude/skills/`,
|
|
91
93
|
`~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
|
|
@@ -519,6 +521,22 @@ them with `dry_run=False` to apply:
|
|
|
519
521
|
|
|
520
522
|
---
|
|
521
523
|
|
|
524
|
+
## Telemetry
|
|
525
|
+
|
|
526
|
+
- **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
|
|
527
|
+
system, read-only. Three sections: **stores** (threads by state,
|
|
528
|
+
notes/dialog/distill/concepts counts, skills + claims by tier,
|
|
529
|
+
extract-candidate and evolve queues, probe/task counts), **loops**
|
|
530
|
+
(how many times each autonomous daemon fired in the window vs 30 days,
|
|
531
|
+
plus last-fire age), and **outcomes** (what those loops actually
|
|
532
|
+
produced — skills materialized, tier promotions, candidate
|
|
533
|
+
accept-vs-reject rate). Surfaces the gaps the point-tools can't:
|
|
534
|
+
a loop firing constantly while its outcomes stay flat, or a queue
|
|
535
|
+
backing up. Complements the per-loop `*_status` tools (`mp_health`,
|
|
536
|
+
`spawn_budget_status`, `shadow_review_status`).
|
|
537
|
+
|
|
538
|
+
---
|
|
539
|
+
|
|
522
540
|
## Storage
|
|
523
541
|
|
|
524
542
|
`~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
|
|
@@ -43,10 +43,12 @@ make it more than a memory store:
|
|
|
43
43
|
concurrent sessions signal each other across CLIs. Parent /
|
|
44
44
|
children / sibling agents become a coordinated swarm, not isolated
|
|
45
45
|
chats.
|
|
46
|
-
- **Self-improving skill library** —
|
|
46
|
+
- **Self-improving skill library** — autonomous background loops
|
|
47
47
|
(auto-review on thread close, shadow-review daemon, extract
|
|
48
|
-
harvester, candidate-reviewer, weekly Curator
|
|
49
|
-
|
|
48
|
+
harvester, candidate-reviewer, weekly Curator, and a thread-janitor
|
|
49
|
+
that auto-closes idle threads so abandoned work reaches the harvest
|
|
50
|
+
path — closing is reversible, a note reopens a closed thread)
|
|
51
|
+
materialize class-level skills as the agents work. Adapted to multi-CLI:
|
|
50
52
|
SKILL.md is the primary write target and gets mirrored to every
|
|
51
53
|
known/configured skills root simultaneously (`~/.claude/skills/`,
|
|
52
54
|
`~/.codex/skills/`, existing `~/.agents/skills/`, extra roots from
|
|
@@ -480,6 +482,22 @@ them with `dry_run=False` to apply:
|
|
|
480
482
|
|
|
481
483
|
---
|
|
482
484
|
|
|
485
|
+
## Telemetry
|
|
486
|
+
|
|
487
|
+
- **`mp_dashboard(window_days=7)`** — one-call rollup of the whole
|
|
488
|
+
system, read-only. Three sections: **stores** (threads by state,
|
|
489
|
+
notes/dialog/distill/concepts counts, skills + claims by tier,
|
|
490
|
+
extract-candidate and evolve queues, probe/task counts), **loops**
|
|
491
|
+
(how many times each autonomous daemon fired in the window vs 30 days,
|
|
492
|
+
plus last-fire age), and **outcomes** (what those loops actually
|
|
493
|
+
produced — skills materialized, tier promotions, candidate
|
|
494
|
+
accept-vs-reject rate). Surfaces the gaps the point-tools can't:
|
|
495
|
+
a loop firing constantly while its outcomes stay flat, or a queue
|
|
496
|
+
backing up. Complements the per-loop `*_status` tools (`mp_health`,
|
|
497
|
+
`spawn_budget_status`, `shadow_review_status`).
|
|
498
|
+
|
|
499
|
+
---
|
|
500
|
+
|
|
483
501
|
## Storage
|
|
484
502
|
|
|
485
503
|
`~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "threadkeeper"
|
|
7
|
-
version = "0.8.0"
|
|
7
|
+
version = "0.8.1"
|
|
8
8
|
description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
authors = [{ name = "thread-keeper contributors" }]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""mp_dashboard — aggregate rollup tool.
|
|
2
|
+
|
|
3
|
+
Verifies the tool renders all sections, counts seeded stores, reflects
|
|
4
|
+
loop-pass + outcome events, and degrades without crashing on an empty DB.
|
|
5
|
+
|
|
6
|
+
NOTE on isolation: assertions are DELTA-based, never absolute counts. The
|
|
7
|
+
suite's `test_tools_smoke.py` does a `del sys.modules` + package re-import
|
|
8
|
+
+ every-tool invocation at COLLECTION time in the parent process, which
|
|
9
|
+
`os.environ.setdefault`-pins a DB path and seeds rows. So "exactly N
|
|
10
|
+
threads" is not guaranteed across the full suite even with `fresh_mp`'s
|
|
11
|
+
tmp DB — we assert that the dashboard reflects the rows THIS test adds
|
|
12
|
+
(before/after delta), which is the real contract anyway.
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
import time
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _tool(pkg, name):
|
|
21
|
+
return pkg["mcp"]._tool_manager._tools[name].fn
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _count(out: str, key: str) -> int:
|
|
25
|
+
"""Pull `<key>=N` from the dashboard text. Absence means zero: when a
|
|
26
|
+
store is empty the grouped `threads:` line collapses to `threads: 0`
|
|
27
|
+
(no `active=` token), and that genuinely means 0 active threads — so a
|
|
28
|
+
missing key reads as 0, which keeps before/after deltas correct."""
|
|
29
|
+
m = re.search(rf"\b{re.escape(key)}=(\d+)", out)
|
|
30
|
+
return int(m.group(1)) if m else 0
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _active_count(out: str) -> int:
|
|
34
|
+
return _count(out, "active")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _notes_count(out: str) -> int:
|
|
38
|
+
return _count(out, "notes")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _concepts_count(out: str) -> int:
|
|
42
|
+
return _count(out, "concepts")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_dashboard_registered(fresh_mp):
|
|
46
|
+
assert "mp_dashboard" in fresh_mp["mcp"]._tool_manager._tools
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_dashboard_empty_db_no_crash(fresh_mp):
|
|
50
|
+
out = _tool(fresh_mp, "mp_dashboard")()
|
|
51
|
+
for section in ("dashboard", "stores", "loops", "outcomes", "reliability"):
|
|
52
|
+
assert section in out, (section, out)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_dashboard_counts_stores_delta(fresh_mp):
|
|
56
|
+
dash = _tool(fresh_mp, "mp_dashboard")
|
|
57
|
+
before = dash()
|
|
58
|
+
a0, n0, c0 = (_active_count(before), _notes_count(before),
|
|
59
|
+
_concepts_count(before))
|
|
60
|
+
|
|
61
|
+
open_thread = _tool(fresh_mp, "open_thread")
|
|
62
|
+
note = _tool(fresh_mp, "note")
|
|
63
|
+
t1 = open_thread(question="alpha")
|
|
64
|
+
open_thread(question="beta")
|
|
65
|
+
note(thread_id=t1, content="a note here", kind="insight")
|
|
66
|
+
note(thread_id=t1, content="another move", kind="move")
|
|
67
|
+
_tool(fresh_mp, "register_concept")(description="a concept by example",
|
|
68
|
+
confidence="low")
|
|
69
|
+
|
|
70
|
+
after = dash()
|
|
71
|
+
assert _active_count(after) - a0 == 2, (a0, after)
|
|
72
|
+
assert _notes_count(after) - n0 == 2, (n0, after)
|
|
73
|
+
assert _concepts_count(after) - c0 == 1, (c0, after)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _shadow_win(out: str) -> int:
|
|
77
|
+
m = re.search(r"shadow\s+(\d+) / \d+", out)
|
|
78
|
+
return int(m.group(1)) if m else 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_dashboard_reflects_loop_and_outcome_events(fresh_mp):
|
|
82
|
+
# Delta measured THROUGH the tool itself (before vs after), so both reads
|
|
83
|
+
# go through the identical DB-resolution path — immune to whatever DB a
|
|
84
|
+
# contaminated parent env pinned. Insert the loop/outcome events the
|
|
85
|
+
# daemons would write, then confirm the dashboard's own count rises by 3.
|
|
86
|
+
conn = fresh_mp["db"].get_db()
|
|
87
|
+
now = int(time.time())
|
|
88
|
+
before = _shadow_win(_tool(fresh_mp, "mp_dashboard")(window_days=7))
|
|
89
|
+
for _ in range(3):
|
|
90
|
+
conn.execute(
|
|
91
|
+
"INSERT INTO events (session_id, kind, target, summary, created_at) "
|
|
92
|
+
"VALUES ('s', 'shadow_review_pass', ?, '', ?)", (str(now), now))
|
|
93
|
+
conn.execute(
|
|
94
|
+
"INSERT INTO events (session_id, kind, target, summary, created_at) "
|
|
95
|
+
"VALUES ('s', 'skill_materialized', 'Tx', 'path', ?)", (now,))
|
|
96
|
+
conn.commit()
|
|
97
|
+
after_out = _tool(fresh_mp, "mp_dashboard")(window_days=7)
|
|
98
|
+
assert _shadow_win(after_out) - before == 3, (before, after_out)
|
|
99
|
+
assert "skill_materialized" in after_out, after_out
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_dashboard_accept_rate(fresh_mp):
|
|
103
|
+
conn = fresh_mp["db"].get_db()
|
|
104
|
+
now = int(time.time())
|
|
105
|
+
# Snapshot existing decisions so the ratio assertion is exact regardless
|
|
106
|
+
# of pre-seeded rows.
|
|
107
|
+
acc0 = conn.execute(
|
|
108
|
+
"SELECT COUNT(*) FROM events WHERE kind LIKE 'accept_candidate%'"
|
|
109
|
+
).fetchone()[0]
|
|
110
|
+
rej0 = conn.execute(
|
|
111
|
+
"SELECT COUNT(*) FROM events WHERE kind='reject_candidate'"
|
|
112
|
+
).fetchone()[0]
|
|
113
|
+
conn.execute(
|
|
114
|
+
"INSERT INTO events (session_id, kind, target, created_at) "
|
|
115
|
+
"VALUES ('s','accept_candidate:note','1',?)", (now,))
|
|
116
|
+
for _ in range(3):
|
|
117
|
+
conn.execute(
|
|
118
|
+
"INSERT INTO events (session_id, kind, target, created_at) "
|
|
119
|
+
"VALUES ('s','reject_candidate','x',?)", (now,))
|
|
120
|
+
conn.commit()
|
|
121
|
+
out = _tool(fresh_mp, "mp_dashboard")()
|
|
122
|
+
acc, dec = acc0 + 1, acc0 + 1 + rej0 + 3
|
|
123
|
+
assert f"candidate_accept_rate {acc}/{dec}" in out, (acc0, rej0, out)
|
|
@@ -347,6 +347,61 @@ def test_extract_filters_shadow_observer_sessions(tmp_path, monkeypatch):
|
|
|
347
347
|
assert not any(r["source_cid"] == "shadow-sess" for r in rows)
|
|
348
348
|
|
|
349
349
|
|
|
350
|
+
def test_extract_filters_spawned_child_sessions(tmp_path, monkeypatch):
|
|
351
|
+
"""A session whose cid is a tasks.spawned_cid is one of OUR spawned
|
|
352
|
+
children (curator, panel voter, ad-hoc research agent, ...). Its dialog
|
|
353
|
+
is system-injected task framing + work artifacts, never user intent —
|
|
354
|
+
exclude it wholesale, regardless of how its prompt opens. This catches
|
|
355
|
+
the noise the prompt-prefix list misses: real rejects included children
|
|
356
|
+
opening with 'You are auditing…', 'You are analyzing whether…',
|
|
357
|
+
'Use the Write tool to…' — none matched _INTERNAL_PROMPT_PREFIXES, so
|
|
358
|
+
66/107 historical rejects were exactly this class."""
|
|
359
|
+
pkg = _bootstrap(tmp_path, monkeypatch)
|
|
360
|
+
conn = pkg["db"].get_db()
|
|
361
|
+
now = int(time.time())
|
|
362
|
+
child_cid = "child-cid-xyz"
|
|
363
|
+
# Register the child in tasks (parent spawned it). Prompt text is
|
|
364
|
+
# deliberately NOT in any prefix list — the link is what identifies it.
|
|
365
|
+
conn.execute(
|
|
366
|
+
"INSERT INTO tasks (id, pid, parent_cid, spawned_cid, cwd, prompt, "
|
|
367
|
+
"started_at) VALUES ('tk_x', 0, 'parent-cid', ?, '/x', "
|
|
368
|
+
"'You are auditing a slice of lessons. Analyze each one.', ?)",
|
|
369
|
+
(child_cid, now - 200),
|
|
370
|
+
)
|
|
371
|
+
# The child emits substantive-looking dialog that WOULD trip H1/H2/H3.
|
|
372
|
+
_seed_dialog(
|
|
373
|
+
conn, "user",
|
|
374
|
+
"I want you to record the decision: always reset the network "
|
|
375
|
+
"before WDA start, every single run.",
|
|
376
|
+
now - 90, session_id=child_cid,
|
|
377
|
+
)
|
|
378
|
+
_seed_dialog(
|
|
379
|
+
conn, "assistant",
|
|
380
|
+
"## Findings\n\nWe want the pipeline to always dedup first.\n"
|
|
381
|
+
"Therefore the rule is: dedup before enrich. In conclusion, that "
|
|
382
|
+
"is the durable pattern here for every future run of this job.",
|
|
383
|
+
now - 85, session_id=child_cid,
|
|
384
|
+
)
|
|
385
|
+
# A genuine foreground user session — must still be picked up.
|
|
386
|
+
_seed_dialog(
|
|
387
|
+
conn, "user",
|
|
388
|
+
"I want you to record decision notes automatically without "
|
|
389
|
+
"waiting for the agent to remember each time.",
|
|
390
|
+
now - 60, session_id="real-sess",
|
|
391
|
+
)
|
|
392
|
+
conn.commit()
|
|
393
|
+
|
|
394
|
+
out = pkg["extract_daemon"].run_extract_pass(force=True)
|
|
395
|
+
assert "ok" in out
|
|
396
|
+
rows = conn.execute(
|
|
397
|
+
"SELECT source_cid FROM extract_candidates WHERE status='pending'"
|
|
398
|
+
).fetchall()
|
|
399
|
+
assert any(r["source_cid"] == "real-sess" for r in rows), \
|
|
400
|
+
"real user session should still yield candidates"
|
|
401
|
+
assert not any(r["source_cid"] == child_cid for r in rows), \
|
|
402
|
+
"spawned-child session must be fully excluded"
|
|
403
|
+
|
|
404
|
+
|
|
350
405
|
# ──────────────────────────────────────────────────────────────────────
|
|
351
406
|
# Daemon lifecycle
|
|
352
407
|
# ──────────────────────────────────────────────────────────────────────
|
|
@@ -31,6 +31,22 @@ def skills_pkg(tmp_path, monkeypatch):
|
|
|
31
31
|
"CLAUDE_PROJECTS_DIR": str(tmp_path / "fake_claude_projects"),
|
|
32
32
|
"THREADKEEPER_INGEST_INTERVAL_S": "0",
|
|
33
33
|
"THREADKEEPER_INGEST_CAP": "0",
|
|
34
|
+
# Disable every background daemon. Without this the skill_watcher
|
|
35
|
+
# daemon runs live and races delete tests: it scans CLAUDE_SKILLS_DIR
|
|
36
|
+
# on a timer and re-INSERTs a skill_usage row right after a test's
|
|
37
|
+
# delete removed dir+row, making test_delete_removes_skill_dir_and_
|
|
38
|
+
# usage_row flake (~2 in 20). Same daemon-vs-test TOCTOU the conftest
|
|
39
|
+
# _force_clean_env guards against; this bespoke fixture must mirror it.
|
|
40
|
+
"THREADKEEPER_SKILL_WATCH_INTERVAL_S": "0",
|
|
41
|
+
"THREADKEEPER_SPAWN_BUDGET_POLL_S": "0",
|
|
42
|
+
"THREADKEEPER_MEMORY_GUARD_POLL_S": "0",
|
|
43
|
+
"THREADKEEPER_SEARCH_PROXY_POLL_S": "0",
|
|
44
|
+
"THREADKEEPER_SHADOW_REVIEW_INTERVAL_S": "0",
|
|
45
|
+
"THREADKEEPER_CURATOR_INTERVAL_S": "0",
|
|
46
|
+
"THREADKEEPER_EXTRACT_INTERVAL_S": "0",
|
|
47
|
+
"THREADKEEPER_CANDIDATE_REVIEW_INTERVAL_S": "0",
|
|
48
|
+
"THREADKEEPER_PROBE_INTERVAL_S": "0",
|
|
49
|
+
"THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S": "0",
|
|
34
50
|
"THREADKEEPER_TASK_LOG_DIR": str(tmp_path / "tasks"),
|
|
35
51
|
"THREADKEEPER_CLIENT": "pytest",
|
|
36
52
|
"THREADKEEPER_FORCE_CID": _FAKE_CID,
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Thread-janitor daemon — closes idle threads so the skill-harvest path
|
|
2
|
+
(close_thread → auto-review hook) actually runs.
|
|
3
|
+
|
|
4
|
+
Isolation: bespoke _bootstrap mirrors conftest._force_clean_env (all
|
|
5
|
+
daemons off) so the janitor only fires when a test calls run_janitor_pass
|
|
6
|
+
directly. AUTO_REVIEW off here — we test the CLOSE behavior; the harvest
|
|
7
|
+
hook is close_thread's own tested concern.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
import time
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
_FAKE_CID = "11111111-2222-3333-4444-555555555555"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _bootstrap(tmp_path, monkeypatch, interval="0", idle_days="1"):
|
|
20
|
+
env = {
|
|
21
|
+
"THREADKEEPER_DB": str(tmp_path / "db.sqlite"),
|
|
22
|
+
"CLAUDE_PROJECTS_DIR": str(tmp_path / "fake_claude_projects"),
|
|
23
|
+
"THREADKEEPER_INGEST_INTERVAL_S": "0",
|
|
24
|
+
"THREADKEEPER_INGEST_CAP": "0",
|
|
25
|
+
"THREADKEEPER_SKILL_WATCH_INTERVAL_S": "0",
|
|
26
|
+
"THREADKEEPER_SPAWN_BUDGET_POLL_S": "0",
|
|
27
|
+
"THREADKEEPER_MEMORY_GUARD_POLL_S": "0",
|
|
28
|
+
"THREADKEEPER_SEARCH_PROXY_POLL_S": "0",
|
|
29
|
+
"THREADKEEPER_SHADOW_REVIEW_INTERVAL_S": "0",
|
|
30
|
+
"THREADKEEPER_CURATOR_INTERVAL_S": "0",
|
|
31
|
+
"THREADKEEPER_EXTRACT_INTERVAL_S": "0",
|
|
32
|
+
"THREADKEEPER_CANDIDATE_REVIEW_INTERVAL_S": "0",
|
|
33
|
+
"THREADKEEPER_PROBE_INTERVAL_S": "0",
|
|
34
|
+
"THREADKEEPER_EVOLVE_REVIEW_INTERVAL_S": "0",
|
|
35
|
+
"THREADKEEPER_THREAD_JANITOR_INTERVAL_S": interval,
|
|
36
|
+
"THREADKEEPER_THREAD_IDLE_CLOSE_DAYS": idle_days,
|
|
37
|
+
"THREADKEEPER_AUTO_REVIEW": "", # off — harvest hook is close_thread's concern
|
|
38
|
+
"THREADKEEPER_LESSONS": str(tmp_path / "lessons.md"),
|
|
39
|
+
"THREADKEEPER_TASK_LOG_DIR": str(tmp_path / "tasks"),
|
|
40
|
+
"THREADKEEPER_CLIENT": "pytest",
|
|
41
|
+
"THREADKEEPER_FORCE_CID": _FAKE_CID,
|
|
42
|
+
"THREADKEEPER_NO_EMBEDDINGS": "1",
|
|
43
|
+
}
|
|
44
|
+
for k, v in env.items():
|
|
45
|
+
monkeypatch.setenv(k, v)
|
|
46
|
+
Path(env["CLAUDE_PROJECTS_DIR"]).mkdir(parents=True, exist_ok=True)
|
|
47
|
+
for name in [m for m in list(sys.modules) if m.startswith("threadkeeper")]:
|
|
48
|
+
del sys.modules[name]
|
|
49
|
+
import threadkeeper.server # noqa: F401
|
|
50
|
+
from threadkeeper import db, thread_janitor, identity, _mcp
|
|
51
|
+
return {
|
|
52
|
+
"db": db,
|
|
53
|
+
"thread_janitor": thread_janitor,
|
|
54
|
+
"identity": identity,
|
|
55
|
+
"mcp": _mcp.mcp,
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _tool(pkg, name):
|
|
60
|
+
return pkg["mcp"]._tool_manager._tools[name].fn
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _age_thread(conn, tid, days_ago):
|
|
64
|
+
"""Backdate a thread's last_touched_at by `days_ago` days."""
|
|
65
|
+
ts = int(time.time()) - int(days_ago * 86400)
|
|
66
|
+
conn.execute(
|
|
67
|
+
"UPDATE threads SET last_touched_at=? WHERE id=?", (ts, tid)
|
|
68
|
+
)
|
|
69
|
+
conn.commit()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
73
|
+
# dispatch / gating
|
|
74
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
def test_disabled_without_force(tmp_path, monkeypatch):
|
|
77
|
+
pkg = _bootstrap(tmp_path, monkeypatch, interval="0")
|
|
78
|
+
assert pkg["thread_janitor"].run_janitor_pass() == "disabled"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_no_stale_when_all_fresh(tmp_path, monkeypatch):
|
|
82
|
+
pkg = _bootstrap(tmp_path, monkeypatch)
|
|
83
|
+
_tool(pkg, "open_thread")(question="fresh thread")
|
|
84
|
+
# default last_touched_at = now → not stale
|
|
85
|
+
assert pkg["thread_janitor"].run_janitor_pass(force=True) == "no_stale"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
89
|
+
# closing behavior
|
|
90
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
def test_closes_stale_active_thread(tmp_path, monkeypatch):
|
|
93
|
+
pkg = _bootstrap(tmp_path, monkeypatch, idle_days="1")
|
|
94
|
+
conn = pkg["db"].get_db()
|
|
95
|
+
tid = _tool(pkg, "open_thread")(question="stale one")
|
|
96
|
+
_age_thread(conn, tid, days_ago=2) # older than 1d threshold
|
|
97
|
+
out = pkg["thread_janitor"].run_janitor_pass(force=True)
|
|
98
|
+
assert out == "closed=1", out
|
|
99
|
+
row = conn.execute("SELECT state, outcome FROM threads WHERE id=?",
|
|
100
|
+
(tid,)).fetchone()
|
|
101
|
+
assert row["state"] == "closed"
|
|
102
|
+
assert "janitor" in (row["outcome"] or "")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_closes_stale_idle_thread(tmp_path, monkeypatch):
|
|
106
|
+
pkg = _bootstrap(tmp_path, monkeypatch, idle_days="1")
|
|
107
|
+
conn = pkg["db"].get_db()
|
|
108
|
+
tid = _tool(pkg, "open_thread")(question="parked")
|
|
109
|
+
_tool(pkg, "idle_thread")(thread_id=tid)
|
|
110
|
+
_age_thread(conn, tid, days_ago=3)
|
|
111
|
+
out = pkg["thread_janitor"].run_janitor_pass(force=True)
|
|
112
|
+
assert out == "closed=1", out
|
|
113
|
+
row = conn.execute("SELECT state FROM threads WHERE id=?", (tid,)).fetchone()
|
|
114
|
+
assert row["state"] == "closed"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_leaves_fresh_thread_open(tmp_path, monkeypatch):
|
|
118
|
+
pkg = _bootstrap(tmp_path, monkeypatch, idle_days="1")
|
|
119
|
+
conn = pkg["db"].get_db()
|
|
120
|
+
stale = _tool(pkg, "open_thread")(question="stale")
|
|
121
|
+
fresh = _tool(pkg, "open_thread")(question="fresh")
|
|
122
|
+
_age_thread(conn, stale, days_ago=2)
|
|
123
|
+
# fresh keeps default now-ish last_touched_at
|
|
124
|
+
out = pkg["thread_janitor"].run_janitor_pass(force=True)
|
|
125
|
+
assert out == "closed=1", out
|
|
126
|
+
s = conn.execute("SELECT state FROM threads WHERE id=?", (stale,)).fetchone()
|
|
127
|
+
f = conn.execute("SELECT state FROM threads WHERE id=?", (fresh,)).fetchone()
|
|
128
|
+
assert s["state"] == "closed"
|
|
129
|
+
assert f["state"] == "active"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_idempotent_second_pass(tmp_path, monkeypatch):
|
|
133
|
+
pkg = _bootstrap(tmp_path, monkeypatch, idle_days="1")
|
|
134
|
+
conn = pkg["db"].get_db()
|
|
135
|
+
tid = _tool(pkg, "open_thread")(question="stale")
|
|
136
|
+
_age_thread(conn, tid, days_ago=2)
|
|
137
|
+
assert pkg["thread_janitor"].run_janitor_pass(force=True) == "closed=1"
|
|
138
|
+
# already closed → not re-matched
|
|
139
|
+
assert pkg["thread_janitor"].run_janitor_pass(force=True) == "no_stale"
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def test_closed_then_note_reopens_survives_janitor(tmp_path, monkeypatch):
    """End-to-end check of the safety story: the janitor closes a stale
    thread, a note reopens it (fresh last_touched_at), and the following
    janitor pass leaves it alone because it is no longer stale."""
    pkg = _bootstrap(tmp_path, monkeypatch, idle_days="1")
    db = pkg["db"].get_db()
    thread_id = _tool(pkg, "open_thread")(question="comes back")

    def current_state():
        # Re-read the thread's state from the database on every call.
        found = db.execute(
            "SELECT state FROM threads WHERE id=?", (thread_id,)
        ).fetchone()
        return found["state"]

    _age_thread(db, thread_id, days_ago=2)
    pkg["thread_janitor"].run_janitor_pass(force=True)
    assert current_state() == "closed"

    # The user returns, the agent adds a note, and the thread reopens.
    _tool(pkg, "note")(
        thread_id=thread_id, content="picking this back up", kind="move"
    )
    assert current_state() == "active"

    # The thread is fresh now, so another pass must not close it again.
    second_pass = pkg["thread_janitor"].run_janitor_pass(force=True)
    assert second_pass == "no_stale"
    assert current_state() == "active"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def test_records_janitor_pass_event(tmp_path, monkeypatch):
    """A pass that closes one thread leaves a `janitor_pass` event whose
    summary contains the closed count."""
    pkg = _bootstrap(tmp_path, monkeypatch, idle_days="1")
    db = pkg["db"].get_db()
    thread_id = _tool(pkg, "open_thread")(question="stale")
    _age_thread(db, thread_id, days_ago=2)

    pkg["thread_janitor"].run_janitor_pass(force=True)

    # Look only at the most recent janitor_pass event.
    latest_event_sql = (
        "SELECT summary FROM events WHERE kind='janitor_pass' "
        "ORDER BY id DESC LIMIT 1"
    )
    event = db.execute(latest_event_sql).fetchone()
    assert event is not None
    assert "closed=1" in event["summary"]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def test_daemon_does_not_start_at_interval_zero(tmp_path, monkeypatch):
    """With interval="0" the starter is a no-op and `_started` stays False."""
    pkg = _bootstrap(tmp_path, monkeypatch, interval="0")
    janitor = pkg["thread_janitor"]
    janitor.start_thread_janitor()
    assert janitor._started is False
|
|
@@ -372,3 +372,22 @@ EVOLVE_REVIEW_INTERVAL_S: float = float(
|
|
|
372
372
|
EVOLVE_REVIEW_MIN: int = int(
|
|
373
373
|
os.environ.get("THREADKEEPER_EVOLVE_REVIEW_MIN", "2")
|
|
374
374
|
)
|
|
375
|
+
|
|
376
|
+
# Thread-janitor daemon settings.
#
# Skill harvesting fires on close_thread(), but threads are seldom closed:
# the user never closes them and the agent rarely does. Threads therefore
# pile up open (the audit found 32 active, some stale for 12 days) and
# abandoned work never gets reviewed into a skill (2 auto-review spawns ever,
# 5 skills out of 115 closes). The janitor closes threads that have been idle
# past THREAD_IDLE_CLOSE_DAYS, going through the normal close_thread() path
# so the auto-review hook fires for the richest pending thread. This is safe
# because closing is reversible: note() revives a closed thread (see note()
# in tools/threads).
#
# Seconds between janitor passes. 0 disables the daemon (the default, so it
# is opt-in). Recommended: 86400 (daily) — this is slow housekeeping, not a
# hot loop.
THREAD_JANITOR_INTERVAL_S: float = float(
    os.getenv("THREADKEEPER_THREAD_JANITOR_INTERVAL_S", "0")
)
# Idle threshold in days: active/idle threads whose last_touched_at is older
# than this are closed. Default 1 (user's choice): aggressive, but a closed
# thread is reopenable on return.
THREAD_IDLE_CLOSE_DAYS: float = float(
    os.getenv("THREADKEEPER_THREAD_IDLE_CLOSE_DAYS", "1")
)
|
|
@@ -169,6 +169,11 @@ def _ensure_session(conn: sqlite3.Connection, client: Optional[str] = None) -> s
|
|
|
169
169
|
evolve_daemon.start_evolve_daemon()
|
|
170
170
|
except Exception:
|
|
171
171
|
pass
|
|
172
|
+
try:
|
|
173
|
+
from . import thread_janitor
|
|
174
|
+
thread_janitor.start_thread_janitor()
|
|
175
|
+
except Exception:
|
|
176
|
+
pass
|
|
172
177
|
return _session_id
|
|
173
178
|
|
|
174
179
|
|
|
@@ -53,6 +53,7 @@ from .tools import shadow_review # noqa: F401
|
|
|
53
53
|
from .tools import lessons # noqa: F401
|
|
54
54
|
from .tools import curator # noqa: F401
|
|
55
55
|
from .tools import candidate_reviewer # noqa: F401
|
|
56
|
+
from .tools import dashboard # noqa: F401
|
|
56
57
|
|
|
57
58
|
|
|
58
59
|
if __name__ == "__main__":
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Thread-janitor daemon — autonomously close stale threads so abandoned
|
|
2
|
+
work gets harvested into skills.
|
|
3
|
+
|
|
4
|
+
The skill-harvest path is event-driven: `close_thread()` fires the
|
|
5
|
+
auto-review hook, which spawns a background child that materializes a skill
|
|
6
|
+
from a rich closed thread. But that path only runs when threads actually
|
|
7
|
+
CLOSE — and in practice they don't: the user never closes threads, and the
|
|
8
|
+
agent rarely remembers to. The audit found 32 threads open (some idle 12d),
|
|
9
|
+
2 auto-review spawns ever, 5 skills from 115 closes. The harvest machinery
|
|
10
|
+
was starved of its trigger.
|
|
11
|
+
|
|
12
|
+
This daemon supplies the trigger. Each pass it finds threads idle past
|
|
13
|
+
THREAD_IDLE_CLOSE_DAYS and closes them via the normal `close_thread()` path,
|
|
14
|
+
so the existing auto-review hook fires (for the richest pending thread) and
|
|
15
|
+
the brief's skill_hint surfaces the rest for the foreground agent.
|
|
16
|
+
|
|
17
|
+
Aggressive auto-close is safe ONLY because closing is reversible: a note()
|
|
18
|
+
on a closed thread revives it to active (see tools/threads.note). Returning
|
|
19
|
+
to a topic — i.e. adding a note — reopens it. So the janitor can close
|
|
20
|
+
freely; nothing is lost, just parked.
|
|
21
|
+
|
|
22
|
+
Mirror of the other daemons: interval knob (0 = off), foreground-only via
|
|
23
|
+
BACKGROUND_DAEMONS_ALLOWED so spawned children don't recurse, idempotent
|
|
24
|
+
(already-closed threads don't re-match), records a `janitor_pass` event for
|
|
25
|
+
observability / the dashboard.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import logging
|
|
31
|
+
import sqlite3
|
|
32
|
+
import threading
|
|
33
|
+
import time
|
|
34
|
+
|
|
35
|
+
from .config import THREAD_JANITOR_INTERVAL_S, THREAD_IDLE_CLOSE_DAYS
|
|
36
|
+
from .db import get_db
|
|
37
|
+
from . import identity
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
_started = False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _record_janitor_pass(conn: sqlite3.Connection, outcome: str) -> None:
    """Best-effort insert of a `janitor_pass` row into the events table.

    The summary column receives at most the first 300 characters of
    `outcome`; the session id falls back to an empty string when none is
    set. An sqlite3.OperationalError is logged at debug level and swallowed.
    """
    insert_sql = (
        "INSERT INTO events (session_id, kind, target, summary, "
        "created_at) VALUES (?, 'janitor_pass', '', ?, ?)"
    )
    session = identity._session_id or ""
    summary = outcome[:300]
    stamp = int(time.time())
    try:
        conn.execute(insert_sql, (session, summary, stamp))
        conn.commit()
    except sqlite3.OperationalError:
        logger.debug("thread_janitor: failed to record pass", exc_info=True)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _stale_threads(conn: sqlite3.Connection, cutoff: int) -> list[sqlite3.Row]:
|
|
57
|
+
"""Active or idle threads not touched since `cutoff`, oldest first."""
|
|
58
|
+
try:
|
|
59
|
+
return conn.execute(
|
|
60
|
+
"SELECT id, question FROM threads "
|
|
61
|
+
"WHERE state IN ('active','idle') AND last_touched_at < ? "
|
|
62
|
+
"ORDER BY last_touched_at ASC",
|
|
63
|
+
(cutoff,),
|
|
64
|
+
).fetchall()
|
|
65
|
+
except sqlite3.OperationalError:
|
|
66
|
+
return []
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def run_janitor_pass(force: bool = False) -> str:
    """One janitor pass: close every thread idle past the threshold via
    close_thread() (which fires the auto-review hook). Returns a short
    status string for observability:

      'disabled'  — knob off and not forced
      'no_stale'  — nothing past the idle threshold
      'closed=N'  — closed N stale threads

    `force=True` runs the pass even when THREAD_JANITOR_INTERVAL_S is <= 0.
    Every pass that runs also records a `janitor_pass` event carrying the
    same status string.
    """
    if THREAD_JANITOR_INTERVAL_S <= 0 and not force:
        return "disabled"
    conn = get_db()
    now = int(time.time())
    # Clamp once and reuse: a negative setting behaves as 0 days, and the
    # outcome text below must report the threshold that was actually applied.
    days = max(0.0, THREAD_IDLE_CLOSE_DAYS)
    cutoff = now - int(days * 86400)
    stale = _stale_threads(conn, cutoff)
    if not stale:
        _record_janitor_pass(conn, "no_stale")
        return "no_stale"

    # Late import — tools.threads imports brief/embeddings; importing at
    # module load would risk a cycle. close_thread() owns the state change,
    # the close event, AND the auto-review hook, so routing through it keeps
    # the janitor's closes indistinguishable from a manual close.
    from .tools.threads import close_thread

    # Show whole-day thresholds without a trailing ".0".
    days_disp = int(days) if float(days).is_integer() else days
    outcome = f"auto-closed by janitor: idle > {days_disp}d (reopen via note)"
    closed = 0
    for t in stale:
        try:
            res = close_thread(thread_id=t["id"], outcome=outcome)
            # Only results starting with "ok" count as a successful close.
            if isinstance(res, str) and res.startswith("ok"):
                closed += 1
        except Exception:  # noqa: BLE001 — never crash the daemon on one row
            logger.debug("thread_janitor: close failed for %s",
                         t["id"], exc_info=True)
    out = f"closed={closed}"
    _record_janitor_pass(conn, out)
    return out
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _serve_loop() -> None:
    """Daemon body: run a janitor pass, sleep THREAD_JANITOR_INTERVAL_S
    seconds, repeat forever. A failing pass is logged at debug level and
    does not stop the loop."""

    def _tick() -> None:
        # One guarded pass; any Exception is logged, never propagated.
        try:
            run_janitor_pass()
        except Exception:
            logger.debug("thread_janitor tick failed", exc_info=True)

    while True:
        _tick()
        time.sleep(THREAD_JANITOR_INTERVAL_S)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def start_thread_janitor() -> None:
    """Start the janitor thread at most once.

    Does nothing when the janitor is already started, when
    THREAD_JANITOR_INTERVAL_S <= 0, or when BACKGROUND_DAEMONS_ALLOWED is
    falsy. The last guard is the same cascade prevention as the other
    daemons: spawned children / non-foreground origins refuse to start it,
    so a review child the janitor triggers can't spin up its own janitor.
    """
    global _started
    if _started or THREAD_JANITOR_INTERVAL_S <= 0:
        return
    # Imported only after the cheap guards above have passed.
    from .config import BACKGROUND_DAEMONS_ALLOWED
    if BACKGROUND_DAEMONS_ALLOWED:
        worker = threading.Thread(
            name="thread_janitor", target=_serve_loop, daemon=True,
        )
        worker.start()
        _started = True
|