swarph-cli 0.3.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {swarph_cli-0.3.0/src/swarph_cli.egg-info → swarph_cli-0.5.0}/PKG-INFO +63 -7
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/README.md +61 -5
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/pyproject.toml +2 -2
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/__init__.py +1 -1
- swarph_cli-0.5.0/src/swarph_cli/commands/daemon.py +438 -0
- swarph_cli-0.5.0/src/swarph_cli/commands/onboard.py +377 -0
- swarph_cli-0.5.0/src/swarph_cli/commands/ratify.py +283 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/main.py +13 -4
- {swarph_cli-0.3.0 → swarph_cli-0.5.0/src/swarph_cli.egg-info}/PKG-INFO +63 -7
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/SOURCES.txt +8 -1
- swarph_cli-0.5.0/tests/test_daemon_command.py +356 -0
- swarph_cli-0.5.0/tests/test_onboard_command.py +279 -0
- swarph_cli-0.5.0/tests/test_ratify_command.py +224 -0
- swarph_cli-0.5.0/tests/test_smoke_phase_5_5.py +144 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/LICENSE +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/setup.cfg +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/caller.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/commands/__init__.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/commands/chat.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/commands/import_session.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/parsers/__init__.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/parsers/claude.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/dependency_links.txt +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/entry_points.txt +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/requires.txt +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/top_level.txt +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_chat_command.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_claude_parser.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_import_command.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_main.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_smoke_chat.py +0 -0
- {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_smoke_one_shot.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: swarph-cli
|
|
3
|
-
Version: 0.3.0
|
|
4
|
-
Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.5.0 ships Phase 5.6 `swarph daemon` (foreground inbox drain — retires the orphaned-tail-F class) on top of Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify (PLAN.md §13 / §16).
|
|
5
5
|
Author: Pierre Samson, Claude Opus
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/darw007d/swarph-cli
|
|
@@ -49,13 +49,68 @@ This is one of three repos in the v0.3.x architecture:
|
|
|
49
49
|
|
|
50
50
|
## Status
|
|
51
51
|
|
|
52
|
-
**v0.
|
|
52
|
+
**v0.5.0 — Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify + Phase 5.6 daemon.** Six verbs ship:
|
|
53
53
|
|
|
54
54
|
1. `swarph "prompt"` — Phase 2 one-shot mode (any of five providers)
|
|
55
55
|
2. `swarph chat` — Phase 5 interactive REPL with multi-turn history + slash commands
|
|
56
56
|
3. `swarph import <path>` — Phase 2.5 session import (Claude JSONL → swarph-native, with `--report-only` for honest pre-commit inspection)
|
|
57
|
+
4. `swarph onboard <peer-name>` — Phase 5.5 mechanics-phase onboarding (PLAN.md §15.4)
|
|
58
|
+
5. `swarph ratify <peer-name>` — Phase 5.5 witness ratification (PLAN.md §15.4a)
|
|
59
|
+
6. `swarph daemon` — **NEW** Phase 5.6 foreground inbox drain loop (PLAN.md §16); structurally retires the orphaned-tail-F class
|
|
57
60
|
|
|
58
|
-
Subsequent phases extend the CLI surface (`--ask <peer>`,
|
|
61
|
+
Subsequent phases extend the CLI surface (`--ask <peer>`, REPL drain coroutine + `/inbox` + `/reply` slash commands in 5.6b).
|
|
62
|
+
|
|
63
|
+
### `swarph daemon` (Phase 5.6)
|
|
64
|
+
|
|
65
|
+
Replaces the 4-layer `tail -F | grep | Monitor | systemd | cron poll` stack with one foreground process. Liveness check collapses to:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
ps aux | grep '[s]warph daemon' # zero output = monitoring is down
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
$ swarph daemon --state-dir ~/swarph_state/lab-ovh --self lab-ovh
|
|
73
|
+
[swarph-daemon] starting: self=lab-ovh gateway=http://localhost:8788 poll=30s ...
|
|
74
|
+
[2026-05-08T21:00:30Z] id=728 from=droplet kind=answer → 'Drop review on Phase 5.5 PRs A+B...'
|
|
75
|
+
[2026-05-08T21:01:10Z] id=729 from=droplet kind=fyi → 'Both Phase 5.5 PRs merged...'
|
|
76
|
+
^C
|
|
77
|
+
[swarph-daemon] signal 2 received — draining + flushing cursor
|
|
78
|
+
[swarph-daemon] shutdown: iterations=12 dms_seen=2 cursor.last_msg_id=729
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Loud-on-down (PLAN §16.5): never silently exits. Cursor writes are atomic (write-and-rename — corrupted mid-flush leaves the previous cursor intact). Backoff: 60s after 5 consecutive empty polls; 300s after 5 min of consecutive 5xx. SIGINT/SIGTERM trigger clean drain + flush.
|
|
82
|
+
|
|
83
|
+
`--auto-act` flag is documented for v0.5.1+ when handler registration via `@swarph.on_dm(...)` lands; v0.5.0 ships surface-only mode (DMs printed + JSONL-logged to `inbox.log`, no automatic replies).
|
|
84
|
+
|
|
85
|
+
### `swarph onboard` + `swarph ratify` (Phase 5.5)
|
|
86
|
+
|
|
87
|
+
Per PLAN.md §15, onboarding splits into a **mechanics phase** (`swarph onboard`) that automates the boring parts (registry POST, scaffolding, token resolution) and a **manual contract phase** (the new peer composes the handshake DM in their own words). A witness peer judges the handshake and runs `swarph ratify <peer>` to flip `ratified=true`, gating `task_claim` server-side.
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
# New peer self-onboards
|
|
91
|
+
$ swarph onboard razorpeter
|
|
92
|
+
[1/6] validate_node_name('razorpeter') ok
|
|
93
|
+
[2/6] prepare peer-registry row ok
|
|
94
|
+
[3/6] resolve MESH_GATEWAY_TOKEN ok
|
|
95
|
+
[4/6] POST .../peers/register ok (registered_unratified=true)
|
|
96
|
+
[5/6] verify_subscription_setup() ok
|
|
97
|
+
[6/6] scaffold ~/swarph_state/razorpeter/ ok
|
|
98
|
+
|
|
99
|
+
[manual] handshake template at /tmp/razorpeter-handshake.md
|
|
100
|
+
Edit each section in your own words, then send to your witness peer.
|
|
101
|
+
|
|
102
|
+
# After peer composes + sends handshake, witness ratifies
|
|
103
|
+
$ SWARPH_WITNESS=lab-ovh swarph ratify razorpeter \
|
|
104
|
+
--reason "handshake covers all four invariants in own words"
|
|
105
|
+
[1/6] validate_node_name('razorpeter') ok
|
|
106
|
+
[2/6] verify witness 'lab-ovh' is ratified ok
|
|
107
|
+
[3/6] verify 'razorpeter' is registered_unratified ok
|
|
108
|
+
[4/6] PATCH .../peers/razorpeter ok
|
|
109
|
+
[5/6] verify peer_ratifications audit row ok (id=N reason='...')
|
|
110
|
+
[6/6] invalidate local TTL cache ok
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Server-side gating (mesh-gateway PR A): unratified peers can read inbox + send DMs (so the handshake itself works) but `task_claim` returns 403. Witness must itself be ratified — no self-ratification, no unratified-witnesses-ratifying-others. Audit log (`peer_ratifications`) is append-only.
|
|
59
114
|
|
|
60
115
|
### `swarph chat`
|
|
61
116
|
|
|
@@ -158,10 +213,11 @@ Pong!
|
|
|
158
213
|
| **0** | Scaffold — entry-point + status banner |
|
|
159
214
|
| **2** (v0.1.0) | One-shot mode: `swarph "hello" --provider gemini` |
|
|
160
215
|
| **2.5** (v0.2.0) | `swarph import` — Claude JSONL → swarph-native session format |
|
|
161
|
-
| **5** (v0.3.0
|
|
216
|
+
| **5** (v0.3.0) | `swarph chat` interactive REPL — multi-turn against any of five adapters + slash commands |
|
|
217
|
+
| **5.5** (v0.4.0) | `swarph onboard` + `swarph ratify` — six mechanics steps + handshake template + witness flip (PLAN.md §15) |
|
|
218
|
+
| **5.6** (v0.5.0 — this release) | **`swarph daemon`** — foreground inbox drain loop with atomic cursor writes; retires the orphaned-tail-F class (PLAN.md §16) |
|
|
219
|
+
| **5.6b** | REPL drain coroutine + `/inbox`/`/reply` slash commands + `@swarph.on_dm()` handler registration (mesh + cli) |
|
|
162
220
|
| **3** | `--ask <peer>` mesh-aware one-shot via MeshClient |
|
|
163
|
-
| **5.5** | `swarph onboard <peer-name>` + `swarph ratify <peer-name>` (PLAN.md §15) |
|
|
164
|
-
| **5.6** | `swarph daemon` foreground drain loop + REPL drain coroutine + `/inbox`, `/reply` (PLAN.md §16) |
|
|
165
221
|
| **6** | (already done) PyPI publish |
|
|
166
222
|
|
|
167
223
|
## Why split CLI from substrate
|
|
@@ -17,13 +17,68 @@ This is one of three repos in the v0.3.x architecture:
|
|
|
17
17
|
|
|
18
18
|
## Status
|
|
19
19
|
|
|
20
|
-
**v0.
|
|
20
|
+
**v0.5.0 — Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify + Phase 5.6 daemon.** Six verbs ship:
|
|
21
21
|
|
|
22
22
|
1. `swarph "prompt"` — Phase 2 one-shot mode (any of five providers)
|
|
23
23
|
2. `swarph chat` — Phase 5 interactive REPL with multi-turn history + slash commands
|
|
24
24
|
3. `swarph import <path>` — Phase 2.5 session import (Claude JSONL → swarph-native, with `--report-only` for honest pre-commit inspection)
|
|
25
|
+
4. `swarph onboard <peer-name>` — Phase 5.5 mechanics-phase onboarding (PLAN.md §15.4)
|
|
26
|
+
5. `swarph ratify <peer-name>` — Phase 5.5 witness ratification (PLAN.md §15.4a)
|
|
27
|
+
6. `swarph daemon` — **NEW** Phase 5.6 foreground inbox drain loop (PLAN.md §16); structurally retires the orphaned-tail-F class
|
|
25
28
|
|
|
26
|
-
Subsequent phases extend the CLI surface (`--ask <peer>`,
|
|
29
|
+
Subsequent phases extend the CLI surface (`--ask <peer>`, REPL drain coroutine + `/inbox` + `/reply` slash commands in 5.6b).
|
|
30
|
+
|
|
31
|
+
### `swarph daemon` (Phase 5.6)
|
|
32
|
+
|
|
33
|
+
Replaces the 4-layer `tail -F | grep | Monitor | systemd | cron poll` stack with one foreground process. Liveness check collapses to:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
ps aux | grep '[s]warph daemon' # zero output = monitoring is down
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
$ swarph daemon --state-dir ~/swarph_state/lab-ovh --self lab-ovh
|
|
41
|
+
[swarph-daemon] starting: self=lab-ovh gateway=http://localhost:8788 poll=30s ...
|
|
42
|
+
[2026-05-08T21:00:30Z] id=728 from=droplet kind=answer → 'Drop review on Phase 5.5 PRs A+B...'
|
|
43
|
+
[2026-05-08T21:01:10Z] id=729 from=droplet kind=fyi → 'Both Phase 5.5 PRs merged...'
|
|
44
|
+
^C
|
|
45
|
+
[swarph-daemon] signal 2 received — draining + flushing cursor
|
|
46
|
+
[swarph-daemon] shutdown: iterations=12 dms_seen=2 cursor.last_msg_id=729
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Loud-on-down (PLAN §16.5): never silently exits. Cursor writes are atomic (write-and-rename — corrupted mid-flush leaves the previous cursor intact). Backoff: 60s after 5 consecutive empty polls; 300s after 5 min of consecutive 5xx. SIGINT/SIGTERM trigger clean drain + flush.
|
|
50
|
+
|
|
51
|
+
`--auto-act` flag is documented for v0.5.1+ when handler registration via `@swarph.on_dm(...)` lands; v0.5.0 ships surface-only mode (DMs printed + JSONL-logged to `inbox.log`, no automatic replies).
|
|
52
|
+
|
|
53
|
+
### `swarph onboard` + `swarph ratify` (Phase 5.5)
|
|
54
|
+
|
|
55
|
+
Per PLAN.md §15, onboarding splits into a **mechanics phase** (`swarph onboard`) that automates the boring parts (registry POST, scaffolding, token resolution) and a **manual contract phase** (the new peer composes the handshake DM in their own words). A witness peer judges the handshake and runs `swarph ratify <peer>` to flip `ratified=true`, gating `task_claim` server-side.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# New peer self-onboards
|
|
59
|
+
$ swarph onboard razorpeter
|
|
60
|
+
[1/6] validate_node_name('razorpeter') ok
|
|
61
|
+
[2/6] prepare peer-registry row ok
|
|
62
|
+
[3/6] resolve MESH_GATEWAY_TOKEN ok
|
|
63
|
+
[4/6] POST .../peers/register ok (registered_unratified=true)
|
|
64
|
+
[5/6] verify_subscription_setup() ok
|
|
65
|
+
[6/6] scaffold ~/swarph_state/razorpeter/ ok
|
|
66
|
+
|
|
67
|
+
[manual] handshake template at /tmp/razorpeter-handshake.md
|
|
68
|
+
Edit each section in your own words, then send to your witness peer.
|
|
69
|
+
|
|
70
|
+
# After peer composes + sends handshake, witness ratifies
|
|
71
|
+
$ SWARPH_WITNESS=lab-ovh swarph ratify razorpeter \
|
|
72
|
+
--reason "handshake covers all four invariants in own words"
|
|
73
|
+
[1/6] validate_node_name('razorpeter') ok
|
|
74
|
+
[2/6] verify witness 'lab-ovh' is ratified ok
|
|
75
|
+
[3/6] verify 'razorpeter' is registered_unratified ok
|
|
76
|
+
[4/6] PATCH .../peers/razorpeter ok
|
|
77
|
+
[5/6] verify peer_ratifications audit row ok (id=N reason='...')
|
|
78
|
+
[6/6] invalidate local TTL cache ok
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Server-side gating (mesh-gateway PR A): unratified peers can read inbox + send DMs (so the handshake itself works) but `task_claim` returns 403. Witness must itself be ratified — no self-ratification, no unratified-witnesses-ratifying-others. Audit log (`peer_ratifications`) is append-only.
|
|
27
82
|
|
|
28
83
|
### `swarph chat`
|
|
29
84
|
|
|
@@ -126,10 +181,11 @@ Pong!
|
|
|
126
181
|
| **0** | Scaffold — entry-point + status banner |
|
|
127
182
|
| **2** (v0.1.0) | One-shot mode: `swarph "hello" --provider gemini` |
|
|
128
183
|
| **2.5** (v0.2.0) | `swarph import` — Claude JSONL → swarph-native session format |
|
|
129
|
-
| **5** (v0.3.0
|
|
184
|
+
| **5** (v0.3.0) | `swarph chat` interactive REPL — multi-turn against any of five adapters + slash commands |
|
|
185
|
+
| **5.5** (v0.4.0) | `swarph onboard` + `swarph ratify` — six mechanics steps + handshake template + witness flip (PLAN.md §15) |
|
|
186
|
+
| **5.6** (v0.5.0 — this release) | **`swarph daemon`** — foreground inbox drain loop with atomic cursor writes; retires the orphaned-tail-F class (PLAN.md §16) |
|
|
187
|
+
| **5.6b** | REPL drain coroutine + `/inbox`/`/reply` slash commands + `@swarph.on_dm()` handler registration (mesh + cli) |
|
|
130
188
|
| **3** | `--ask <peer>` mesh-aware one-shot via MeshClient |
|
|
131
|
-
| **5.5** | `swarph onboard <peer-name>` + `swarph ratify <peer-name>` (PLAN.md §15) |
|
|
132
|
-
| **5.6** | `swarph daemon` foreground drain loop + REPL drain coroutine + `/inbox`, `/reply` (PLAN.md §16) |
|
|
133
189
|
| **6** | (already done) PyPI publish |
|
|
134
190
|
|
|
135
191
|
## Why split CLI from substrate
|
|
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "swarph-cli"
|
|
7
|
-
version = "0.3.0"
|
|
8
|
-
description = "The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.
|
|
7
|
+
version = "0.5.0"
|
|
8
|
+
description = "The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.5.0 ships Phase 5.6 `swarph daemon` (foreground inbox drain — retires the orphaned-tail-F class) on top of Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify (PLAN.md §13 / §16)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
11
11
|
requires-python = ">=3.10"
|
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
"""``swarph daemon`` — Phase 5.6 foreground drain loop per PLAN.md §16.
|
|
2
|
+
|
|
3
|
+
The structural retirement of the orphaned-tail-F class. Replaces the
|
|
4
|
+
4-layer ``tail -F | grep | Monitor | systemd | cron poll`` stack with
|
|
5
|
+
one foreground process that polls the gateway directly and writes
|
|
6
|
+
the cursor transactionally (write-and-rename, no half-flushed state).
|
|
7
|
+
|
|
8
|
+
Liveness check collapses to::
|
|
9
|
+
|
|
10
|
+
ps aux | grep '[s]warph daemon'
|
|
11
|
+
|
|
12
|
+
— zero output = monitoring is down.
|
|
13
|
+
|
|
14
|
+
Default mode is **surface-only** (DMs printed + logged, never auto-replied).
|
|
15
|
+
``--auto-act`` flips on the AI-to-AI default per CLAUDE.md DM SEMANTICS,
|
|
16
|
+
routing incoming DMs to handlers registered via ``@swarph.on_dm(...)``.
|
|
17
|
+
v0.5.0 ships the daemon + cursor + signals + backoff; handler registration
|
|
18
|
+
+ ``MeshClient.watch()`` event stream + REPL drain coroutine + capability
|
|
19
|
+
advert + heartbeat self-reporting land in v0.5.1+ per PLAN §16.4 / §16.4a /
|
|
20
|
+
§16.4b.
|
|
21
|
+
|
|
22
|
+
Loud-on-down discipline (PLAN §16.5): the daemon never silently exits.
|
|
23
|
+
SIGINT / SIGTERM trigger a clean drain + cursor flush + non-zero shell
|
|
24
|
+
liveness signal; uncaught exceptions land on stderr loudly. ``ps aux``
|
|
25
|
+
is the only thing that needs to be checked.
|
|
26
|
+
|
|
27
|
+
Open question §16.7 #2 resolution: ``--auto-act`` default OFF (lab read +
|
|
28
|
+
drop's standing-auth lane discretion). Daemon-launchers in §15.4 step 6
|
|
29
|
+
include ``--auto-act`` explicitly so AI peers opt in at provisioning time.
|
|
30
|
+
|
|
31
|
+
Open question §16.7 #3 resolution: cursor format stays single-row JSON
|
|
32
|
+
with write-and-rename atomic semantics. If flush fails mid-write the
|
|
33
|
+
rename never happens and the previous cursor stands — no append-only
|
|
34
|
+
log needed.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import argparse
|
|
40
|
+
import asyncio
|
|
41
|
+
import json
|
|
42
|
+
import os
|
|
43
|
+
import signal
|
|
44
|
+
import sys
|
|
45
|
+
import time
|
|
46
|
+
import urllib.error
|
|
47
|
+
import urllib.request
|
|
48
|
+
from contextlib import suppress
|
|
49
|
+
from pathlib import Path
|
|
50
|
+
from typing import Optional
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Poll cadence and backoff tuning for the drain loop (all values in seconds
# unless noted).
_DEFAULT_POLL_S = 30  # default for --poll-seconds
_BACKOFF_EMPTY_THRESHOLD = 5  # consecutive empty polls before backing off
_BACKOFF_EMPTY_SECONDS = 60  # poll interval once the empty-poll threshold is hit
_BACKOFF_5XX_THRESHOLD_SECONDS = 300  # 5 min of consecutive 5xx / network failures
_BACKOFF_5XX_SECONDS = 300  # poll interval once that threshold is exceeded
_LOUD_DISCONNECT_SECONDS = 600  # past this, every failed (unreachable) poll prints a LOUD stderr line
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``swarph daemon`` verb.

    Returns:
        A parser accepting ``--state-dir``, ``--self`` (stored as
        ``self_name``), ``--gateway``, ``--token-file``, ``--poll-seconds``,
        ``--auto-act`` and ``--once``.
    """
    parser = argparse.ArgumentParser(
        prog="swarph daemon",
        description="Phase 5.6 mesh inbox drain daemon per PLAN.md §16. "
        "Foreground process; loud-on-down; transactional cursor.",
    )

    # The option surface is declared as data; options are registered in table
    # order, so --help lists them in the same order as before.
    option_table = (
        ("--state-dir", {
            "default": None,
            "help": "state directory containing cursor.json + inbox.log "
                    "(default: ~/swarph_state/<self>/).",
        }),
        ("--self", {
            "dest": "self_name",
            "default": None,
            "help": "canonical name of this peer (default: $SWARPH_SELF or "
                    "the directory name of --state-dir).",
        }),
        ("--gateway", {
            # Environment is read when the parser is built, not at import time.
            "default": os.environ.get("MESH_GATEWAY_URL", "http://localhost:8788"),
            "help": "mesh-gateway base URL.",
        }),
        ("--token-file", {
            "default": None,
            "help": "optional secrets file path (mode 0600 expected).",
        }),
        ("--poll-seconds", {
            "type": int,
            "default": _DEFAULT_POLL_S,
            "help": f"base poll cadence in seconds (default: {_DEFAULT_POLL_S}).",
        }),
        ("--auto-act", {
            "action": "store_true",
            "help": "route DMs to registered @swarph.on_dm handlers (v0.5.1+ — "
                    "in v0.5.0 this is a documentation flag; surface-only mode runs "
                    "regardless).",
        }),
        ("--once", {
            "action": "store_true",
            "help": "run a single poll iteration then exit (test mode).",
        }),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    return parser
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _resolve_self_name(arg: Optional[str], state_dir: Path) -> str:
    """Pick this peer's canonical name.

    Precedence: a non-empty explicit ``arg``, then a non-empty
    ``$SWARPH_SELF``, then the final path component of ``state_dir``.

    Args:
        arg: Value of ``--self``; ``None`` or empty means "not given".
        state_dir: Resolved state directory; its ``name`` is the last resort.

    Returns:
        The first non-empty candidate (``state_dir.name`` itself may be empty).
    """
    return arg or os.environ.get("SWARPH_SELF") or state_dir.name
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _resolve_state_dir(arg: Optional[str], self_name_arg: Optional[str]) -> Path:
    """Work out which directory holds this peer's daemon state.

    Args:
        arg: Value of ``--state-dir``; when non-empty it wins and only has
            ``~`` expanded.
        self_name_arg: Value of ``--self``; used (falling back to
            ``$SWARPH_SELF``) to derive ``~/swarph_state/<name>``.

    Returns:
        The state directory path. Nothing is created on disk here.

    Raises:
        SystemExit: Neither an explicit directory nor any peer name is
            available, so there is nothing to derive the path from.
    """
    if arg:
        return Path(arg).expanduser()

    peer = self_name_arg if self_name_arg else os.environ.get("SWARPH_SELF")
    if not peer:
        # Without a name the per-peer directory cannot be disambiguated.
        raise SystemExit(
            "swarph daemon: cannot resolve state directory. "
            "Pass --state-dir <path> or set $SWARPH_SELF."
        )
    return Path.home() / "swarph_state" / peer
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _resolve_token(token_file_arg: Optional[str]) -> str:
    """Resolve the gateway token by delegating to the onboard command.

    The resolution logic lives in ``swarph_cli.commands.onboard``; the
    original note here describes its order as env → secrets.toml mode 0600 →
    prompt.

    Args:
        token_file_arg: Value of ``--token-file``, passed through untouched.

    Returns:
        Whatever onboard's resolver returns for that argument.
    """
    # Function-scope import: the onboard module is looked up on each call
    # rather than when this module is imported.
    from swarph_cli.commands import onboard as _onboard

    return _onboard._resolve_token(token_file_arg)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ---------------------------------------------------------------------------
|
|
143
|
+
# Cursor — single-row JSON with write-and-rename atomic semantics
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _read_cursor(path: Path) -> dict:
    """Load the persisted cursor, or a fresh one when none exists yet.

    Args:
        path: Location of ``cursor.json``.

    Returns:
        The parsed cursor object, or ``{"last_msg_id": 0, "tasks_snapshot": {}}``
        when the file does not exist.

    Raises:
        json.JSONDecodeError: The file exists but is not valid JSON, or is
            valid JSON whose root is not an object. A loud line is printed to
            stderr first; the file is never overwritten here.
    """
    # EAFP: a single read instead of exists()+read, so there is no window in
    # which the file can vanish between the check and the read.
    try:
        raw = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {"last_msg_id": 0, "tasks_snapshot": {}}

    try:
        cursor = json.loads(raw)
        if not isinstance(cursor, dict):
            # Valid JSON with the wrong shape (e.g. [] or null) is corruption
            # too: every later cursor.get(...) would fail on it. Report it via
            # the same exception type as a parse failure.
            raise json.JSONDecodeError(
                f"cursor root must be a JSON object, got {type(cursor).__name__}",
                raw,
                0,
            )
    except json.JSONDecodeError as exc:
        # Loud — corrupted cursor needs operator attention, not silent reset.
        print(
            f"[swarph-daemon] CORRUPTED cursor at {path}: {exc}. "
            f"Refusing to overwrite. Inspect manually.",
            file=sys.stderr,
            flush=True,
        )
        raise
    return cursor
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _write_cursor_atomic(path: Path, cursor: dict) -> None:
    """Persist ``cursor`` to ``path`` via write-and-rename.

    The JSON is written to a per-process temp file in the same directory and
    then renamed over the target, so a failure mid-write leaves the previous
    cursor intact — open question §16.7 #3 resolution.

    Args:
        path: Destination ``cursor.json``; missing parent directories are
            created.
        cursor: JSON-serialisable cursor object.

    Raises:
        TypeError, ValueError: ``cursor`` cannot be serialised (raised before
            anything is written).
        OSError: The temp file could not be written or renamed. The temp file
            is removed before the error propagates.
    """
    # Serialise first: a bad cursor must not leave any file behind.
    payload = json.dumps(cursor, indent=2, sort_keys=True)

    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(path.suffix + f".tmp.{os.getpid()}")
    try:
        tmp.write_text(payload, encoding="utf-8")
        os.replace(tmp, path)  # atomic on POSIX + Windows ≥3.3
    except BaseException:
        # Do not leak cursor.json.tmp.<pid> files into the state directory
        # when the write or the rename fails.
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            pass  # best-effort cleanup; the original error is the one to surface
        raise
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
# HTTP — stdlib only, no httpx
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _http_get(url: str, *, token: str, timeout: float = 10.0) -> tuple[int, dict]:
    """GET ``url`` with a bearer token and return ``(status, json_body)``.

    Args:
        url: Fully formed request URL.
        token: Sent as ``Authorization: Bearer <token>``.
        timeout: Socket timeout in seconds, passed to ``urlopen``.

    Returns:
        ``(http_status, body)`` for any HTTP response, including 4xx/5xx
        (an unparseable or non-object error body becomes ``{"detail": ...}``).
        ``(0, {"detail": <reason>})`` for any network-level failure: refused
        connection, DNS failure, timeout, or a connection dropped mid-response.

    Raises:
        ValueError: A successful response carried a body that is not valid
            UTF-8 JSON (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    req = urllib.request.Request(
        url, headers={"Authorization": f"Bearer {token}"}
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.status, json.loads(resp.read().decode("utf-8") or "{}")
    except urllib.error.HTTPError as exc:
        try:
            body = json.loads(exc.read().decode("utf-8") or "{}")
        except Exception:
            # Best-effort: an unreadable error body must not mask the status.
            body = {"detail": str(exc)}
        if not isinstance(body, dict):
            # Callers do body.get("detail"); keep the contract for bodies
            # such as a bare JSON string or list.
            body = {"detail": body}
        return exc.code, body
    except OSError as exc:
        # Network-level failure — gateway down, DNS, etc. URLError is an
        # OSError subclass; catching OSError also covers the cases urllib does
        # NOT wrap, e.g. a read timeout or reset after the connection was
        # established, which would otherwise escape as a raw exception.
        return 0, {"detail": str(exc)}
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# ---------------------------------------------------------------------------
|
|
197
|
+
# Daemon state + loop
|
|
198
|
+
# ---------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
class DaemonState:
    """Everything the drain loop reads and mutates, bundled in one object.

    Kept as a plain class (rather than locals inside the loop) so tests can
    inspect counters and the cursor after a run.
    """

    def __init__(self, *, self_name: str, state_dir: Path, gateway: str,
                 token: str, poll_s: int, auto_act: bool):
        """Capture configuration and load the persisted cursor.

        All arguments are keyword-only and stored under the same names.
        Reading the cursor happens here, so a corrupted ``cursor.json``
        surfaces at construction time via ``_read_cursor``.
        """
        # Identity and connection settings.
        self.self_name, self.state_dir = self_name, state_dir
        self.gateway, self.token = gateway, token
        # Behaviour knobs: base poll cadence (seconds) and the --auto-act flag.
        self.poll_s, self.auto_act = poll_s, auto_act

        # On-disk artefacts, both inside the state directory.
        self.cursor_path = state_dir / "cursor.json"
        self.inbox_log_path = state_dir / "inbox.log"
        self.cursor = _read_cursor(self.cursor_path)

        # Backoff bookkeeping: run of empty polls, and the time.time() stamp
        # of the first failure in the current outage (None while healthy).
        self.consecutive_empty = 0
        self.disconnect_since: Optional[float] = None

        # Counters reported in the shutdown line.
        self.iterations = 0
        self.dms_seen = 0

        # Flipped by the signal handler; the loop exits when it sees True.
        self.shutdown_requested = False
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _log_dm(state: DaemonState, dm: dict) -> None:
    """Surface one DM on stdout and append it to ``inbox.log``.

    Stdout carries a one-line summary (timestamp, id, sender, kind and the
    first 120 characters of the content); ``inbox.log`` receives the complete
    message as one JSON object per line, append-only.

    Args:
        state: Provides ``inbox_log_path``; its parent directory is created
            if missing.
        dm: Message dict. ``id`` is required; ``created_at``, ``from_node``,
            ``kind`` and ``content`` are optional.

    Raises:
        KeyError: ``dm`` has no ``id``.
        OSError: ``inbox.log`` could not be written.
    """
    # A DM with absent/None content must still reach the audit log: raising
    # here would abort the caller's batch before the line is written.
    content = dm.get("content")
    if content is None:
        preview = ""
    elif isinstance(content, str):
        preview = content[:120]
    else:
        preview = str(content)[:120]

    line = (
        f"[{dm.get('created_at', '?')}] "
        f"id={dm['id']} from={dm.get('from_node')} kind={dm.get('kind')} "
        f"→ {preview!r}"
    )
    print(line, flush=True)
    state.inbox_log_path.parent.mkdir(parents=True, exist_ok=True)
    with state.inbox_log_path.open("a", encoding="utf-8") as f:
        f.write(json.dumps(dm) + "\n")
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _route_to_handler(state: DaemonState, dm: dict) -> None:
    """Placeholder for handler dispatch (v0.5.0 stub).

    No handler is invoked. When ``state.auto_act`` is set, a notice is
    printed saying dispatch is deferred to v0.5.1; otherwise this is a no-op.

    Args:
        state: Only ``auto_act`` is read.
        dm: The incoming message; currently unused.
    """
    if not state.auto_act:
        return
    notice = (
        " [auto-act] handler dispatch deferred to v0.5.1 "
        "(@swarph.on_dm + MeshClient.watch); surfacing only."
    )
    print(notice, flush=True)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _select_next_poll_seconds(state: DaemonState) -> int:
    """Choose how long to wait before the next poll (backoff per §16.2).

    Checked in priority order:

    1. An outage (``disconnect_since`` set) that has lasted longer than
       ``_BACKOFF_5XX_THRESHOLD_SECONDS`` → ``_BACKOFF_5XX_SECONDS``.
    2. At least ``_BACKOFF_EMPTY_THRESHOLD`` consecutive empty polls →
       ``_BACKOFF_EMPTY_SECONDS``.
    3. Otherwise the configured ``state.poll_s``.

    Returns:
        The delay in seconds.
    """
    down_since = state.disconnect_since
    outage_is_long = (
        down_since is not None
        and time.time() - down_since > _BACKOFF_5XX_THRESHOLD_SECONDS
    )
    if outage_is_long:
        return _BACKOFF_5XX_SECONDS

    inbox_is_quiet = state.consecutive_empty >= _BACKOFF_EMPTY_THRESHOLD
    return _BACKOFF_EMPTY_SECONDS if inbox_is_quiet else state.poll_s
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
async def _drain_iteration(state: DaemonState) -> None:
    """Run one poll → handle → cursor-write cycle.

    Fetches up to 50 messages addressed to this peer, keeps those newer than
    the cursor and not sent by this peer, surfaces them oldest-first and
    persists the advanced cursor.

    Gateway failures (network error, 5xx, 4xx) are reported on stderr (a
    network error only once the outage exceeds ``_LOUD_DISCONNECT_SECONDS``)
    and end the iteration early; network errors and 5xx also start or
    continue outage tracking via ``state.disconnect_since``.

    Exceptions raised while handling a message or writing the cursor DO
    propagate; ``_drain_loop`` catches and logs them. Progress made before
    the failure is still persisted, so already-surfaced DMs are not appended
    to ``inbox.log`` again on the retry.
    """
    # Function-scope import: the module only imports urllib.error/request.
    from urllib.parse import urlencode

    state.iterations += 1
    last_id = state.cursor.get("last_msg_id", 0)
    # Note: gateway query param is `to=` (NOT `to_node=`). The latter is
    # silently ignored — bit the entire session's drain code which had
    # python-side filters masking the issue. Defense-in-depth: also
    # filter from_node != self_name client-side in case any future
    # gateway quirk re-introduces outbound bleed-through.
    #
    # urlencode() escapes the peer name: it can come from a directory name,
    # and a space, '&' or '#' in it would otherwise corrupt the query string.
    query = urlencode({"to": state.self_name, "limit": 50})
    url = f"{state.gateway}/messages?{query}"
    status, body = _http_get(url, token=state.token)

    if status == 0:
        # Network-level failure
        if state.disconnect_since is None:
            state.disconnect_since = time.time()
        elapsed = time.time() - state.disconnect_since
        if elapsed > _LOUD_DISCONNECT_SECONDS:
            print(
                f"[swarph-daemon] LOUD: gateway unreachable for "
                f"{elapsed:.0f}s — {body.get('detail', '?')}",
                file=sys.stderr,
                flush=True,
            )
        return
    if status >= 500:
        if state.disconnect_since is None:
            state.disconnect_since = time.time()
        print(
            f"[swarph-daemon] gateway 5xx {status}: {body.get('detail', '?')}",
            file=sys.stderr,
            flush=True,
        )
        return
    if status >= 400:
        print(
            f"[swarph-daemon] gateway {status}: {body.get('detail', '?')}",
            file=sys.stderr,
            flush=True,
        )
        return

    # Success — clear disconnect tracking
    state.disconnect_since = None

    messages = [
        m
        for m in body.get("messages", [])
        if m["id"] > last_id and m.get("from_node") != state.self_name
    ]
    if not messages:
        state.consecutive_empty += 1
        return

    # Process oldest-first so cursor monotonically advances
    messages.sort(key=lambda m: m["id"])
    state.consecutive_empty = 0
    try:
        for dm in messages:
            _log_dm(state, dm)
            _route_to_handler(state, dm)
            state.dms_seen += 1
            # Advance per message (ids ascend), so a failure on a later DM
            # does not discard the progress already made in this batch.
            state.cursor["last_msg_id"] = dm["id"]
    finally:
        # One flush per batch — also on the error path, but only if at least
        # one DM was fully handled.
        if state.cursor.get("last_msg_id", 0) != last_id:
            _write_cursor_atomic(state.cursor_path, state.cursor)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
async def _drain_loop(state: DaemonState) -> None:
    """Poll until shutdown is requested.

    Prints a startup banner, then alternates between one
    ``_drain_iteration`` and a sleep whose length comes from
    ``_select_next_poll_seconds``. Any ``Exception`` from an iteration is
    reported on stderr and the loop continues; the loop ends only once
    ``state.shutdown_requested`` is true (set by the signal handlers), after
    which a shutdown summary is printed.
    """
    print(
        f"[swarph-daemon] starting: self={state.self_name} "
        f"gateway={state.gateway} poll={state.poll_s}s "
        f"state={state.state_dir} auto_act={state.auto_act} "
        f"cursor.last_msg_id={state.cursor.get('last_msg_id', 0)}",
        flush=True,
    )

    while not state.shutdown_requested:
        try:
            await _drain_iteration(state)
        except Exception as exc:  # noqa: BLE001 — loud-on-error per §16.4
            print(
                f"[swarph-daemon] iteration error (continuing): "
                f"{type(exc).__name__}: {exc}",
                file=sys.stderr,
                flush=True,
            )

        # Never sleep for less than one second: with --poll-seconds 0 (or a
        # negative value) range(delay) would be empty and the loop would spin,
        # hammering the gateway without ever yielding.
        delay = max(1, _select_next_poll_seconds(state))
        # Sleep in 1-second chunks so SIGINT/SIGTERM can interrupt promptly
        for _ in range(delay):
            if state.shutdown_requested:
                break
            await asyncio.sleep(1)

    print(
        f"[swarph-daemon] shutdown: iterations={state.iterations} "
        f"dms_seen={state.dms_seen} cursor.last_msg_id={state.cursor.get('last_msg_id', 0)}",
        flush=True,
    )
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _install_signal_handlers(loop: asyncio.AbstractEventLoop, state: DaemonState) -> None:
    """Make SIGINT and SIGTERM request a clean shutdown.

    Both signals get the same handler, which sets
    ``state.shutdown_requested``; the drain loop notices at its next
    one-second sleep boundary. The announcement line is printed only for the
    first signal received.

    Args:
        loop: Accepted for the caller's convenience; not used here.
        state: The object whose ``shutdown_requested`` flag is flipped.
    """

    def _request_shutdown(signum, frame):  # noqa: ARG001
        first_signal = not state.shutdown_requested
        if first_signal:
            print(
                f"[swarph-daemon] signal {signum} received — draining + flushing cursor",
                flush=True,
            )
        state.shutdown_requested = True

    # Use the signal module directly rather than loop.add_signal_handler so
    # this works inside test harnesses where the loop's default policy may
    # block signal-handler installation.
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, _request_shutdown)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def run_daemon(argv: list[str]) -> int:
    """Entry point invoked by ``swarph_cli.main`` verb dispatch.

    Resolves the peer identity and state directory, obtains the gateway
    token, then either runs a single iteration (``--once``) or the drain loop
    until a signal requests shutdown.

    Args:
        argv: Arguments following the ``daemon`` verb.

    Returns:
        ``0`` after ``--once`` or a signal-initiated shutdown; ``2`` when the
        identity cannot be resolved or the token is empty.
    """
    args = _build_parser().parse_args(argv)

    # Resolve identity + state path
    self_name = args.self_name or os.environ.get("SWARPH_SELF")
    if args.state_dir:
        state_dir = Path(args.state_dir).expanduser()
        if not self_name:
            self_name = state_dir.name
    elif self_name:
        state_dir = Path.home() / "swarph_state" / self_name
    else:
        print(
            "swarph daemon: cannot resolve identity. Pass --self <name> or "
            "--state-dir <path> or set $SWARPH_SELF.",
            file=sys.stderr,
            flush=True,
        )
        return 2

    state_dir.mkdir(parents=True, exist_ok=True)
    token = _resolve_token(args.token_file)
    if not token:
        print("swarph daemon: empty MESH_GATEWAY_TOKEN", file=sys.stderr)
        return 2

    state = DaemonState(
        self_name=self_name,
        state_dir=state_dir,
        gateway=args.gateway,
        token=token,
        poll_s=args.poll_seconds,
        auto_act=args.auto_act,
    )

    if args.once:
        # Test mode — single iteration, no signal handlers, no loop
        asyncio.run(_drain_iteration(state))
        return 0

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    _install_signal_handlers(loop, state)
    try:
        loop.run_until_complete(_drain_loop(state))
    finally:
        # Final cursor flush in case shutdown happened mid-iteration.
        # Still best-effort (a failure here must not mask the exit path), but
        # no longer silent: a lost flush is what the operator needs to see.
        try:
            _write_cursor_atomic(state.cursor_path, state.cursor)
        except Exception as exc:  # noqa: BLE001 — loud-on-down per §16.5
            print(
                f"[swarph-daemon] final cursor flush FAILED: "
                f"{type(exc).__name__}: {exc}",
                file=sys.stderr,
                flush=True,
            )
        loop.close()
    return 0
|