swarph-cli 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {swarph_cli-0.3.0/src/swarph_cli.egg-info → swarph_cli-0.5.0}/PKG-INFO +63 -7
  2. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/README.md +61 -5
  3. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/pyproject.toml +2 -2
  4. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/__init__.py +1 -1
  5. swarph_cli-0.5.0/src/swarph_cli/commands/daemon.py +438 -0
  6. swarph_cli-0.5.0/src/swarph_cli/commands/onboard.py +377 -0
  7. swarph_cli-0.5.0/src/swarph_cli/commands/ratify.py +283 -0
  8. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/main.py +13 -4
  9. {swarph_cli-0.3.0 → swarph_cli-0.5.0/src/swarph_cli.egg-info}/PKG-INFO +63 -7
  10. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/SOURCES.txt +8 -1
  11. swarph_cli-0.5.0/tests/test_daemon_command.py +356 -0
  12. swarph_cli-0.5.0/tests/test_onboard_command.py +279 -0
  13. swarph_cli-0.5.0/tests/test_ratify_command.py +224 -0
  14. swarph_cli-0.5.0/tests/test_smoke_phase_5_5.py +144 -0
  15. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/LICENSE +0 -0
  16. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/setup.cfg +0 -0
  17. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/caller.py +0 -0
  18. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/commands/__init__.py +0 -0
  19. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/commands/chat.py +0 -0
  20. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/commands/import_session.py +0 -0
  21. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/parsers/__init__.py +0 -0
  22. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli/parsers/claude.py +0 -0
  23. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/dependency_links.txt +0 -0
  24. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/entry_points.txt +0 -0
  25. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/requires.txt +0 -0
  26. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/src/swarph_cli.egg-info/top_level.txt +0 -0
  27. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_chat_command.py +0 -0
  28. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_claude_parser.py +0 -0
  29. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_import_command.py +0 -0
  30. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_main.py +0 -0
  31. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_smoke_chat.py +0 -0
  32. {swarph_cli-0.3.0 → swarph_cli-0.5.0}/tests/test_smoke_one_shot.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: swarph-cli
3
- Version: 0.3.0
4
- Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.3.0 ships the Phase 5 `swarph chat` REPL on top of Phase 2 one-shot + Phase 2.5 import (PLAN.md §13).
3
+ Version: 0.5.0
4
+ Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.5.0 ships Phase 5.6 `swarph daemon` (foreground inbox drain — retires the orphaned-tail-F class) on top of Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify (PLAN.md §13 / §16).
5
5
  Author: Pierre Samson, Claude Opus
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/darw007d/swarph-cli
@@ -49,13 +49,68 @@ This is one of three repos in the v0.3.x architecture:
49
49
 
50
50
  ## Status
51
51
 
52
- **v0.3.0 — Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL.** Three verbs ship:
52
+ **v0.5.0 — Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify + Phase 5.6 daemon.** Six verbs ship:
53
53
 
54
54
  1. `swarph "prompt"` — Phase 2 one-shot mode (any of five providers)
55
55
  2. `swarph chat` — Phase 5 interactive REPL with multi-turn history + slash commands
56
56
  3. `swarph import <path>` — Phase 2.5 session import (Claude JSONL → swarph-native, with `--report-only` for honest pre-commit inspection)
57
+ 4. `swarph onboard <peer-name>` — Phase 5.5 mechanics-phase onboarding (PLAN.md §15.4)
58
+ 5. `swarph ratify <peer-name>` — Phase 5.5 witness ratification (PLAN.md §15.4a)
59
+ 6. `swarph daemon` — **NEW** Phase 5.6 foreground inbox drain loop (PLAN.md §16); structurally retires the orphaned-tail-F class
57
60
 
58
- Subsequent phases extend the CLI surface (`--ask <peer>`, onboard/ratify, daemon, additional source formats).
61
+ Subsequent phases extend the CLI surface (`--ask <peer>`, REPL drain coroutine + `/inbox` + `/reply` slash commands in 5.6b).
62
+
63
+ ### `swarph daemon` (Phase 5.6)
64
+
65
+ Replaces the 4-layer `tail -F | grep | Monitor | systemd | cron poll` stack with one foreground process. Liveness check collapses to:
66
+
67
+ ```bash
68
+ ps aux | grep '[s]warph daemon' # zero output = monitoring is down
69
+ ```
70
+
71
+ ```bash
72
+ $ swarph daemon --state-dir ~/swarph_state/lab-ovh --self lab-ovh
73
+ [swarph-daemon] starting: self=lab-ovh gateway=http://localhost:8788 poll=30s ...
74
+ [2026-05-08T21:00:30Z] id=728 from=droplet kind=answer → 'Drop review on Phase 5.5 PRs A+B...'
75
+ [2026-05-08T21:01:10Z] id=729 from=droplet kind=fyi → 'Both Phase 5.5 PRs merged...'
76
+ ^C
77
+ [swarph-daemon] signal 2 received — draining + flushing cursor
78
+ [swarph-daemon] shutdown: iterations=12 dms_seen=2 cursor.last_msg_id=729
79
+ ```
80
+
81
+ Loud-on-down (PLAN §16.5): the daemon never exits silently. Cursor writes are atomic (write-and-rename — a flush that fails mid-write leaves the previous cursor intact). Backoff: the poll interval becomes 60s after 5 consecutive empty polls, and 300s after 5 minutes of consecutive gateway failures (5xx or unreachable). SIGINT/SIGTERM trigger a clean drain + cursor flush.
82
+
83
+ `--auto-act` flag is documented for v0.5.1+ when handler registration via `@swarph.on_dm(...)` lands; v0.5.0 ships surface-only mode (DMs printed + JSONL-logged to `inbox.log`, no automatic replies).
84
+
85
+ ### `swarph onboard` + `swarph ratify` (Phase 5.5)
86
+
87
+ Per PLAN.md §15, onboarding splits into a **mechanics phase** (`swarph onboard`) that automates the boring parts (registry POST, scaffolding, token resolution) and a **manual contract phase** (the new peer composes the handshake DM in their own words). A witness peer judges the handshake and runs `swarph ratify <peer>` to flip `ratified=true`, gating `task_claim` server-side.
88
+
89
+ ```bash
90
+ # New peer self-onboards
91
+ $ swarph onboard razorpeter
92
+ [1/6] validate_node_name('razorpeter') ok
93
+ [2/6] prepare peer-registry row ok
94
+ [3/6] resolve MESH_GATEWAY_TOKEN ok
95
+ [4/6] POST .../peers/register ok (registered_unratified=true)
96
+ [5/6] verify_subscription_setup() ok
97
+ [6/6] scaffold ~/swarph_state/razorpeter/ ok
98
+
99
+ [manual] handshake template at /tmp/razorpeter-handshake.md
100
+ Edit each section in your own words, then send to your witness peer.
101
+
102
+ # After peer composes + sends handshake, witness ratifies
103
+ $ SWARPH_WITNESS=lab-ovh swarph ratify razorpeter \
104
+ --reason "handshake covers all four invariants in own words"
105
+ [1/6] validate_node_name('razorpeter') ok
106
+ [2/6] verify witness 'lab-ovh' is ratified ok
107
+ [3/6] verify 'razorpeter' is registered_unratified ok
108
+ [4/6] PATCH .../peers/razorpeter ok
109
+ [5/6] verify peer_ratifications audit row ok (id=N reason='...')
110
+ [6/6] invalidate local TTL cache ok
111
+ ```
112
+
113
+ Server-side gating (mesh-gateway PR A): unratified peers can read inbox + send DMs (so the handshake itself works) but `task_claim` returns 403. Witness must itself be ratified — no self-ratification, no unratified-witnesses-ratifying-others. Audit log (`peer_ratifications`) is append-only.
59
114
 
60
115
  ### `swarph chat`
61
116
 
@@ -158,10 +213,11 @@ Pong!
158
213
  | **0** | Scaffold — entry-point + status banner |
159
214
  | **2** (v0.1.0) | One-shot mode: `swarph "hello" --provider gemini` |
160
215
  | **2.5** (v0.2.0) | `swarph import` — Claude JSONL → swarph-native session format |
161
- | **5** (v0.3.0 — this release) | **`swarph chat` interactive REPL** — multi-turn against any of five adapters + slash commands (`/help`, `/clear`, `/system`, `/provider`, `/model`, `/history`, `/cost`, `/quit`) |
216
+ | **5** (v0.3.0) | `swarph chat` interactive REPL — multi-turn against any of five adapters + slash commands |
217
+ | **5.5** (v0.4.0) | `swarph onboard` + `swarph ratify` — six mechanics steps + handshake template + witness flip (PLAN.md §15) |
218
+ | **5.6** (v0.5.0 — this release) | **`swarph daemon`** — foreground inbox drain loop with atomic cursor writes; retires the orphaned-tail-F class (PLAN.md §16) |
219
+ | **5.6b** | REPL drain coroutine + `/inbox`/`/reply` slash commands + `@swarph.on_dm()` handler registration (mesh + cli) |
162
220
  | **3** | `--ask <peer>` mesh-aware one-shot via MeshClient |
163
- | **5.5** | `swarph onboard <peer-name>` + `swarph ratify <peer-name>` (PLAN.md §15) |
164
- | **5.6** | `swarph daemon` foreground drain loop + REPL drain coroutine + `/inbox`, `/reply` (PLAN.md §16) |
165
221
  | **6** | (already done) PyPI publish |
166
222
 
167
223
  ## Why split CLI from substrate
@@ -17,13 +17,68 @@ This is one of three repos in the v0.3.x architecture:
17
17
 
18
18
  ## Status
19
19
 
20
- **v0.3.0 — Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL.** Three verbs ship:
20
+ **v0.5.0 — Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify + Phase 5.6 daemon.** Six verbs ship:
21
21
 
22
22
  1. `swarph "prompt"` — Phase 2 one-shot mode (any of five providers)
23
23
  2. `swarph chat` — Phase 5 interactive REPL with multi-turn history + slash commands
24
24
  3. `swarph import <path>` — Phase 2.5 session import (Claude JSONL → swarph-native, with `--report-only` for honest pre-commit inspection)
25
+ 4. `swarph onboard <peer-name>` — Phase 5.5 mechanics-phase onboarding (PLAN.md §15.4)
26
+ 5. `swarph ratify <peer-name>` — Phase 5.5 witness ratification (PLAN.md §15.4a)
27
+ 6. `swarph daemon` — **NEW** Phase 5.6 foreground inbox drain loop (PLAN.md §16); structurally retires the orphaned-tail-F class
25
28
 
26
- Subsequent phases extend the CLI surface (`--ask <peer>`, onboard/ratify, daemon, additional source formats).
29
+ Subsequent phases extend the CLI surface (`--ask <peer>`, REPL drain coroutine + `/inbox` + `/reply` slash commands in 5.6b).
30
+
31
+ ### `swarph daemon` (Phase 5.6)
32
+
33
+ Replaces the 4-layer `tail -F | grep | Monitor | systemd | cron poll` stack with one foreground process. Liveness check collapses to:
34
+
35
+ ```bash
36
+ ps aux | grep '[s]warph daemon' # zero output = monitoring is down
37
+ ```
38
+
39
+ ```bash
40
+ $ swarph daemon --state-dir ~/swarph_state/lab-ovh --self lab-ovh
41
+ [swarph-daemon] starting: self=lab-ovh gateway=http://localhost:8788 poll=30s ...
42
+ [2026-05-08T21:00:30Z] id=728 from=droplet kind=answer → 'Drop review on Phase 5.5 PRs A+B...'
43
+ [2026-05-08T21:01:10Z] id=729 from=droplet kind=fyi → 'Both Phase 5.5 PRs merged...'
44
+ ^C
45
+ [swarph-daemon] signal 2 received — draining + flushing cursor
46
+ [swarph-daemon] shutdown: iterations=12 dms_seen=2 cursor.last_msg_id=729
47
+ ```
48
+
49
+ Loud-on-down (PLAN §16.5): the daemon never exits silently. Cursor writes are atomic (write-and-rename — a flush that fails mid-write leaves the previous cursor intact). Backoff: the poll interval becomes 60s after 5 consecutive empty polls, and 300s after 5 minutes of consecutive gateway failures (5xx or unreachable). SIGINT/SIGTERM trigger a clean drain + cursor flush.
50
+
51
+ `--auto-act` flag is documented for v0.5.1+ when handler registration via `@swarph.on_dm(...)` lands; v0.5.0 ships surface-only mode (DMs printed + JSONL-logged to `inbox.log`, no automatic replies).
52
+
53
+ ### `swarph onboard` + `swarph ratify` (Phase 5.5)
54
+
55
+ Per PLAN.md §15, onboarding splits into a **mechanics phase** (`swarph onboard`) that automates the boring parts (registry POST, scaffolding, token resolution) and a **manual contract phase** (the new peer composes the handshake DM in their own words). A witness peer judges the handshake and runs `swarph ratify <peer>` to flip `ratified=true`, gating `task_claim` server-side.
56
+
57
+ ```bash
58
+ # New peer self-onboards
59
+ $ swarph onboard razorpeter
60
+ [1/6] validate_node_name('razorpeter') ok
61
+ [2/6] prepare peer-registry row ok
62
+ [3/6] resolve MESH_GATEWAY_TOKEN ok
63
+ [4/6] POST .../peers/register ok (registered_unratified=true)
64
+ [5/6] verify_subscription_setup() ok
65
+ [6/6] scaffold ~/swarph_state/razorpeter/ ok
66
+
67
+ [manual] handshake template at /tmp/razorpeter-handshake.md
68
+ Edit each section in your own words, then send to your witness peer.
69
+
70
+ # After peer composes + sends handshake, witness ratifies
71
+ $ SWARPH_WITNESS=lab-ovh swarph ratify razorpeter \
72
+ --reason "handshake covers all four invariants in own words"
73
+ [1/6] validate_node_name('razorpeter') ok
74
+ [2/6] verify witness 'lab-ovh' is ratified ok
75
+ [3/6] verify 'razorpeter' is registered_unratified ok
76
+ [4/6] PATCH .../peers/razorpeter ok
77
+ [5/6] verify peer_ratifications audit row ok (id=N reason='...')
78
+ [6/6] invalidate local TTL cache ok
79
+ ```
80
+
81
+ Server-side gating (mesh-gateway PR A): unratified peers can read inbox + send DMs (so the handshake itself works) but `task_claim` returns 403. Witness must itself be ratified — no self-ratification, no unratified-witnesses-ratifying-others. Audit log (`peer_ratifications`) is append-only.
27
82
 
28
83
  ### `swarph chat`
29
84
 
@@ -126,10 +181,11 @@ Pong!
126
181
  | **0** | Scaffold — entry-point + status banner |
127
182
  | **2** (v0.1.0) | One-shot mode: `swarph "hello" --provider gemini` |
128
183
  | **2.5** (v0.2.0) | `swarph import` — Claude JSONL → swarph-native session format |
129
- | **5** (v0.3.0 — this release) | **`swarph chat` interactive REPL** — multi-turn against any of five adapters + slash commands (`/help`, `/clear`, `/system`, `/provider`, `/model`, `/history`, `/cost`, `/quit`) |
184
+ | **5** (v0.3.0) | `swarph chat` interactive REPL — multi-turn against any of five adapters + slash commands |
185
+ | **5.5** (v0.4.0) | `swarph onboard` + `swarph ratify` — six mechanics steps + handshake template + witness flip (PLAN.md §15) |
186
+ | **5.6** (v0.5.0 — this release) | **`swarph daemon`** — foreground inbox drain loop with atomic cursor writes; retires the orphaned-tail-F class (PLAN.md §16) |
187
+ | **5.6b** | REPL drain coroutine + `/inbox`/`/reply` slash commands + `@swarph.on_dm()` handler registration (mesh + cli) |
130
188
  | **3** | `--ask <peer>` mesh-aware one-shot via MeshClient |
131
- | **5.5** | `swarph onboard <peer-name>` + `swarph ratify <peer-name>` (PLAN.md §15) |
132
- | **5.6** | `swarph daemon` foreground drain loop + REPL drain coroutine + `/inbox`, `/reply` (PLAN.md §16) |
133
189
  | **6** | (already done) PyPI publish |
134
190
 
135
191
  ## Why split CLI from substrate
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "swarph-cli"
7
- version = "0.3.0"
8
- description = "The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.3.0 ships the Phase 5 `swarph chat` REPL on top of Phase 2 one-shot + Phase 2.5 import (PLAN.md §13)."
7
+ version = "0.5.0"
8
+ description = "The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.5.0 ships Phase 5.6 `swarph daemon` (foreground inbox drain — retires the orphaned-tail-F class) on top of Phase 2 one-shot + Phase 2.5 import + Phase 5 REPL + Phase 5.5 onboard/ratify (PLAN.md §13 / §16)."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
11
11
  requires-python = ">=3.10"
@@ -16,6 +16,6 @@ The architecture splits CLI from substrate so:
16
16
 
17
17
  from __future__ import annotations
18
18
 
19
- __version__ = "0.3.0"
19
+ __version__ = "0.5.0"
20
20
 
21
21
  __all__ = ["__version__"]
@@ -0,0 +1,438 @@
1
+ """``swarph daemon`` — Phase 5.6 foreground drain loop per PLAN.md §16.
2
+
3
+ The structural retirement of the orphaned-tail-F class. Replaces the
4
+ 4-layer ``tail -F | grep | Monitor | systemd | cron poll`` stack with
5
+ one foreground process that polls the gateway directly and writes
6
+ the cursor transactionally (write-and-rename, no half-flushed state).
7
+
8
+ Liveness check collapses to::
9
+
10
+ ps aux | grep '[s]warph daemon'
11
+
12
+ — zero output = monitoring is down.
13
+
14
+ Default mode is **surface-only** (DMs printed + logged, never auto-replied).
15
+ ``--auto-act`` flips on the AI-to-AI default per CLAUDE.md DM SEMANTICS,
16
+ routing incoming DMs to handlers registered via ``@swarph.on_dm(...)``.
17
+ v0.5.0 ships the daemon + cursor + signals + backoff; handler registration
18
+ + ``MeshClient.watch()`` event stream + REPL drain coroutine + capability
19
+ advert + heartbeat self-reporting land in v0.5.1+ per PLAN §16.4 / §16.4a /
20
+ §16.4b.
21
+
22
+ Loud-on-down discipline (PLAN §16.5): the daemon never silently exits.
23
+ SIGINT / SIGTERM trigger a clean drain + cursor flush, after which the
24
+ process exits with status 0; uncaught exceptions land on stderr loudly. ``ps aux``
25
+ is the only thing that needs to be checked.
26
+
27
+ Open question §16.7 #2 resolution: ``--auto-act`` default OFF (lab read +
28
+ drop's standing-auth lane discretion). Daemon-launchers in §15.4 step 6
29
+ include ``--auto-act`` explicitly so AI peers opt in at provisioning time.
30
+
31
+ Open question §16.7 #3 resolution: cursor format stays single-row JSON
32
+ with write-and-rename atomic semantics. If flush fails mid-write the
33
+ rename never happens and the previous cursor stands — no append-only
34
+ log needed.
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import argparse
40
+ import asyncio
41
+ import json
42
+ import os
43
+ import signal
44
+ import sys
45
+ import time
46
+ import urllib.error
47
+ import urllib.request
48
+ from contextlib import suppress
49
+ from pathlib import Path
50
+ from typing import Optional
51
+
52
+
53
+ _DEFAULT_POLL_S = 30
54
+ _BACKOFF_EMPTY_THRESHOLD = 5 # consecutive empty polls before backing off
55
+ _BACKOFF_EMPTY_SECONDS = 60
56
+ _BACKOFF_5XX_THRESHOLD_SECONDS = 300 # 5 min of consecutive 5xx
57
+ _BACKOFF_5XX_SECONDS = 300
58
+ _LOUD_DISCONNECT_SECONDS = 600 # once the gateway has been unreachable this long, every failed poll emits a LOUD stderr line
59
+
60
+
61
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``swarph daemon`` verb.

    Returns:
        A parser exposing ``--state-dir``, ``--self`` (stored as
        ``self_name``), ``--gateway``, ``--token-file``, ``--poll-seconds``,
        ``--auto-act`` and ``--once``. The ``--gateway`` default is read from
        ``$MESH_GATEWAY_URL`` at the moment the parser is built.
    """
    parser = argparse.ArgumentParser(
        prog="swarph daemon",
        description=(
            "Phase 5.6 mesh inbox drain daemon per PLAN.md §16. "
            "Foreground process; loud-on-down; transactional cursor."
        ),
    )
    add = parser.add_argument

    # Identity + on-disk state location.
    add(
        "--state-dir",
        default=None,
        help=(
            "state directory containing cursor.json + inbox.log "
            "(default: ~/swarph_state/<self>/)."
        ),
    )
    add(
        "--self",
        dest="self_name",
        default=None,
        help=(
            "canonical name of this peer (default: $SWARPH_SELF or "
            "the directory name of --state-dir)."
        ),
    )

    # Gateway connection.
    add(
        "--gateway",
        default=os.environ.get("MESH_GATEWAY_URL", "http://localhost:8788"),
        help="mesh-gateway base URL.",
    )
    add(
        "--token-file",
        default=None,
        help="optional secrets file path (mode 0600 expected).",
    )

    # Loop behaviour.
    add(
        "--poll-seconds",
        type=int,
        default=_DEFAULT_POLL_S,
        help=f"base poll cadence in seconds (default: {_DEFAULT_POLL_S}).",
    )
    add(
        "--auto-act",
        action="store_true",
        help=(
            "route DMs to registered @swarph.on_dm handlers (v0.5.1+ — "
            "in v0.5.0 this is a documentation flag; surface-only mode runs "
            "regardless)."
        ),
    )
    add(
        "--once",
        action="store_true",
        help="run a single poll iteration then exit (test mode).",
    )
    return parser
111
+
112
+
113
def _resolve_self_name(arg: Optional[str], state_dir: Path) -> str:
    """Pick this peer's canonical name.

    Precedence: the explicit ``--self`` value, then ``$SWARPH_SELF``, then the
    final path component of ``state_dir``. Empty strings count as unset.
    """
    return arg or os.environ.get("SWARPH_SELF") or state_dir.name
120
+
121
+
122
def _resolve_state_dir(arg: Optional[str], self_name_arg: Optional[str]) -> Path:
    """Work out which directory holds this peer's cursor + inbox log.

    An explicit ``--state-dir`` wins (with ``~`` expanded). Otherwise the
    directory is ``~/swarph_state/<name>`` where ``<name>`` comes from
    ``--self`` or, failing that, ``$SWARPH_SELF``.

    Raises:
        SystemExit: when neither a path nor a peer name is available, since
            there is nothing to disambiguate the directory with.
    """
    if arg:
        return Path(arg).expanduser()

    peer = self_name_arg if self_name_arg else os.environ.get("SWARPH_SELF")
    if not peer:
        raise SystemExit(
            "swarph daemon: cannot resolve state directory. "
            "Pass --state-dir <path> or set $SWARPH_SELF."
        )
    return Path.home() / "swarph_state" / peer
133
+
134
+
135
def _resolve_token(token_file_arg: Optional[str]) -> str:
    """Resolve the gateway token by delegating to ``swarph onboard``'s resolver.

    Both verbs share one resolution routine so they cannot drift apart. The
    original note describes its order as env → secrets.toml (mode 0600) →
    prompt; the onboard module is the authority on that.

    The import is deferred to call time, so the onboard module is only loaded
    when a token is actually needed.
    """
    from swarph_cli.commands import onboard as _onboard

    return _onboard._resolve_token(token_file_arg)
140
+
141
+
142
+ # ---------------------------------------------------------------------------
143
+ # Cursor — single-row JSON with write-and-rename atomic semantics
144
+ # ---------------------------------------------------------------------------
145
+
146
+
147
def _read_cursor(path: Path) -> dict:
    """Load the persisted cursor, or a fresh default when none exists yet.

    Args:
        path: location of ``cursor.json``.

    Returns:
        The decoded cursor object, or ``{"last_msg_id": 0,
        "tasks_snapshot": {}}`` when the file does not exist.

    Raises:
        json.JSONDecodeError: the file exists but is not valid JSON.
        ValueError: the file is valid JSON but not an object (e.g. ``[]`` or
            ``null``). Every consumer calls ``.get`` on the cursor, so such a
            value is treated as corruption here instead of failing later.
    """
    # EAFP: read directly instead of exists()+read, which races with a
    # concurrent delete/rename of the cursor file.
    try:
        raw = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {"last_msg_id": 0, "tasks_snapshot": {}}

    try:
        cursor = json.loads(raw)
        if not isinstance(cursor, dict):
            raise ValueError(
                f"expected a JSON object, got {type(cursor).__name__}"
            )
    except ValueError as exc:  # JSONDecodeError is a ValueError subclass
        # Loud — corrupted cursor needs operator attention, not silent reset.
        print(
            f"[swarph-daemon] CORRUPTED cursor at {path}: {exc}. "
            f"Refusing to overwrite. Inspect manually.",
            file=sys.stderr,
            flush=True,
        )
        raise
    return cursor
161
+
162
+
163
def _write_cursor_atomic(path: Path, cursor: dict) -> None:
    """Persist ``cursor`` to ``path`` with write-and-rename semantics.

    The JSON is written to a sibling temp file (same directory, so the rename
    stays on one filesystem), flushed and fsynced, then moved over the target
    with ``os.replace``. If anything fails before the rename, the previous
    cursor file is left untouched and the temp file is removed — open
    question §16.7 #3 resolution.

    Args:
        path: final location of the cursor file; parent dirs are created.
        cursor: JSON-serialisable cursor state.

    Raises:
        OSError: the temp file could not be written or renamed.
        TypeError: ``cursor`` is not JSON-serialisable.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(cursor, indent=2, sort_keys=True)
    tmp = path.with_suffix(path.suffix + f".tmp.{os.getpid()}")
    try:
        with tmp.open("w", encoding="utf-8") as fh:
            fh.write(payload)
            # Push the bytes to disk before the rename so a crash right after
            # os.replace cannot expose an empty/truncated cursor.
            fh.flush()
            os.fsync(fh.fileno())
        os.replace(tmp, path)  # atomic rename-over-target (Python >= 3.3)
    except BaseException:
        # Don't leave orphaned cursor.json.tmp.<pid> files behind on failure.
        with suppress(OSError):
            tmp.unlink()
        raise
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # HTTP — stdlib only, no httpx
175
+ # ---------------------------------------------------------------------------
176
+
177
+
178
def _http_get(url: str, *, token: str, timeout: float = 10.0) -> tuple[int, dict]:
    """GET ``url`` with a bearer token and return ``(status, decoded_body)``.

    Args:
        url: full request URL.
        token: value sent as ``Authorization: Bearer <token>``.
        timeout: socket timeout in seconds, passed to ``urlopen``.

    Returns:
        ``(http_status, json_body)`` for any HTTP response, including 4xx/5xx
        (an undecodable error body becomes ``{"detail": str(exc)}``).
        ``(0, {"detail": ...})`` for transport-level failures: connection
        refused, DNS errors, timeouts, resets, truncated responses.

    Raises:
        ValueError: a successful response whose body is not valid JSON or not
            valid UTF-8 (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    # Local import: only needed for the exception type below.
    import http.client

    req = urllib.request.Request(
        url, headers={"Authorization": f"Bearer {token}"}
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.status, json.loads(resp.read().decode("utf-8") or "{}")
    except urllib.error.HTTPError as exc:
        try:
            body = json.loads(exc.read().decode("utf-8") or "{}")
        except Exception:
            body = {"detail": str(exc)}
        return exc.code, body
    except (OSError, http.client.HTTPException) as exc:
        # Network-level failure — gateway down, DNS, etc. URLError is an
        # OSError, but urlopen only wraps errors raised while *sending* the
        # request; a timeout or reset while reading the response surfaces as
        # a bare TimeoutError/ConnectionError/HTTPException. Catch those too
        # so the caller's outage tracking (status == 0) sees them.
        return 0, {"detail": str(exc) or type(exc).__name__}
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # Daemon state + loop
198
+ # ---------------------------------------------------------------------------
199
+
200
+
201
class DaemonState:
    """Everything the drain loop reads and mutates between polls.

    Kept as a plain class (rather than loop-local variables) so tests can
    build one, run iterations against it, and inspect the counters afterwards.
    Constructing it loads the cursor from ``<state_dir>/cursor.json``.
    """

    def __init__(
        self,
        *,
        self_name: str,
        state_dir: Path,
        gateway: str,
        token: str,
        poll_s: int,
        auto_act: bool,
    ):
        # --- configuration, fixed for the life of the process ---
        self.self_name = self_name
        self.state_dir = state_dir
        self.gateway = gateway
        self.token = token
        self.poll_s = poll_s
        self.auto_act = auto_act

        # --- on-disk artefacts ---
        self.cursor_path = state_dir / "cursor.json"
        self.inbox_log_path = state_dir / "inbox.log"
        self.cursor = _read_cursor(self.cursor_path)

        # --- loop bookkeeping ---
        self.consecutive_empty = 0  # successful polls in a row with no new DMs
        self.disconnect_since: Optional[float] = None  # epoch secs of first failure
        self.iterations = 0
        self.dms_seen = 0
        self.shutdown_requested = False  # flipped by the signal handler
221
+
222
+
223
def _log_dm(state: DaemonState, dm: dict) -> None:
    """Surface one DM on stdout and append it to ``inbox.log`` as JSONL.

    Stdout carries a one-line summary with the content truncated to 120
    characters; ``inbox.log`` receives the full message dict as one JSON line.

    A message with missing, ``None`` or non-string ``content`` is still
    logged. Raising here would abort the batch before the cursor is written,
    so one malformed message would be replayed on every poll.

    Args:
        state: daemon state; only ``inbox_log_path`` is read.
        dm: message dict as returned by the gateway.
    """
    content = dm.get("content")
    if content is None:
        preview = ""
    elif isinstance(content, str):
        preview = content
    else:
        preview = str(content)

    line = (
        f"[{dm.get('created_at', '?')}] "
        f"id={dm.get('id')} from={dm.get('from_node')} kind={dm.get('kind')} "
        f"→ {preview[:120]!r}"
    )
    print(line, flush=True)

    state.inbox_log_path.parent.mkdir(parents=True, exist_ok=True)
    with state.inbox_log_path.open("a", encoding="utf-8") as f:
        f.write(json.dumps(dm) + "\n")
235
+
236
+
237
def _route_to_handler(state: DaemonState, dm: dict) -> None:
    """Placeholder for handler dispatch (v0.5.0 stub).

    Real routing through ``@swarph.on_dm()`` registrations is planned for
    v0.5.1+. Until then this prints a notice when ``--auto-act`` is on and
    does nothing otherwise; ``dm`` is not inspected.
    """
    if not state.auto_act:
        return
    notice = (
        " [auto-act] handler dispatch deferred to v0.5.1 "
        "(@swarph.on_dm + MeshClient.watch); surfacing only."
    )
    print(notice, flush=True)
246
+
247
+
248
def _select_next_poll_seconds(state: DaemonState) -> int:
    """Choose how long to wait before the next poll (backoff per §16.2).

    Order of precedence:

    1. Gateway failing for longer than ``_BACKOFF_5XX_THRESHOLD_SECONDS``
       → ``_BACKOFF_5XX_SECONDS``.
    2. At least ``_BACKOFF_EMPTY_THRESHOLD`` consecutive empty polls
       → ``_BACKOFF_EMPTY_SECONDS``.
    3. Otherwise the configured base cadence ``state.poll_s``.

    Backoff only ever slows polling: if the operator configured a base
    cadence longer than a backoff interval, the base cadence is kept.
    """
    base = state.poll_s
    if state.disconnect_since is not None:
        outage = time.time() - state.disconnect_since
        if outage > _BACKOFF_5XX_THRESHOLD_SECONDS:
            return max(base, _BACKOFF_5XX_SECONDS)
    if state.consecutive_empty >= _BACKOFF_EMPTY_THRESHOLD:
        return max(base, _BACKOFF_EMPTY_SECONDS)
    return base
257
+
258
+
259
async def _drain_iteration(state: DaemonState) -> None:
    """Run one poll → handle → cursor-write cycle.

    Gateway failures (unreachable, 5xx, 4xx) are reported on stderr and end
    the iteration early without touching the cursor. On success, every
    message newer than the cursor and not sent by this peer is logged and
    routed oldest-first.

    The cursor is advanced per message and flushed even if a later message in
    the batch fails, so DMs already surfaced are not replayed on the next
    poll. Exceptions from logging/handling propagate to the caller
    (``_drain_loop`` catches and reports them).
    """
    # Local import: keeps the block self-contained without touching the
    # module's import list.
    from urllib.parse import urlencode

    state.iterations += 1
    last_id = state.cursor.get("last_msg_id", 0)

    # Note: gateway query param is `to=` (NOT `to_node=`). The latter is
    # silently ignored — bit the entire session's drain code which had
    # python-side filters masking the issue. Defense-in-depth: also
    # filter from_node != self_name client-side in case any future
    # gateway quirk re-introduces outbound bleed-through.
    # urlencode escapes any reserved characters in the peer name.
    query = urlencode({"to": state.self_name, "limit": 50})
    url = f"{state.gateway}/messages?{query}"
    status, body = _http_get(url, token=state.token)

    if status == 0:
        # Network-level failure: start (or continue) the outage clock.
        if state.disconnect_since is None:
            state.disconnect_since = time.time()
        elapsed = time.time() - state.disconnect_since
        if elapsed > _LOUD_DISCONNECT_SECONDS:
            print(
                f"[swarph-daemon] LOUD: gateway unreachable for "
                f"{elapsed:.0f}s — {body.get('detail', '?')}",
                file=sys.stderr,
                flush=True,
            )
        return
    if status >= 500:
        if state.disconnect_since is None:
            state.disconnect_since = time.time()
        print(
            f"[swarph-daemon] gateway 5xx {status}: {body.get('detail', '?')}",
            file=sys.stderr,
            flush=True,
        )
        return
    if status >= 400:
        # Client errors (auth, bad request) do not affect outage tracking.
        print(
            f"[swarph-daemon] gateway {status}: {body.get('detail', '?')}",
            file=sys.stderr,
            flush=True,
        )
        return

    # Success — clear disconnect tracking
    state.disconnect_since = None

    # Oldest-first so the cursor advances monotonically.
    messages = sorted(
        (
            m
            for m in body.get("messages", [])
            if m["id"] > last_id and m.get("from_node") != state.self_name
        ),
        key=lambda m: m["id"],
    )
    if not messages:
        state.consecutive_empty += 1
        return

    state.consecutive_empty = 0
    try:
        for dm in messages:
            _log_dm(state, dm)
            _route_to_handler(state, dm)
            state.dms_seen += 1
            state.cursor["last_msg_id"] = dm["id"]
    finally:
        # Persist whatever progress was made, even on a mid-batch failure.
        if state.cursor.get("last_msg_id", 0) != last_id:
            _write_cursor_atomic(state.cursor_path, state.cursor)
329
+
330
+
331
async def _drain_loop(state: DaemonState) -> None:
    """Poll until ``state.shutdown_requested`` becomes true.

    Prints a startup banner, then alternates one drain iteration with a wait
    chosen by ``_select_next_poll_seconds``. Any exception from an iteration
    is reported on stderr and the loop carries on. A shutdown summary is
    printed once the flag is observed.
    """
    banner = (
        f"[swarph-daemon] starting: self={state.self_name} "
        f"gateway={state.gateway} poll={state.poll_s}s "
        f"state={state.state_dir} auto_act={state.auto_act} "
        f"cursor.last_msg_id={state.cursor.get('last_msg_id', 0)}"
    )
    print(banner, flush=True)

    while not state.shutdown_requested:
        try:
            await _drain_iteration(state)
        except Exception as exc:  # noqa: BLE001 — loud-on-error per §16.4
            report = (
                f"[swarph-daemon] iteration error (continuing): "
                f"{type(exc).__name__}: {exc}"
            )
            print(report, file=sys.stderr, flush=True)

        # Wait in one-second ticks so a shutdown request set by the signal
        # handler is noticed within about a second.
        for _tick in range(_select_next_poll_seconds(state)):
            if state.shutdown_requested:
                break
            await asyncio.sleep(1)

    summary = (
        f"[swarph-daemon] shutdown: iterations={state.iterations} "
        f"dms_seen={state.dms_seen} cursor.last_msg_id={state.cursor.get('last_msg_id', 0)}"
    )
    print(summary, flush=True)
366
+
367
+
368
def _install_signal_handlers(loop: asyncio.AbstractEventLoop, state: DaemonState) -> None:
    """Make SIGINT and SIGTERM request a graceful shutdown.

    The handler only flips ``state.shutdown_requested`` (announcing it once);
    the drain loop notices the flag at its next one-second sleep boundary.
    ``loop`` is accepted for interface symmetry but is not used.
    """

    def _request_shutdown(signum, frame):  # noqa: ARG001
        if not state.shutdown_requested:
            print(
                f"[swarph-daemon] signal {signum} received — draining + flushing cursor",
                flush=True,
            )
        state.shutdown_requested = True

    # Plain signal.signal rather than loop.add_signal_handler, so this also
    # works under test harnesses where the loop's default policy may block
    # signal-handler installation.
    for signo in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signo, _request_shutdown)
385
+
386
+
387
def run_daemon(argv: list[str]) -> int:
    """Entry point invoked by ``swarph_cli.main`` verb dispatch.

    Resolves identity, state directory and token, then either runs a single
    drain iteration (``--once``) or the foreground drain loop until
    SIGINT/SIGTERM.

    Args:
        argv: arguments following the ``daemon`` verb.

    Returns:
        ``0`` after a single iteration or a clean shutdown; ``2`` when the
        peer identity cannot be resolved or the gateway token is empty.
    """
    args = _build_parser().parse_args(argv)

    # Resolve identity + state path
    self_name = args.self_name or os.environ.get("SWARPH_SELF")
    if args.state_dir:
        state_dir = Path(args.state_dir).expanduser()
        if not self_name:
            self_name = state_dir.name
    elif self_name:
        state_dir = Path.home() / "swarph_state" / self_name
    else:
        print(
            "swarph daemon: cannot resolve identity. Pass --self <name> or "
            "--state-dir <path> or set $SWARPH_SELF.",
            file=sys.stderr,
            flush=True,
        )
        return 2

    state_dir.mkdir(parents=True, exist_ok=True)
    token = _resolve_token(args.token_file)
    if not token:
        print("swarph daemon: empty MESH_GATEWAY_TOKEN", file=sys.stderr)
        return 2

    state = DaemonState(
        self_name=self_name,
        state_dir=state_dir,
        gateway=args.gateway,
        token=token,
        poll_s=args.poll_seconds,
        auto_act=args.auto_act,
    )

    if args.once:
        # Test mode — single iteration, no signal handlers, no loop
        asyncio.run(_drain_iteration(state))
        return 0

    # Remember the caller's handlers so an embedding process or test harness
    # gets its own SIGINT/SIGTERM behaviour back once the daemon returns.
    watched = (signal.SIGINT, signal.SIGTERM)
    previous_handlers = {sig: signal.getsignal(sig) for sig in watched}

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    _install_signal_handlers(loop, state)
    try:
        loop.run_until_complete(_drain_loop(state))
    finally:
        # Final cursor flush in case shutdown happened mid-iteration
        with suppress(Exception):
            _write_cursor_atomic(state.cursor_path, state.cursor)
        loop.close()
        # Do not leave a closed loop registered as the current event loop.
        asyncio.set_event_loop(None)
        for sig, handler in previous_handlers.items():
            # getsignal() returns None for handlers not installed from
            # Python; those cannot be re-installed via signal.signal().
            if handler is not None:
                signal.signal(sig, handler)
    return 0