swarph-cli 0.7.5__tar.gz → 0.7.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {swarph_cli-0.7.5/src/swarph_cli.egg-info → swarph_cli-0.7.7}/PKG-INFO +1 -1
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/pyproject.toml +1 -1
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/__init__.py +1 -1
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/install_hook.py +7 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/spawn.py +90 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/watchdog.py +131 -4
- {swarph_cli-0.7.5 → swarph_cli-0.7.7/src/swarph_cli.egg-info}/PKG-INFO +1 -1
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_spawn_command.py +105 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/LICENSE +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/README.md +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/setup.cfg +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/caller.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/cell.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/__init__.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/chat.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/daemon.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/hook_output.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/import_session.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/onboard.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/commands/ratify.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/main.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/parsers/__init__.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/parsers/claude.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/systemd/swarph-watchdog.default +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/systemd/swarph-watchdog.service +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli/systemd/swarph-watchdog.timer +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli.egg-info/SOURCES.txt +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli.egg-info/dependency_links.txt +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli.egg-info/entry_points.txt +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli.egg-info/requires.txt +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/src/swarph_cli.egg-info/top_level.txt +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_cell_loader.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_chat_command.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_claude_parser.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_daemon_command.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_hook_output.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_import_command.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_install_hook.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_main.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_onboard_command.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_ratify_command.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_smoke_chat.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_smoke_one_shot.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_smoke_phase_5_5.py +0 -0
- {swarph_cli-0.7.5 → swarph_cli-0.7.7}/tests/test_watchdog.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: swarph-cli
|
|
3
|
-
Version: 0.7.5
|
|
3
|
+
Version: 0.7.7
|
|
4
4
|
Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED).
|
|
5
5
|
Author: Pierre Samson, Claude Opus
|
|
6
6
|
License: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "swarph-cli"
|
|
7
|
-
version = "0.7.5"
|
|
7
|
+
version = "0.7.7"
|
|
8
8
|
description = "The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -22,6 +22,13 @@ Idempotent: rerun safe. Detects existing hook entries pointing at
|
|
|
22
22
|
Skip when SWARPH_SPAWN=1 env (set by ``swarph spawn``) — avoids
|
|
23
23
|
double-injection when the spawn path already passed the prompt via
|
|
24
24
|
``--append-system-prompt``.
|
|
25
|
+
|
|
26
|
+
NOTE (2026-05-17): IF this file grows to manage 3+ DISTINCT hook events
|
|
27
|
+
(beyond SessionStart), revisit adopting a hook framework
|
|
28
|
+
(claude-hooks / cchooks / similar). Solo eval 2026-05-17 deferred
|
|
29
|
+
framework adoption because 1-hook surface didn't justify the
|
|
30
|
+
750-LOC dependency. See lab memory ``project_deferred_decisions.md``
|
|
31
|
+
for the full threshold conditions + revisit checklist.
|
|
25
32
|
"""
|
|
26
33
|
|
|
27
34
|
from __future__ import annotations
|
|
@@ -201,6 +201,61 @@ def _resolve_cell(args: argparse.Namespace) -> tuple[Cell, Optional[str]]:
|
|
|
201
201
|
return load_cell(path), requested_role
|
|
202
202
|
|
|
203
203
|
|
|
204
|
+
def _validate_routing(cell: Cell) -> None:
|
|
205
|
+
"""Phase 1B v0 (2026-05-19) — read + validate ``cell.extra.routing``.
|
|
206
|
+
|
|
207
|
+
Phase 1B-primary architecture commits the cell-membrane framing:
|
|
208
|
+
Claude CLI is the membrane for Anthropic-side spawns; non-Anthropic
|
|
209
|
+
cells get a different membrane (currently NOT implemented).
|
|
210
|
+
|
|
211
|
+
cell.yaml ``routing`` field shape (v0):
|
|
212
|
+
::
|
|
213
|
+
routing:
|
|
214
|
+
native: anthropic # only valid value in v0
|
|
215
|
+
|
|
216
|
+
v0 accepts ``routing`` absent (= default anthropic) OR
|
|
217
|
+
``routing.native: anthropic``. Any other value raises CellError
|
|
218
|
+
pointing at Phase 1B v1+ direction.
|
|
219
|
+
|
|
220
|
+
Forward-compat: reads via ``Cell.extra`` (same pattern as
|
|
221
|
+
cursor_path / tmux_session shipped in v0.7.2 before graduating to
|
|
222
|
+
typed fields in swarph-shared 0.4.x). When swarph-shared graduates
|
|
223
|
+
``routing`` to a typed Cell field, this helper swaps to typed
|
|
224
|
+
access with no API surface change for cell.yaml authors.
|
|
225
|
+
|
|
226
|
+
See research/swarph_cli/CELL_MEMBRANE_PHASE_0_RFC.md §5 (Phase 1B)
|
|
227
|
+
for the architectural context. See lab memory
|
|
228
|
+
``project_next_up.md`` for the commander 2026-05-19 decision that
|
|
229
|
+
Phase 1B is primary + path (c) Anthropic-only v0.
|
|
230
|
+
"""
|
|
231
|
+
extra = cell.extra or {}
|
|
232
|
+
routing = extra.get("routing")
|
|
233
|
+
if routing is None:
|
|
234
|
+
return # No routing field → default Anthropic, allow
|
|
235
|
+
if not isinstance(routing, dict):
|
|
236
|
+
raise CellError(
|
|
237
|
+
f"swarph spawn: cell.yaml `routing` must be a mapping, "
|
|
238
|
+
f"got {type(routing).__name__}. See "
|
|
239
|
+
f"research/swarph_cli/CELL_MEMBRANE_PHASE_0_RFC.md for the "
|
|
240
|
+
f"valid v0 schema."
|
|
241
|
+
)
|
|
242
|
+
native = routing.get("native", "anthropic")
|
|
243
|
+
if native == "anthropic":
|
|
244
|
+
return # Explicit Anthropic OR default → allow
|
|
245
|
+
# Any other native value is a Phase 1B v1+ feature not yet built
|
|
246
|
+
raise CellError(
|
|
247
|
+
f"swarph spawn: cell.yaml `routing.native: {native!r}` is not "
|
|
248
|
+
f"supported in v0 of Phase 1B (Anthropic-only). Non-Anthropic "
|
|
249
|
+
f"routing (e.g. routing.native: openrouter) is Phase 1B v1+ "
|
|
250
|
+
f"scope, deferred per commander 2026-05-19 until a concrete "
|
|
251
|
+
f"non-Anthropic-cell use case emerges. For now, remove the "
|
|
252
|
+
f"`routing.native` field OR set it to 'anthropic' to use the "
|
|
253
|
+
f"existing Claude CLI spawn path. See "
|
|
254
|
+
f"research/swarph_cli/CELL_MEMBRANE_PHASE_0_RFC.md §5 for the "
|
|
255
|
+
f"architectural direction."
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
|
|
204
259
|
def _session_state_exists(session_id: str) -> bool:
|
|
205
260
|
"""True if Claude Code already has on-disk session state for this UUID.
|
|
206
261
|
|
|
@@ -346,6 +401,15 @@ def run_spawn(argv: Optional[list[str]] = None) -> int:
|
|
|
346
401
|
print(f"swarph spawn: {exc}", file=sys.stderr)
|
|
347
402
|
return 1
|
|
348
403
|
|
|
404
|
+
# Phase 1B v0 (2026-05-19): validate cell.yaml routing field.
|
|
405
|
+
# In v0 only `routing.native: anthropic` (or absent) is accepted.
|
|
406
|
+
# Future non-Anthropic dispatch is Phase 1B v1+ scope.
|
|
407
|
+
try:
|
|
408
|
+
_validate_routing(cell)
|
|
409
|
+
except CellError as exc:
|
|
410
|
+
print(f"swarph spawn: {exc}", file=sys.stderr)
|
|
411
|
+
return 1
|
|
412
|
+
|
|
349
413
|
# When user typed a slot-role (e.g. `swarph spawn drop-on-meta-edge-2`)
|
|
350
414
|
# the cell.yaml resolved to the BASE file (drop-on-meta-edge.yaml) so
|
|
351
415
|
# cell.role = "drop-on-meta-edge". But the operator wants slot 2's
|
|
@@ -418,6 +482,32 @@ def run_spawn(argv: Optional[list[str]] = None) -> int:
|
|
|
418
482
|
)
|
|
419
483
|
return 127
|
|
420
484
|
|
|
485
|
+
# Windows-platform known-issues banner. Claude Code's TUI (Ink-based)
|
|
486
|
+
# has documented input/rendering bugs on Windows native consoles
|
|
487
|
+
# (conhost.exe in particular). Specific symptom commander hit
|
|
488
|
+
# 2026-05-17 on workstation-lc: pressing Enter inserts literal 'm'
|
|
489
|
+
# character instead of submitting. See docs/WINDOWS_KNOWN_ISSUES.md
|
|
490
|
+
# for the full hypothesis chain + workarounds (Windows Terminal vs
|
|
491
|
+
# conhost, WSL2 fallback, TERM env injection).
|
|
492
|
+
#
|
|
493
|
+
# Banner is suppressed by --no-banner OR when the operator has
|
|
494
|
+
# already acknowledged via SWARPH_WIN_ACK=1 in env (set once after
|
|
495
|
+
# reading the doc).
|
|
496
|
+
if (
|
|
497
|
+
sys.platform == "win32"
|
|
498
|
+
and not args.no_banner
|
|
499
|
+
and not os.environ.get("SWARPH_WIN_ACK")
|
|
500
|
+
):
|
|
501
|
+
print(
|
|
502
|
+
"swarph spawn: WARNING — Windows shell detected. Claude Code's "
|
|
503
|
+
"TUI has documented input/rendering issues on Windows native "
|
|
504
|
+
"consoles (conhost.exe). Known symptom: Enter inserts literal "
|
|
505
|
+
"'m' character. See docs/WINDOWS_KNOWN_ISSUES.md for "
|
|
506
|
+
"workarounds (use Windows Terminal not conhost, or WSL2). "
|
|
507
|
+
"Set SWARPH_WIN_ACK=1 in env to suppress this warning.",
|
|
508
|
+
file=sys.stderr,
|
|
509
|
+
)
|
|
510
|
+
|
|
421
511
|
try:
|
|
422
512
|
os.chdir(cell.cwd)
|
|
423
513
|
except OSError as exc:
|
|
@@ -71,7 +71,9 @@ import subprocess
|
|
|
71
71
|
import sys
|
|
72
72
|
import time
|
|
73
73
|
import urllib.error
|
|
74
|
+
import urllib.parse
|
|
74
75
|
import urllib.request
|
|
76
|
+
from datetime import datetime, timedelta, timezone
|
|
75
77
|
from pathlib import Path
|
|
76
78
|
from typing import Optional
|
|
77
79
|
|
|
@@ -85,12 +87,23 @@ _DEFAULT_GATEWAY_URL = "http://localhost:8788"
|
|
|
85
87
|
# is comfortably above legitimate-pause noise + comfortably below the
|
|
86
88
|
# 30min cursor-staleness threshold, so the two gates compose cleanly.
|
|
87
89
|
_DEFAULT_PANE_ACTIVITY_THRESHOLD_SEC = 600
|
|
90
|
+
# Phase 4 (v0.7.6) — peer-health-event poll defaults. The recovery
|
|
91
|
+
# event we care about is `usage_limit_reset` (throttle cleared; session
|
|
92
|
+
# may be sitting idle unaware of queued DMs). 600s window catches a
|
|
93
|
+
# reset that fired up to 10min before this cron tick. 120s recovery
|
|
94
|
+
# threshold gives the session a brief grace period to notice the reset
|
|
95
|
+
# itself before we send-keys at it.
|
|
96
|
+
_DEFAULT_PEER_HEALTH_WINDOW_SEC = 600
|
|
97
|
+
_DEFAULT_PEER_HEALTH_RECOVERY_THRESHOLD_SEC = 120
|
|
98
|
+
_RECOVERY_EVENT_TYPES = ("usage_limit_reset",)
|
|
88
99
|
|
|
89
100
|
_USAGE = """\
|
|
90
101
|
Usage:
|
|
91
102
|
swarph watchdog --check [--cell ROLE] [--cursor PATH] [--threshold SEC]
|
|
92
103
|
[--gateway URL] [--tmux-session NAME]
|
|
93
104
|
[--peer NAME] [--no-respawn]
|
|
105
|
+
[--peer-health-poll] [--peer-health-window-sec SEC]
|
|
106
|
+
[--peer-health-recovery-threshold SEC]
|
|
94
107
|
[--log PATH] [--verbose]
|
|
95
108
|
swarph watchdog --install-service [--cell ROLE] [--dry-run]
|
|
96
109
|
|
|
@@ -127,6 +140,18 @@ Flags:
|
|
|
127
140
|
--tmux-session NAME tmux session name; default = cell role
|
|
128
141
|
--peer NAME mesh peer name for unread-DM query; default = cell name
|
|
129
142
|
--no-respawn A1 only; don't escalate to A2 (dry-run mode)
|
|
143
|
+
--peer-health-poll Phase 4: also query /peer-health-events.
|
|
144
|
+
On recent usage_limit_reset event, treat
|
|
145
|
+
sessions as wake-candidates even before
|
|
146
|
+
the 30min cursor-staleness threshold.
|
|
147
|
+
Requires MESH_GATEWAY_TOKEN in env.
|
|
148
|
+
--peer-health-window-sec SEC how far back to look for recovery
|
|
149
|
+
events; default 600 (10 min)
|
|
150
|
+
--peer-health-recovery-threshold SEC min cursor staleness before a recovery
|
|
151
|
+
event promotes the session to wake-
|
|
152
|
+
candidate; default 120 (2 min). Avoids
|
|
153
|
+
poking a session that JUST got reset
|
|
154
|
+
and is already self-recovering.
|
|
130
155
|
--log PATH append diagnostic log; default $XDG_STATE_HOME/swarph/watchdog.log
|
|
131
156
|
--verbose also write diagnostics to stderr
|
|
132
157
|
|
|
@@ -282,6 +307,57 @@ def _gateway_unread_count(gateway: str, peer: str, token: Optional[str]) -> Opti
|
|
|
282
307
|
return None
|
|
283
308
|
|
|
284
309
|
|
|
310
|
+
def _gateway_recent_recovery_event(
|
|
311
|
+
gateway: str,
|
|
312
|
+
peer: str,
|
|
313
|
+
window_sec: int,
|
|
314
|
+
token: Optional[str],
|
|
315
|
+
) -> Optional[dict]:
|
|
316
|
+
"""Phase 4 (v0.7.6) — query /peer-health-events for a recent recovery event.
|
|
317
|
+
|
|
318
|
+
Returns the most recent event whose ``event_type`` is in
|
|
319
|
+
``_RECOVERY_EVENT_TYPES`` (currently just ``usage_limit_reset``) for
|
|
320
|
+
this peer within the last ``window_sec`` seconds. Returns None if no
|
|
321
|
+
such event exists OR if the query fails (treat absence + error as
|
|
322
|
+
"no override"; the regular cursor-staleness path still applies).
|
|
323
|
+
|
|
324
|
+
Why this matters: the lab + drop both hit ``usage_limit_reset`` from
|
|
325
|
+
Claude's quota system — the throttle clears, but the session has no
|
|
326
|
+
autonomous mechanism to notice. DMs queued during the throttle sit
|
|
327
|
+
unread until commander manually chimes the session, OR until the
|
|
328
|
+
30min cursor-staleness threshold trips A1. Phase 4 closes that gap
|
|
329
|
+
by lowering the threshold to ``--peer-health-recovery-threshold``
|
|
330
|
+
(default 2min) once the gateway sees the reset event.
|
|
331
|
+
|
|
332
|
+
Detection ≠ recovery distinction: the gateway already CAPTURES these
|
|
333
|
+
events (claude_session_event_logger.py + POST /peer-health-events).
|
|
334
|
+
What was missing was the wake-up mechanism — this function plus the
|
|
335
|
+
fall-through in run_check is the watchdog half of the loop.
|
|
336
|
+
"""
|
|
337
|
+
since_dt = datetime.now(timezone.utc) - timedelta(seconds=window_sec)
|
|
338
|
+
since_iso = since_dt.isoformat()
|
|
339
|
+
query = urllib.parse.urlencode(
|
|
340
|
+
{"peer": peer, "since": since_iso, "limit": 50},
|
|
341
|
+
)
|
|
342
|
+
url = f"{gateway.rstrip('/')}/peer-health-events?{query}"
|
|
343
|
+
req = urllib.request.Request(url)
|
|
344
|
+
if token:
|
|
345
|
+
req.add_header("Authorization", f"Bearer {token}")
|
|
346
|
+
try:
|
|
347
|
+
with urllib.request.urlopen(req, timeout=5) as resp:
|
|
348
|
+
data = json.loads(resp.read().decode("utf-8"))
|
|
349
|
+
except (urllib.error.URLError, urllib.error.HTTPError, OSError, json.JSONDecodeError):
|
|
350
|
+
return None
|
|
351
|
+
events = data.get("events") if isinstance(data, dict) else None
|
|
352
|
+
if not isinstance(events, list):
|
|
353
|
+
return None
|
|
354
|
+
# Server sorts by time DESC, so the first match is the most recent.
|
|
355
|
+
for ev in events:
|
|
356
|
+
if isinstance(ev, dict) and ev.get("event_type") in _RECOVERY_EVENT_TYPES:
|
|
357
|
+
return ev
|
|
358
|
+
return None
|
|
359
|
+
|
|
360
|
+
|
|
285
361
|
def _process_alive(tmux_session: str) -> bool:
|
|
286
362
|
"""Detect if a claude process is running inside the tmux session.
|
|
287
363
|
|
|
@@ -515,10 +591,41 @@ def run_check(args: argparse.Namespace) -> int:
|
|
|
515
591
|
diag["cursor_age_sec"] = cursor_age
|
|
516
592
|
|
|
517
593
|
if cursor_age <= threshold:
|
|
518
|
-
# Cursor recent — Claude has been active. No action.
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
594
|
+
# Phase 4 (v0.7.6) — peer-health-event override. If the gateway
|
|
595
|
+
# observed a recent recovery event (usage_limit_reset) for this
|
|
596
|
+
# peer AND the cursor is at least somewhat stale, fall through
|
|
597
|
+
# to the A1 path so an idle-after-reset session gets nudged.
|
|
598
|
+
# When --peer-health-poll is OFF, behavior is identical to v0.7.5.
|
|
599
|
+
if args.peer_health_poll:
|
|
600
|
+
recovery_event = _gateway_recent_recovery_event(
|
|
601
|
+
gateway, peer, args.peer_health_window_sec, token,
|
|
602
|
+
)
|
|
603
|
+
diag["peer_health_poll"] = True
|
|
604
|
+
diag["recovery_event_seen"] = bool(recovery_event)
|
|
605
|
+
if recovery_event:
|
|
606
|
+
diag["recovery_event_type"] = recovery_event.get("event_type")
|
|
607
|
+
diag["recovery_event_time"] = recovery_event.get("time")
|
|
608
|
+
if recovery_event and cursor_age > args.peer_health_recovery_threshold:
|
|
609
|
+
# Promote to wake-candidate. Don't return — fall through
|
|
610
|
+
# below to the existing process_alive / unread / F1-F3
|
|
611
|
+
# gates, which still get a vote. This is a threshold
|
|
612
|
+
# override, not a gate bypass.
|
|
613
|
+
diag["phase4_override"] = "fall_through_to_a1"
|
|
614
|
+
else:
|
|
615
|
+
# Either no recovery event, OR cursor is fresh enough
|
|
616
|
+
# that the session is likely self-recovering. No action.
|
|
617
|
+
diag["decision"] = (
|
|
618
|
+
"healthy_cursor_fresh_recovery_too_recent"
|
|
619
|
+
if recovery_event
|
|
620
|
+
else "healthy_cursor_fresh"
|
|
621
|
+
)
|
|
622
|
+
_log_event(log_path, "noop", diag, verbose)
|
|
623
|
+
return 0
|
|
624
|
+
else:
|
|
625
|
+
# Cursor recent — Claude has been active. No action.
|
|
626
|
+
diag["decision"] = "healthy_cursor_fresh"
|
|
627
|
+
_log_event(log_path, "noop", diag, verbose)
|
|
628
|
+
return 0
|
|
522
629
|
|
|
523
630
|
# FALLBACK signal: pgrep claude (per mother #1021 AND-gate)
|
|
524
631
|
process_alive = _process_alive(tmux_session)
|
|
@@ -799,6 +906,26 @@ def run_watchdog(argv: Optional[list[str]] = None) -> int:
|
|
|
799
906
|
p.add_argument("--tmux-session", default=None)
|
|
800
907
|
p.add_argument("--peer", default=None)
|
|
801
908
|
p.add_argument("--no-respawn", action="store_true")
|
|
909
|
+
p.add_argument(
|
|
910
|
+
"--peer-health-poll", action="store_true",
|
|
911
|
+
help="Phase 4 (v0.7.6): also query mesh-gateway /peer-health-events. "
|
|
912
|
+
"On recent usage_limit_reset event, treat sessions as wake-"
|
|
913
|
+
"candidates even before the 30min cursor-staleness threshold. "
|
|
914
|
+
"Requires MESH_GATEWAY_TOKEN in env. Default OFF (opt-in).",
|
|
915
|
+
)
|
|
916
|
+
p.add_argument(
|
|
917
|
+
"--peer-health-window-sec",
|
|
918
|
+
type=int,
|
|
919
|
+
default=_DEFAULT_PEER_HEALTH_WINDOW_SEC,
|
|
920
|
+
help="Phase 4: window for recovery-event lookup; default 600 (10 min).",
|
|
921
|
+
)
|
|
922
|
+
p.add_argument(
|
|
923
|
+
"--peer-health-recovery-threshold",
|
|
924
|
+
type=int,
|
|
925
|
+
default=_DEFAULT_PEER_HEALTH_RECOVERY_THRESHOLD_SEC,
|
|
926
|
+
help="Phase 4: min cursor staleness for recovery event to promote "
|
|
927
|
+
"session to wake-candidate; default 120 (2 min).",
|
|
928
|
+
)
|
|
802
929
|
p.add_argument("--log", default=None)
|
|
803
930
|
p.add_argument("--verbose", action="store_true")
|
|
804
931
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: swarph-cli
|
|
3
|
-
Version: 0.7.5
|
|
3
|
+
Version: 0.7.7
|
|
4
4
|
Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED).
|
|
5
5
|
Author: Pierre Samson, Claude Opus
|
|
6
6
|
License: MIT
|
|
@@ -428,3 +428,108 @@ def test_build_claude_argv_uses_resume_when_state_exists(fake_cell_yaml, tmp_pat
|
|
|
428
428
|
assert "--session-id" not in argv
|
|
429
429
|
# UUID still passed (just as --resume's value not --session-id's)
|
|
430
430
|
assert uuid in argv
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
# ---------------------------------------------------------------------------
|
|
434
|
+
# Phase 1B v0 — cell.yaml routing field (2026-05-19)
|
|
435
|
+
# ---------------------------------------------------------------------------
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def test_validate_routing_absent_allows(fake_cell_yaml):
|
|
439
|
+
"""No `routing` field → default Anthropic, _validate_routing returns silently."""
|
|
440
|
+
from swarph_cli.commands.spawn import _validate_routing
|
|
441
|
+
cell = load_cell(fake_cell_yaml)
|
|
442
|
+
# fake_cell_yaml has no routing field → should pass
|
|
443
|
+
_validate_routing(cell) # no exception = pass
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def test_validate_routing_explicit_anthropic_allows(tmp_path):
|
|
447
|
+
"""`routing.native: anthropic` → allowed (explicit form)."""
|
|
448
|
+
from swarph_cli.commands.spawn import _validate_routing
|
|
449
|
+
payload = {
|
|
450
|
+
"schema_version": SCHEMA_VERSION_V1,
|
|
451
|
+
"name": "lab-ovh",
|
|
452
|
+
"role": "lab-test",
|
|
453
|
+
"cwd": str(tmp_path),
|
|
454
|
+
"provider": "claude",
|
|
455
|
+
"routing": {"native": "anthropic"},
|
|
456
|
+
}
|
|
457
|
+
p = tmp_path / "cell.yaml"
|
|
458
|
+
p.write_text(yaml.safe_dump(payload), encoding="utf-8")
|
|
459
|
+
cell = load_cell(p)
|
|
460
|
+
_validate_routing(cell) # no exception = pass
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def test_validate_routing_non_anthropic_rejects(tmp_path):
|
|
464
|
+
"""`routing.native: openrouter` → rejected with v0 + v1 direction message."""
|
|
465
|
+
from swarph_cli.commands.spawn import _validate_routing
|
|
466
|
+
from swarph_cli.cell import CellError
|
|
467
|
+
payload = {
|
|
468
|
+
"schema_version": SCHEMA_VERSION_V1,
|
|
469
|
+
"name": "lab-ovh",
|
|
470
|
+
"role": "lab-test",
|
|
471
|
+
"cwd": str(tmp_path),
|
|
472
|
+
"provider": "claude",
|
|
473
|
+
"routing": {"native": "openrouter"},
|
|
474
|
+
}
|
|
475
|
+
p = tmp_path / "cell.yaml"
|
|
476
|
+
p.write_text(yaml.safe_dump(payload), encoding="utf-8")
|
|
477
|
+
cell = load_cell(p)
|
|
478
|
+
with pytest.raises(CellError) as exc_info:
|
|
479
|
+
_validate_routing(cell)
|
|
480
|
+
err = str(exc_info.value)
|
|
481
|
+
assert "openrouter" in err
|
|
482
|
+
assert "v0" in err
|
|
483
|
+
assert "Phase 1B" in err
|
|
484
|
+
assert "anthropic" in err # should point at the only valid value
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def test_validate_routing_non_dict_rejects(tmp_path):
|
|
488
|
+
"""`routing: "anthropic"` (string instead of dict) → schema error."""
|
|
489
|
+
from swarph_cli.commands.spawn import _validate_routing
|
|
490
|
+
from swarph_cli.cell import CellError
|
|
491
|
+
payload = {
|
|
492
|
+
"schema_version": SCHEMA_VERSION_V1,
|
|
493
|
+
"name": "lab-ovh",
|
|
494
|
+
"role": "lab-test",
|
|
495
|
+
"cwd": str(tmp_path),
|
|
496
|
+
"provider": "claude",
|
|
497
|
+
"routing": "anthropic", # WRONG — should be dict
|
|
498
|
+
}
|
|
499
|
+
p = tmp_path / "cell.yaml"
|
|
500
|
+
p.write_text(yaml.safe_dump(payload), encoding="utf-8")
|
|
501
|
+
cell = load_cell(p)
|
|
502
|
+
with pytest.raises(CellError) as exc_info:
|
|
503
|
+
_validate_routing(cell)
|
|
504
|
+
assert "mapping" in str(exc_info.value)
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def test_validate_routing_omitted_native_allows(tmp_path):
|
|
508
|
+
"""`routing: {}` (empty dict, no native key) → defaults to anthropic, allows."""
|
|
509
|
+
from swarph_cli.commands.spawn import _validate_routing
|
|
510
|
+
payload = {
|
|
511
|
+
"schema_version": SCHEMA_VERSION_V1,
|
|
512
|
+
"name": "lab-ovh",
|
|
513
|
+
"role": "lab-test",
|
|
514
|
+
"cwd": str(tmp_path),
|
|
515
|
+
"provider": "claude",
|
|
516
|
+
"routing": {},
|
|
517
|
+
}
|
|
518
|
+
p = tmp_path / "cell.yaml"
|
|
519
|
+
p.write_text(yaml.safe_dump(payload), encoding="utf-8")
|
|
520
|
+
cell = load_cell(p)
|
|
521
|
+
_validate_routing(cell) # default anthropic = allowed
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def test_run_spawn_rejects_non_anthropic_routing_in_dry_run(
|
|
525
|
+
fake_cell_yaml, isolated_xdg, capsys
|
|
526
|
+
):
|
|
527
|
+
"""End-to-end: `swarph spawn --dry-run` with non-anthropic routing → exit 1 + error message."""
|
|
528
|
+
payload = yaml.safe_load(fake_cell_yaml.read_text())
|
|
529
|
+
payload["routing"] = {"native": "gemini"}
|
|
530
|
+
fake_cell_yaml.write_text(yaml.safe_dump(payload))
|
|
531
|
+
rc = run_spawn(["--dry-run", str(fake_cell_yaml)])
|
|
532
|
+
assert rc == 1
|
|
533
|
+
captured = capsys.readouterr()
|
|
534
|
+
assert "gemini" in captured.err
|
|
535
|
+
assert "Phase 1B" in captured.err
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|