swarph-cli 0.7.5__tar.gz → 0.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {swarph_cli-0.7.5/src/swarph_cli.egg-info → swarph_cli-0.7.6}/PKG-INFO +1 -1
  2. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/pyproject.toml +1 -1
  3. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/__init__.py +1 -1
  4. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/watchdog.py +131 -4
  5. {swarph_cli-0.7.5 → swarph_cli-0.7.6/src/swarph_cli.egg-info}/PKG-INFO +1 -1
  6. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/LICENSE +0 -0
  7. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/README.md +0 -0
  8. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/setup.cfg +0 -0
  9. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/caller.py +0 -0
  10. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/cell.py +0 -0
  11. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/__init__.py +0 -0
  12. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/chat.py +0 -0
  13. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/daemon.py +0 -0
  14. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/hook_output.py +0 -0
  15. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/import_session.py +0 -0
  16. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/install_hook.py +0 -0
  17. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/onboard.py +0 -0
  18. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/ratify.py +0 -0
  19. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/commands/spawn.py +0 -0
  20. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/main.py +0 -0
  21. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/parsers/__init__.py +0 -0
  22. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/parsers/claude.py +0 -0
  23. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/systemd/swarph-watchdog.default +0 -0
  24. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/systemd/swarph-watchdog.service +0 -0
  25. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli/systemd/swarph-watchdog.timer +0 -0
  26. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli.egg-info/SOURCES.txt +0 -0
  27. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli.egg-info/dependency_links.txt +0 -0
  28. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli.egg-info/entry_points.txt +0 -0
  29. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli.egg-info/requires.txt +0 -0
  30. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/src/swarph_cli.egg-info/top_level.txt +0 -0
  31. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_cell_loader.py +0 -0
  32. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_chat_command.py +0 -0
  33. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_claude_parser.py +0 -0
  34. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_daemon_command.py +0 -0
  35. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_hook_output.py +0 -0
  36. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_import_command.py +0 -0
  37. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_install_hook.py +0 -0
  38. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_main.py +0 -0
  39. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_onboard_command.py +0 -0
  40. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_ratify_command.py +0 -0
  41. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_smoke_chat.py +0 -0
  42. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_smoke_one_shot.py +0 -0
  43. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_smoke_phase_5_5.py +0 -0
  44. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_spawn_command.py +0 -0
  45. {swarph_cli-0.7.5 → swarph_cli-0.7.6}/tests/test_watchdog.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: swarph-cli
3
- Version: 0.7.5
3
+ Version: 0.7.6
4
4
  Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED).
5
5
  Author: Pierre Samson, Claude Opus
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "swarph-cli"
7
- version = "0.7.5"
7
+ version = "0.7.6"
8
8
  description = "The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED)."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -16,6 +16,6 @@ The architecture splits CLI from substrate so:
16
16
 
17
17
  from __future__ import annotations
18
18
 
19
- __version__ = "0.7.5"
19
+ __version__ = "0.7.6"
20
20
 
21
21
  __all__ = ["__version__"]
@@ -71,7 +71,9 @@ import subprocess
71
71
  import sys
72
72
  import time
73
73
  import urllib.error
74
+ import urllib.parse
74
75
  import urllib.request
76
+ from datetime import datetime, timedelta, timezone
75
77
  from pathlib import Path
76
78
  from typing import Optional
77
79
 
@@ -85,12 +87,23 @@ _DEFAULT_GATEWAY_URL = "http://localhost:8788"
85
87
  # is comfortably above legitimate-pause noise + comfortably below the
86
88
  # 30min cursor-staleness threshold, so the two gates compose cleanly.
87
89
  _DEFAULT_PANE_ACTIVITY_THRESHOLD_SEC = 600
90
+ # Phase 4 (v0.7.6) — peer-health-event poll defaults. The recovery
91
+ # event we care about is `usage_limit_reset` (throttle cleared; session
92
+ # may be sitting idle unaware of queued DMs). 600s window catches a
93
+ # reset that fired up to 10min before this cron tick. 120s recovery
94
+ # threshold gives the session a brief grace period to notice the reset
95
+ # itself before we send-keys at it.
96
+ _DEFAULT_PEER_HEALTH_WINDOW_SEC = 600
97
+ _DEFAULT_PEER_HEALTH_RECOVERY_THRESHOLD_SEC = 120
98
+ _RECOVERY_EVENT_TYPES = ("usage_limit_reset",)
88
99
 
89
100
  _USAGE = """\
90
101
  Usage:
91
102
  swarph watchdog --check [--cell ROLE] [--cursor PATH] [--threshold SEC]
92
103
  [--gateway URL] [--tmux-session NAME]
93
104
  [--peer NAME] [--no-respawn]
105
+ [--peer-health-poll] [--peer-health-window-sec SEC]
106
+ [--peer-health-recovery-threshold SEC]
94
107
  [--log PATH] [--verbose]
95
108
  swarph watchdog --install-service [--cell ROLE] [--dry-run]
96
109
 
@@ -127,6 +140,18 @@ Flags:
127
140
  --tmux-session NAME tmux session name; default = cell role
128
141
  --peer NAME mesh peer name for unread-DM query; default = cell name
129
142
  --no-respawn A1 only; don't escalate to A2 (dry-run mode)
143
+ --peer-health-poll Phase 4: also query /peer-health-events.
144
+ On recent usage_limit_reset event, treat
145
+ sessions as wake-candidates even before
146
+ the 30min cursor-staleness threshold.
147
+ Requires MESH_GATEWAY_TOKEN in env.
148
+ --peer-health-window-sec SEC how far back to look for recovery
149
+ events; default 600 (10 min)
150
+ --peer-health-recovery-threshold SEC min cursor staleness before a recovery
151
+ event promotes the session to wake-
152
+ candidate; default 120 (2 min). Avoids
153
+ poking a session that JUST got reset
154
+ and is already self-recovering.
130
155
  --log PATH append diagnostic log; default $XDG_STATE_HOME/swarph/watchdog.log
131
156
  --verbose also write diagnostics to stderr
132
157
 
@@ -282,6 +307,57 @@ def _gateway_unread_count(gateway: str, peer: str, token: Optional[str]) -> Opti
282
307
  return None
283
308
 
284
309
 
310
+ def _gateway_recent_recovery_event(
311
+ gateway: str,
312
+ peer: str,
313
+ window_sec: int,
314
+ token: Optional[str],
315
+ ) -> Optional[dict]:
316
+ """Phase 4 (v0.7.6) — query /peer-health-events for a recent recovery event.
317
+
318
+ Returns the most recent event whose ``event_type`` is in
319
+ ``_RECOVERY_EVENT_TYPES`` (currently just ``usage_limit_reset``) for
320
+ this peer within the last ``window_sec`` seconds. Returns None if no
321
+ such event exists OR if the query fails (treat absence + error as
322
+ "no override"; the regular cursor-staleness path still applies).
323
+
324
+ Why this matters: the lab + drop both hit ``usage_limit_reset`` from
325
+ Claude's quota system — the throttle clears, but the session has no
326
+ autonomous mechanism to notice. DMs queued during the throttle sit
327
+ unread until commander manually chimes the session, OR until the
328
+ 30min cursor-staleness threshold trips A1. Phase 4 closes that gap
329
+ by lowering the threshold to ``--peer-health-recovery-threshold``
330
+ (default 2min) once the gateway sees the reset event.
331
+
332
+ Detection ≠ recovery distinction: the gateway already CAPTURES these
333
+ events (claude_session_event_logger.py + POST /peer-health-events).
334
+ What was missing was the wake-up mechanism — this function plus the
335
+ fall-through in run_check is the watchdog half of the loop.
336
+ """
337
+ since_dt = datetime.now(timezone.utc) - timedelta(seconds=window_sec)
338
+ since_iso = since_dt.isoformat()
339
+ query = urllib.parse.urlencode(
340
+ {"peer": peer, "since": since_iso, "limit": 50},
341
+ )
342
+ url = f"{gateway.rstrip('/')}/peer-health-events?{query}"
343
+ req = urllib.request.Request(url)
344
+ if token:
345
+ req.add_header("Authorization", f"Bearer {token}")
346
+ try:
347
+ with urllib.request.urlopen(req, timeout=5) as resp:
348
+ data = json.loads(resp.read().decode("utf-8"))
349
+ except (urllib.error.URLError, urllib.error.HTTPError, OSError, json.JSONDecodeError):
350
+ return None
351
+ events = data.get("events") if isinstance(data, dict) else None
352
+ if not isinstance(events, list):
353
+ return None
354
+ # Server sorts by time DESC, so the first match is the most recent.
355
+ for ev in events:
356
+ if isinstance(ev, dict) and ev.get("event_type") in _RECOVERY_EVENT_TYPES:
357
+ return ev
358
+ return None
359
+
360
+
285
361
  def _process_alive(tmux_session: str) -> bool:
286
362
  """Detect if a claude process is running inside the tmux session.
287
363
 
@@ -515,10 +591,41 @@ def run_check(args: argparse.Namespace) -> int:
515
591
  diag["cursor_age_sec"] = cursor_age
516
592
 
517
593
  if cursor_age <= threshold:
518
- # Cursor recent — Claude has been active. No action.
519
- diag["decision"] = "healthy_cursor_fresh"
520
- _log_event(log_path, "noop", diag, verbose)
521
- return 0
594
+ # Phase 4 (v0.7.6) peer-health-event override. If the gateway
595
+ # observed a recent recovery event (usage_limit_reset) for this
596
+ # peer AND the cursor is at least somewhat stale, fall through
597
+ # to the A1 path so an idle-after-reset session gets nudged.
598
+ # When --peer-health-poll is OFF, behavior is identical to v0.7.5.
599
+ if args.peer_health_poll:
600
+ recovery_event = _gateway_recent_recovery_event(
601
+ gateway, peer, args.peer_health_window_sec, token,
602
+ )
603
+ diag["peer_health_poll"] = True
604
+ diag["recovery_event_seen"] = bool(recovery_event)
605
+ if recovery_event:
606
+ diag["recovery_event_type"] = recovery_event.get("event_type")
607
+ diag["recovery_event_time"] = recovery_event.get("time")
608
+ if recovery_event and cursor_age > args.peer_health_recovery_threshold:
609
+ # Promote to wake-candidate. Don't return — fall through
610
+ # below to the existing process_alive / unread / F1-F3
611
+ # gates, which still get a vote. This is a threshold
612
+ # override, not a gate bypass.
613
+ diag["phase4_override"] = "fall_through_to_a1"
614
+ else:
615
+ # Either no recovery event, OR cursor is fresh enough
616
+ # that the session is likely self-recovering. No action.
617
+ diag["decision"] = (
618
+ "healthy_cursor_fresh_recovery_too_recent"
619
+ if recovery_event
620
+ else "healthy_cursor_fresh"
621
+ )
622
+ _log_event(log_path, "noop", diag, verbose)
623
+ return 0
624
+ else:
625
+ # Cursor recent — Claude has been active. No action.
626
+ diag["decision"] = "healthy_cursor_fresh"
627
+ _log_event(log_path, "noop", diag, verbose)
628
+ return 0
522
629
 
523
630
  # FALLBACK signal: pgrep claude (per mother #1021 AND-gate)
524
631
  process_alive = _process_alive(tmux_session)
@@ -799,6 +906,26 @@ def run_watchdog(argv: Optional[list[str]] = None) -> int:
799
906
  p.add_argument("--tmux-session", default=None)
800
907
  p.add_argument("--peer", default=None)
801
908
  p.add_argument("--no-respawn", action="store_true")
909
+ p.add_argument(
910
+ "--peer-health-poll", action="store_true",
911
+ help="Phase 4 (v0.7.6): also query mesh-gateway /peer-health-events. "
912
+ "On recent usage_limit_reset event, treat sessions as wake-"
913
+ "candidates even before the 30min cursor-staleness threshold. "
914
+ "Requires MESH_GATEWAY_TOKEN in env. Default OFF (opt-in).",
915
+ )
916
+ p.add_argument(
917
+ "--peer-health-window-sec",
918
+ type=int,
919
+ default=_DEFAULT_PEER_HEALTH_WINDOW_SEC,
920
+ help="Phase 4: window for recovery-event lookup; default 600 (10 min).",
921
+ )
922
+ p.add_argument(
923
+ "--peer-health-recovery-threshold",
924
+ type=int,
925
+ default=_DEFAULT_PEER_HEALTH_RECOVERY_THRESHOLD_SEC,
926
+ help="Phase 4: min cursor staleness for recovery event to promote "
927
+ "session to wake-candidate; default 120 (2 min).",
928
+ )
802
929
  p.add_argument("--log", default=None)
803
930
  p.add_argument("--verbose", action="store_true")
804
931
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: swarph-cli
3
- Version: 0.7.5
3
+ Version: 0.7.6
4
4
  Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED).
5
5
  Author: Pierre Samson, Claude Opus
6
6
  License: MIT
File without changes
File without changes
File without changes