swarph-cli 0.7.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {swarph_cli-0.7.0/src/swarph_cli.egg-info → swarph_cli-0.7.1}/PKG-INFO +1 -1
  2. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/pyproject.toml +1 -1
  3. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/__init__.py +1 -1
  4. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/watchdog.py +83 -5
  5. {swarph_cli-0.7.0 → swarph_cli-0.7.1/src/swarph_cli.egg-info}/PKG-INFO +1 -1
  6. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_watchdog.py +135 -8
  7. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/LICENSE +0 -0
  8. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/README.md +0 -0
  9. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/setup.cfg +0 -0
  10. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/caller.py +0 -0
  11. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/cell.py +0 -0
  12. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/__init__.py +0 -0
  13. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/chat.py +0 -0
  14. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/daemon.py +0 -0
  15. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/hook_output.py +0 -0
  16. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/import_session.py +0 -0
  17. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/install_hook.py +0 -0
  18. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/onboard.py +0 -0
  19. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/ratify.py +0 -0
  20. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/commands/spawn.py +0 -0
  21. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/main.py +0 -0
  22. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/parsers/__init__.py +0 -0
  23. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli/parsers/claude.py +0 -0
  24. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli.egg-info/SOURCES.txt +0 -0
  25. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli.egg-info/dependency_links.txt +0 -0
  26. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli.egg-info/entry_points.txt +0 -0
  27. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli.egg-info/requires.txt +0 -0
  28. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/src/swarph_cli.egg-info/top_level.txt +0 -0
  29. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_cell_loader.py +0 -0
  30. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_chat_command.py +0 -0
  31. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_claude_parser.py +0 -0
  32. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_daemon_command.py +0 -0
  33. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_hook_output.py +0 -0
  34. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_import_command.py +0 -0
  35. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_install_hook.py +0 -0
  36. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_main.py +0 -0
  37. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_onboard_command.py +0 -0
  38. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_ratify_command.py +0 -0
  39. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_smoke_chat.py +0 -0
  40. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_smoke_one_shot.py +0 -0
  41. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_smoke_phase_5_5.py +0 -0
  42. {swarph_cli-0.7.0 → swarph_cli-0.7.1}/tests/test_spawn_command.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: swarph-cli
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED).
5
5
  Author: Pierre Samson, Claude Opus
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "swarph-cli"
7
- version = "0.7.0"
7
+ version = "0.7.1"
8
8
  description = "The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED)."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -16,6 +16,6 @@ The architecture splits CLI from substrate so:
16
16
 
17
17
  from __future__ import annotations
18
18
 
19
- __version__ = "0.7.0"
19
+ __version__ = "0.7.1"
20
20
 
21
21
  __all__ = ["__version__"]
@@ -263,6 +263,54 @@ def _spawn_via_swarph(role: str, tmux_session: str) -> bool:
263
263
  return False
264
264
 
265
265
 
266
+ def _a1_marker_path(log_path: Path, role: str) -> Path:
267
+ """Marker file recording the cursor_mtime at which A1 was last fired.
268
+
269
+ Co-located with the watchdog log so it inherits the same XDG_STATE_HOME
270
+ discipline. Cleared on cursor-advance OR A2 escalation. Used to suppress
271
+ repeat A1 fires within the same stale window — fix for the spam incident
272
+ where cron fired A1 every 5min for 65min into an active session's tmux
273
+ input buffer (commander #1092 + droplet #1087).
274
+
275
+ Keyed on ``role`` alone today. When the F4 follow-up (cell.yaml-pinned
276
+ cursor_path + tmux_session per mother+beta #1064/#1065) lands and the
277
+ sibling-instance pattern (alpha+beta drop-on-meta-edge per
278
+ project_drop_mitosis_to_meta_edge) ships at scale, two siblings sharing
279
+ the same base ``role`` would clobber each other's markers. Re-key on
280
+ ``(role, tmux_session)`` once F4 lands — flagged by mother in #1103.
281
+ """
282
+ return log_path.parent / f"a1-fired-{role}.marker"
283
+
284
+
285
+ def _a1_already_fired_at(marker: Path, cursor_mtime: int) -> bool:
286
+ """Returns True if a previous A1 was fired with this exact cursor_mtime.
287
+
288
+ Same cursor_mtime ⇒ no cursor advance since last fire ⇒ we're still in
289
+ the same stale window ⇒ another A1 would spam. Suppresses the fire.
290
+ """
291
+ try:
292
+ return int(marker.read_text().strip()) == cursor_mtime
293
+ except (FileNotFoundError, OSError, ValueError):
294
+ return False
295
+
296
+
297
+ def _record_a1_fired(marker: Path, cursor_mtime: int) -> None:
298
+ """Best-effort marker write; failures are logged elsewhere but never block."""
299
+ try:
300
+ marker.parent.mkdir(parents=True, exist_ok=True)
301
+ marker.write_text(str(cursor_mtime))
302
+ except OSError:
303
+ pass
304
+
305
+
306
+ def _clear_a1_marker(marker: Path) -> None:
307
+ """Idempotent marker removal. Called on A2 escalation paths."""
308
+ try:
309
+ marker.unlink()
310
+ except (FileNotFoundError, OSError):
311
+ pass
312
+
313
+
266
314
  def _log_event(log_path: Path, event: str, details: dict, verbose: bool = False) -> None:
267
315
  log_path.parent.mkdir(parents=True, exist_ok=True)
268
316
  entry = {
@@ -323,13 +371,19 @@ def run_check(args: argparse.Namespace) -> int:
323
371
  unread = _gateway_unread_count(gateway, peer, token)
324
372
  diag["unread_count"] = unread
325
373
 
326
- # Decision matrix:
327
- # cursor_stale + process_alive + unread > 0 → A1 (alive but throttled, prompt may unblock)
374
+ # Decision matrix (post commander #1092 + droplet #1087 + #1089 hardening):
328
375
  # cursor_stale + not process_alive → A2 (dead, respawn regardless of unread)
376
+ # cursor_stale + process_alive + unread > 0 → A1 (alive but throttled, prompt may unblock)
329
377
  # cursor_stale + process_alive + unread = 0 → noop (no DMs to drain anyway)
330
- # cursor_stale + unread = None → A1 (assume unread; gateway down ≠ session dead)
378
+ # cursor_stale + process_alive + unread = None → noop (F2 fail-closed: can't verify work, don't poke)
379
+ # cursor_stale + a1_marker matches cursor_mtime → noop (F1 same-window suppression)
380
+
381
+ marker = _a1_marker_path(log_path, role)
382
+ diag["a1_marker"] = str(marker)
331
383
 
332
384
  if not process_alive:
385
+ # A2 escalation — clear the A1 marker so the next A1 (after respawn) fires.
386
+ _clear_a1_marker(marker)
333
387
  diag["decision"] = "a2_respawn_process_dead"
334
388
  if args.no_respawn:
335
389
  diag["dry_run_skip"] = True
@@ -340,17 +394,27 @@ def run_check(args: argparse.Namespace) -> int:
340
394
  _log_event(log_path, "a2_respawn", diag, verbose)
341
395
  return 2 if spawn_ok else 4
342
396
 
343
- # Process is alive but cursor is stale — A1 escalation
397
+ # Process is alive but cursor is stale.
398
+ # F2 — fail-closed when unread can't be verified. Trade false-negative for
399
+ # false-positive ("respect peer-time when uncertain" per droplet #1089).
400
+ # Production incident shape (commander #1092): gateway returned None for
401
+ # unread; old code fell through to A1, spamming the tmux buffer for 65min.
402
+ if unread is None:
403
+ diag["decision"] = "noop_unread_unknown"
404
+ _log_event(log_path, "noop", diag, verbose)
405
+ return 0
406
+
344
407
  if unread == 0:
345
408
  diag["decision"] = "noop_no_unread"
346
409
  _log_event(log_path, "noop", diag, verbose)
347
410
  return 0
348
411
 
349
- diag["decision"] = "a1_send_keys"
350
412
  if not _tmux_session_exists(tmux_session):
351
413
  # Process alive somewhere but tmux session gone — partial state.
352
414
  # Treat as A2 case: respawn fresh sibling.
415
+ _clear_a1_marker(marker)
353
416
  diag["tmux_missing"] = True
417
+ diag["decision"] = "a2_respawn_tmux_missing"
354
418
  if args.no_respawn:
355
419
  _log_event(log_path, "a2_dry_run", diag, verbose)
356
420
  return 2
@@ -359,12 +423,26 @@ def run_check(args: argparse.Namespace) -> int:
359
423
  _log_event(log_path, "a2_respawn", diag, verbose)
360
424
  return 2 if spawn_ok else 4
361
425
 
426
+ # F1 — same-stale-window suppression. If A1 already fired at this exact
427
+ # cursor_mtime, further A1s would only stack wake-prompts in the tmux
428
+ # input buffer (commander #1092: 13 fires across 65min on a session that
429
+ # was actively working but cursor only updates at turn-end). Fire AT MOST
430
+ # ONCE per stale window; re-arm only when cursor advances (recovery) or
431
+ # A2 escalates (respawn clears the marker above).
432
+ if _a1_already_fired_at(marker, cursor_mtime):
433
+ diag["decision"] = "noop_a1_already_fired_this_window"
434
+ _log_event(log_path, "noop", diag, verbose)
435
+ return 0
436
+
437
+ diag["decision"] = "a1_send_keys"
362
438
  wake_text = (
363
439
  f"watchdog wake — cursor stale {cursor_age}s, "
364
440
  f"unread={unread}; please drain inbox"
365
441
  )
366
442
  sent = _tmux_send_keys(tmux_session, wake_text)
367
443
  diag["send_keys_ok"] = sent
444
+ if sent:
445
+ _record_a1_fired(marker, cursor_mtime)
368
446
  _log_event(log_path, "a1_send_keys", diag, verbose)
369
447
  return 1 if sent else 4
370
448
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: swarph-cli
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: The `swarph` binary — multi-LLM CLI with mesh-gateway integration. v0.7.0 ships Phase 7 substrate-doc R7 §11.1.7 operator-tooling layer in 5 increments: PR-A `--new-instance` flag (sibling-spawn case) + PR-B auto-suffix on collision (sibling-slot persistence) + PR-C SessionStart hook (closes bare-claude operator-paste gap) + watchdog (stranded-session recovery) + PR-D swarph-shared cell.yaml relocation (cell-yaml schema graduates to swarph-shared 0.3.0 kernel-tier; substrate-doc R7 §11.1.5 (O5) RESOLVED).
5
5
  Author: Pierre Samson, Claude Opus
6
6
  License: MIT
@@ -228,27 +228,154 @@ def test_stale_cursor_alive_tmux_missing_fires_a2(
228
228
 
229
229
 
230
230
  # ---------------------------------------------------------------------------
231
- # Detection error — gateway unreachable → assume unread (still try A1)
231
+ # F2 — fail-closed on unread=None
232
232
  # ---------------------------------------------------------------------------
233
233
 
234
234
 
235
- def test_gateway_unreachable_does_not_block_a1(
235
+ def test_gateway_unread_unknown_returns_noop(
236
236
  isolated_state, stale_cursor, monkeypatch, capsys
237
237
  ):
238
- """If gateway is down, return None for unread — watchdog still tries A1
239
- (assume unread; gateway-down ≠ session-dead)."""
238
+ """F2 fix (commander #1092 / droplet #1089) — if gateway returns None
239
+ for unread count, fail CLOSED rather than firing A1.
240
+
241
+ Old behavior fired A1 on None ("gateway down ≠ session dead"); production
242
+ surfaced the case where gateway is fine but the count is still None
243
+ (parser mismatch, transient error), and A1 spammed the tmux buffer
244
+ 13 times across 65min. New contract: 'respect peer-time when uncertain' —
245
+ trade false-negative (occasional missed wake on real strands) for
246
+ elimination of the false-positive spam class.
247
+ """
240
248
  with patch("swarph_cli.commands.watchdog._process_alive", return_value=True), \
241
249
  patch("swarph_cli.commands.watchdog._gateway_unread_count", return_value=None), \
242
250
  patch("swarph_cli.commands.watchdog._tmux_session_exists", return_value=True), \
243
- patch("swarph_cli.commands.watchdog._tmux_send_keys", return_value=True) as send_mock:
251
+ patch("swarph_cli.commands.watchdog._tmux_send_keys") as send_mock:
244
252
  rc = run_watchdog(argv=[
245
253
  "--check", "--cell", "lab",
246
254
  "--cursor", str(stale_cursor),
247
255
  "--threshold", "60",
248
256
  ])
249
- # unread=None passes the `if unread == 0` short-circuit and goes to A1
250
- assert rc == 1
251
- send_mock.assert_called_once()
257
+ assert rc == 0
258
+ send_mock.assert_not_called()
259
+
260
+
261
+ # ---------------------------------------------------------------------------
262
+ # F1 — same-stale-window A1 suppression
263
+ # ---------------------------------------------------------------------------
264
+
265
+
266
+ def test_a1_fires_at_most_once_per_stale_window(
267
+ isolated_state, stale_cursor, monkeypatch
268
+ ):
269
+ """F1 fix — repeated checks within the same stale window (cursor mtime
270
+ unchanged) fire A1 only on the first invocation; subsequent checks
271
+ noop with reason 'noop_a1_already_fired_this_window'.
272
+
273
+ Production incident (commander #1092): cron at */5 fired A1 13 times
274
+ across 65min into an actively-working session's tmux buffer because
275
+ cursor only updates at turn-end, not mid-bash. After F1, watchdog
276
+ fires AT MOST ONCE per stale window; re-arms on cursor advance.
277
+ """
278
+ log_path = isolated_state / "wd.log"
279
+ with patch("swarph_cli.commands.watchdog._process_alive", return_value=True), \
280
+ patch("swarph_cli.commands.watchdog._gateway_unread_count", return_value=3), \
281
+ patch("swarph_cli.commands.watchdog._tmux_session_exists", return_value=True), \
282
+ patch("swarph_cli.commands.watchdog._tmux_send_keys", return_value=True) as send_mock:
283
+ # First invocation — A1 fires
284
+ rc1 = run_watchdog(argv=[
285
+ "--check", "--cell", "lab",
286
+ "--cursor", str(stale_cursor),
287
+ "--threshold", "60",
288
+ "--log", str(log_path),
289
+ ])
290
+ # Second invocation, no cursor change — A1 must NOT fire again
291
+ rc2 = run_watchdog(argv=[
292
+ "--check", "--cell", "lab",
293
+ "--cursor", str(stale_cursor),
294
+ "--threshold", "60",
295
+ "--log", str(log_path),
296
+ ])
297
+ # Third invocation — still suppressed
298
+ rc3 = run_watchdog(argv=[
299
+ "--check", "--cell", "lab",
300
+ "--cursor", str(stale_cursor),
301
+ "--threshold", "60",
302
+ "--log", str(log_path),
303
+ ])
304
+ assert rc1 == 1
305
+ assert rc2 == 0
306
+ assert rc3 == 0
307
+ assert send_mock.call_count == 1 # NOT 3 — suppressed on rc2 and rc3
308
+ # Second log entry should record the suppression reason explicitly
309
+ lines = [ln for ln in log_path.read_text().splitlines() if ln.strip()]
310
+ parsed_second = json.loads(lines[1])
311
+ assert parsed_second["details"]["decision"] == "noop_a1_already_fired_this_window"
312
+
313
+
314
+ def test_a1_rearms_after_cursor_advance(
315
+ isolated_state, stale_cursor, monkeypatch
316
+ ):
317
+ """F1 fix — after the suppressed window, if cursor advances (session
318
+ recovered, even briefly), the marker no longer matches and subsequent
319
+ stale windows fire A1 again. Ensures we don't permanently mute A1 on
320
+ a peer that recovered then re-stranded."""
321
+ import os as _os
322
+ with patch("swarph_cli.commands.watchdog._process_alive", return_value=True), \
323
+ patch("swarph_cli.commands.watchdog._gateway_unread_count", return_value=2), \
324
+ patch("swarph_cli.commands.watchdog._tmux_session_exists", return_value=True), \
325
+ patch("swarph_cli.commands.watchdog._tmux_send_keys", return_value=True) as send_mock:
326
+ # First A1 fires
327
+ run_watchdog(argv=[
328
+ "--check", "--cell", "lab",
329
+ "--cursor", str(stale_cursor),
330
+ "--threshold", "60",
331
+ ])
332
+ # Simulate cursor advancing 5min — but still stale (8min > 60s threshold)
333
+ new_mtime = time.time() - 480
334
+ _os.utime(stale_cursor, (new_mtime, new_mtime))
335
+ # Second invocation — A1 must fire again (cursor advanced ⇒ new window)
336
+ run_watchdog(argv=[
337
+ "--check", "--cell", "lab",
338
+ "--cursor", str(stale_cursor),
339
+ "--threshold", "60",
340
+ ])
341
+ assert send_mock.call_count == 2
342
+
343
+
344
+ def test_a2_escalation_clears_a1_marker(
345
+ isolated_state, stale_cursor, monkeypatch
346
+ ):
347
+ """F1 fix — A2 respawn path clears the marker so the post-respawn
348
+ session starts with a clean slate (otherwise a recovered+re-stranded
349
+ session would inherit a stale marker matching the OLD cursor_mtime,
350
+ which is theoretically possible since marker stores cursor_mtime not
351
+ epoch-now). Defensive cleanup."""
352
+ log_path = isolated_state / "wd.log"
353
+ with patch("swarph_cli.commands.watchdog._process_alive", return_value=True), \
354
+ patch("swarph_cli.commands.watchdog._gateway_unread_count", return_value=5), \
355
+ patch("swarph_cli.commands.watchdog._tmux_session_exists", return_value=True), \
356
+ patch("swarph_cli.commands.watchdog._tmux_send_keys", return_value=True):
357
+ # First fire — record marker
358
+ run_watchdog(argv=[
359
+ "--check", "--cell", "lab",
360
+ "--cursor", str(stale_cursor),
361
+ "--threshold", "60",
362
+ "--log", str(log_path),
363
+ ])
364
+ marker = log_path.parent / "a1-fired-lab.marker"
365
+ assert marker.exists()
366
+
367
+ # Now force A2 path (process dead) and confirm marker is gone
368
+ with patch("swarph_cli.commands.watchdog._process_alive", return_value=False), \
369
+ patch("swarph_cli.commands.watchdog._gateway_unread_count", return_value=5), \
370
+ patch("swarph_cli.commands.watchdog._tmux_session_exists", return_value=False), \
371
+ patch("swarph_cli.commands.watchdog._spawn_via_swarph", return_value=True):
372
+ run_watchdog(argv=[
373
+ "--check", "--cell", "lab",
374
+ "--cursor", str(stale_cursor),
375
+ "--threshold", "60",
376
+ "--log", str(log_path),
377
+ ])
378
+ assert not marker.exists()
252
379
 
253
380
 
254
381
  # ---------------------------------------------------------------------------
File without changes
File without changes
File without changes