subagent-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
subagent/state.py ADDED
@@ -0,0 +1,1049 @@
1
+ """Runtime state store backed by SQLite."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import secrets
7
+ import sqlite3
8
+ import uuid
9
+ from contextlib import contextmanager
10
+ from dataclasses import dataclass
11
+ from datetime import UTC, datetime
12
+ import json
13
+ from pathlib import Path
14
+ from typing import Any, Iterator
15
+
16
+ from .errors import SubagentError
17
+ from .paths import resolve_state_db_path
18
+
19
# DDL for the whole state database; StateStore.bootstrap() feeds it to
# sqlite3's executescript(). Every statement uses IF NOT EXISTS, so running it
# against a database that already has these objects does not fail.
#
# Tables: controllers, controller_instances, workers, worker_events,
# approval_requests, handoff_snapshots.
#
# idx_controller_active_instance is a partial unique index: at most one
# controller_instances row per controller may have is_active = 1.
# idx_worker_events_seq makes (worker_id, event_seq) unique.
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS controllers (
controller_id TEXT PRIMARY KEY,
label TEXT NOT NULL,
workspace_key TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS controller_instances (
instance_id TEXT PRIMARY KEY,
controller_id TEXT NOT NULL,
epoch INTEGER NOT NULL,
token TEXT NOT NULL,
pid INTEGER,
is_active INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL,
released_at TEXT,
FOREIGN KEY (controller_id) REFERENCES controllers(controller_id)
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_controller_active_instance
ON controller_instances(controller_id)
WHERE is_active = 1;

CREATE TABLE IF NOT EXISTS workers (
worker_id TEXT PRIMARY KEY,
controller_id TEXT NOT NULL,
label TEXT NOT NULL,
launcher TEXT NOT NULL,
profile TEXT NOT NULL,
packs_json TEXT NOT NULL,
cwd TEXT NOT NULL,
session_id TEXT,
runtime_pid INTEGER,
runtime_socket TEXT,
state TEXT NOT NULL,
recovery_state TEXT NOT NULL,
active_turn_id TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
stopped_at TEXT,
last_error TEXT,
FOREIGN KEY (controller_id) REFERENCES controllers(controller_id)
);

CREATE INDEX IF NOT EXISTS idx_workers_controller_id
ON workers(controller_id);

CREATE TABLE IF NOT EXISTS worker_events (
event_id TEXT PRIMARY KEY,
worker_id TEXT NOT NULL,
event_seq INTEGER NOT NULL,
ts TEXT NOT NULL,
event_type TEXT NOT NULL,
turn_id TEXT,
data_json TEXT NOT NULL,
raw_json TEXT,
FOREIGN KEY (worker_id) REFERENCES workers(worker_id)
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_events_seq
ON worker_events(worker_id, event_seq);

CREATE INDEX IF NOT EXISTS idx_worker_events_ts
ON worker_events(worker_id, ts);

CREATE TABLE IF NOT EXISTS approval_requests (
request_id TEXT PRIMARY KEY,
worker_id TEXT NOT NULL,
turn_id TEXT,
status TEXT NOT NULL,
kind TEXT NOT NULL,
message TEXT NOT NULL,
options_json TEXT NOT NULL,
created_at TEXT NOT NULL,
decided_at TEXT,
decision TEXT,
selected_option_id TEXT,
selected_alias TEXT,
note TEXT,
FOREIGN KEY (worker_id) REFERENCES workers(worker_id)
);

CREATE INDEX IF NOT EXISTS idx_approval_requests_worker_status
ON approval_requests(worker_id, status, created_at);

CREATE TABLE IF NOT EXISTS handoff_snapshots (
snapshot_id TEXT PRIMARY KEY,
worker_id TEXT NOT NULL,
controller_id TEXT NOT NULL,
source_turn_id TEXT,
handoff_path TEXT NOT NULL,
checkpoint_path TEXT NOT NULL,
created_at TEXT NOT NULL,
FOREIGN KEY (worker_id) REFERENCES workers(worker_id),
FOREIGN KEY (controller_id) REFERENCES controllers(controller_id)
);

CREATE INDEX IF NOT EXISTS idx_handoff_snapshots_worker_created
ON handoff_snapshots(worker_id, created_at DESC);
"""
121
+
122
# Worker lifecycle states, stored verbatim in the workers.state column.
WORKER_STATE_STARTING = "starting"
WORKER_STATE_IDLE = "idle"
WORKER_STATE_RUNNING = "running"
WORKER_STATE_WAITING_APPROVAL = "waiting_approval"
WORKER_STATE_CANCELING = "canceling"
WORKER_STATE_STOPPED = "stopped"
WORKER_STATE_ERROR = "error"

# Every state StateStore.update_worker_state() accepts as `next_state`.
WORKER_RUNTIME_STATES = {
    WORKER_STATE_STARTING,
    WORKER_STATE_IDLE,
    WORKER_STATE_RUNNING,
    WORKER_STATE_WAITING_APPROVAL,
    WORKER_STATE_CANCELING,
    WORKER_STATE_STOPPED,
    WORKER_STATE_ERROR,
}

# Transition table: current state -> states it may move to. "stopped" has no
# outgoing transitions and "error" can only move to "stopped".
# update_worker_state() skips this table when allow_any_transition=True.
WORKER_ALLOWED_TRANSITIONS: dict[str, set[str]] = {
    WORKER_STATE_STARTING: {WORKER_STATE_IDLE, WORKER_STATE_ERROR, WORKER_STATE_STOPPED},
    WORKER_STATE_IDLE: {WORKER_STATE_RUNNING, WORKER_STATE_ERROR, WORKER_STATE_STOPPED},
    WORKER_STATE_RUNNING: {
        WORKER_STATE_WAITING_APPROVAL,
        WORKER_STATE_CANCELING,
        WORKER_STATE_IDLE,
        WORKER_STATE_ERROR,
        WORKER_STATE_STOPPED,
    },
    WORKER_STATE_WAITING_APPROVAL: {
        WORKER_STATE_RUNNING,
        WORKER_STATE_CANCELING,
        WORKER_STATE_ERROR,
        WORKER_STATE_STOPPED,
    },
    WORKER_STATE_CANCELING: {WORKER_STATE_IDLE, WORKER_STATE_ERROR, WORKER_STATE_STOPPED},
    WORKER_STATE_ERROR: {WORKER_STATE_STOPPED},
    WORKER_STATE_STOPPED: set(),
}

# Values stored in approval_requests.status.
# NOTE(review): APPROVAL_STATUS_CANCELED is not referenced elsewhere in this
# module; presumably it is used by callers - confirm.
APPROVAL_STATUS_PENDING = "pending"
APPROVAL_STATUS_DECIDED = "decided"
APPROVAL_STATUS_CANCELED = "canceled"
164
+
165
+
166
+ def utc_now() -> str:
167
+ return datetime.now(tz=UTC).replace(microsecond=0).isoformat()
168
+
169
+
170
def _row_to_dict(row: sqlite3.Row | None) -> dict[str, Any] | None:
    """Convert a ``sqlite3.Row`` into a plain ``{column: value}`` dict.

    ``None`` (no row fetched) is passed through unchanged.
    """
    # dict() consumes the Row through its keys()/[] mapping interface.
    return None if row is None else dict(row)
174
+
175
+
176
def _deserialize_worker_row(worker: dict[str, Any] | None) -> dict[str, Any] | None:
    """Return a copy of a worker row with ``packs_json`` decoded into ``packs``.

    ``packs`` is always a list of strings. It is empty when ``packs_json`` is
    missing, is not a string, is not valid JSON, or does not decode to a list.
    The input mapping is not modified; ``None`` is passed through.
    """
    if worker is None:
        return None

    decoded: Any = None
    encoded = worker.get("packs_json")
    if isinstance(encoded, str):
        try:
            decoded = json.loads(encoded)
        except json.JSONDecodeError:
            decoded = None

    result = {key: value for key, value in worker.items() if key != "packs_json"}
    result["packs"] = [str(entry) for entry in decoded] if isinstance(decoded, list) else []
    return result
192
+
193
+
194
def _parse_json_field(value: Any, fallback: Any) -> Any:
    """Decode ``value`` as JSON, returning ``fallback`` when that is not possible.

    ``fallback`` is returned when ``value`` is not a ``str`` or is not valid
    JSON. A valid JSON ``null`` decodes to ``None`` and is returned as such.
    """
    if isinstance(value, str):
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            pass
    return fallback
202
+
203
+
204
def _deserialize_event_row(event: dict[str, Any] | None) -> dict[str, Any] | None:
    """Return a copy of an event row with its JSON columns decoded.

    ``data_json`` becomes ``data`` (``{}`` when it cannot be decoded) and
    ``raw_json`` becomes ``raw`` (``None`` when it cannot be decoded). The
    ``*_json`` keys are dropped from the result. ``None`` is passed through.
    """
    if event is None:
        return None
    result = {
        key: value
        for key, value in event.items()
        if key not in ("data_json", "raw_json")
    }
    result["data"] = _parse_json_field(event.get("data_json"), {})
    result["raw"] = _parse_json_field(event.get("raw_json"), None)
    return result
213
+
214
+
215
def _deserialize_approval_row(request: dict[str, Any] | None) -> dict[str, Any] | None:
    """Return a copy of an approval row with ``options_json`` decoded into ``options``.

    ``options`` falls back to ``[]`` when the stored value cannot be decoded.
    The ``options_json`` key is dropped. ``None`` is passed through.
    """
    if request is None:
        return None
    result = {key: value for key, value in request.items() if key != "options_json"}
    result["options"] = _parse_json_field(request.get("options_json"), [])
    return result
222
+
223
+
224
+ @dataclass(slots=True)
225
+ class ControllerHandle:
226
+ controller_id: str
227
+ instance_id: str
228
+ epoch: int
229
+ token: str
230
+ pid: int
231
+ created_at: str
232
+
233
+ def to_dict(self, include_token: bool = True) -> dict[str, Any]:
234
+ payload: dict[str, Any] = {
235
+ "controllerId": self.controller_id,
236
+ "instanceId": self.instance_id,
237
+ "epoch": self.epoch,
238
+ "pid": self.pid,
239
+ "createdAt": self.created_at,
240
+ }
241
+ if include_token:
242
+ payload["token"] = self.token
243
+ return payload
244
+
245
+
246
+ class StateStore:
247
+ def __init__(self, db_path: Path | None = None) -> None:
248
+ self.db_path = db_path.expanduser().resolve() if db_path else resolve_state_db_path()
249
+
250
@contextmanager
def connection(self) -> Iterator[sqlite3.Connection]:
    """Yield a SQLite connection scoped to one unit of work.

    A new connection is opened on every call. Rows come back as
    ``sqlite3.Row`` and foreign-key enforcement is switched on for the
    connection. Leaving the ``with`` block normally commits; any
    ``Exception`` (including one raised by the commit itself) triggers a
    rollback and is re-raised. The connection is always closed.
    """
    # Create the parent directory on demand so the first connect can create the file.
    self.db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(self.db_path)
    conn.row_factory = sqlite3.Row
    try:
        # The schema declares FOREIGN KEY constraints; this pragma turns on
        # their enforcement for this connection.
        conn.execute("PRAGMA foreign_keys = ON")
        yield conn
        # commit() stays inside the try so a failing commit is rolled back too.
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
264
+
265
def bootstrap(self) -> None:
    """Create the schema objects from ``SCHEMA_SQL``, then apply column migrations."""
    with self.connection() as db:
        db.executescript(SCHEMA_SQL)
        self._run_migrations(db)
269
+
270
+ def _run_migrations(self, conn: sqlite3.Connection) -> None:
271
+ self._ensure_column(conn, table_name="workers", column_name="active_turn_id", column_type="TEXT")
272
+ self._ensure_column(conn, table_name="workers", column_name="runtime_pid", column_type="INTEGER")
273
+ self._ensure_column(conn, table_name="workers", column_name="runtime_socket", column_type="TEXT")
274
+
275
+ def _ensure_column(
276
+ self,
277
+ conn: sqlite3.Connection,
278
+ *,
279
+ table_name: str,
280
+ column_name: str,
281
+ column_type: str,
282
+ ) -> None:
283
+ rows = conn.execute(f"PRAGMA table_info({table_name})").fetchall()
284
+ existing = {str(row["name"]) for row in rows}
285
+ if column_name in existing:
286
+ return
287
+ conn.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}")
288
+
289
def register_controller(self, controller_id: str, label: str, workspace_key: str) -> dict[str, Any]:
    """Insert a controller, or update its label/workspace if the id already exists.

    Returns:
        The stored controller row as a dict.

    Raises:
        SubagentError: ``CONTROLLER_OWNERSHIP_CONFLICT`` when the statement
            hits an integrity error (the schema makes ``workspace_key``
            unique across controllers).
    """
    timestamp = utc_now()
    values = (controller_id, label, workspace_key, timestamp, timestamp)
    with self.connection() as conn:
        try:
            conn.execute(
                """
                INSERT INTO controllers(controller_id, label, workspace_key, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(controller_id) DO UPDATE SET
                    label = excluded.label,
                    workspace_key = excluded.workspace_key,
                    updated_at = excluded.updated_at
                """,
                values,
            )
        except sqlite3.IntegrityError as exc:
            conflict = SubagentError(
                code="CONTROLLER_OWNERSHIP_CONFLICT",
                message=(
                    "Workspace already belongs to another controller. "
                    "Use `controller attach --takeover` with the existing controller."
                ),
                details={"workspaceKey": workspace_key},
            )
            raise conflict from exc
        stored = conn.execute(
            "SELECT * FROM controllers WHERE controller_id = ?",
            (controller_id,),
        ).fetchone()
        # Internal invariant: the upsert above just wrote this row.
        assert stored is not None
    return _row_to_dict(stored) or {}
319
+
320
def get_controller(self, controller_id: str) -> dict[str, Any] | None:
    """Return the controller row with this id as a dict, or ``None`` if absent."""
    lookup_sql = "SELECT * FROM controllers WHERE controller_id = ?"
    with self.connection() as conn:
        found = conn.execute(lookup_sql, (controller_id,)).fetchone()
    return _row_to_dict(found)
327
+
328
def list_controllers(self) -> list[dict[str, Any]]:
    """Return all controllers, newest first (ties broken by id, descending)."""
    listing_sql = "SELECT * FROM controllers ORDER BY created_at DESC, controller_id DESC"
    with self.connection() as conn:
        fetched = conn.execute(listing_sql).fetchall()
    controllers: list[dict[str, Any]] = []
    for record in fetched:
        controllers.append(_row_to_dict(record) or {})
    return controllers
334
+
335
def get_controller_by_workspace(self, workspace_key: str) -> dict[str, Any] | None:
    """Return the controller bound to ``workspace_key`` as a dict, or ``None``."""
    lookup_sql = "SELECT * FROM controllers WHERE workspace_key = ?"
    with self.connection() as conn:
        found = conn.execute(lookup_sql, (workspace_key,)).fetchone()
    return _row_to_dict(found)
342
+
343
def get_active_instance(self, controller_id: str) -> dict[str, Any] | None:
    """Return the controller's active instance row (token included), or ``None``."""
    params = (controller_id,)
    with self.connection() as conn:
        cursor = conn.execute(
            """
            SELECT instance_id, controller_id, epoch, token, pid, created_at
            FROM controller_instances
            WHERE controller_id = ? AND is_active = 1
            """,
            params,
        )
        active_row = cursor.fetchone()
    return _row_to_dict(active_row)
354
+
355
def list_active_instances(self) -> list[dict[str, Any]]:
    """Return every active controller instance (tokens included), newest first."""
    with self.connection() as conn:
        cursor = conn.execute(
            """
            SELECT instance_id, controller_id, epoch, token, pid, created_at
            FROM controller_instances
            WHERE is_active = 1
            ORDER BY created_at DESC
            """
        )
        fetched = cursor.fetchall()
    instances: list[dict[str, Any]] = []
    for record in fetched:
        instances.append(_row_to_dict(record) or {})
    return instances
366
+
367
def acquire_owner_handle(
    self,
    controller_id: str,
    *,
    takeover: bool,
    pid: int | None = None,
) -> ControllerHandle:
    """Make the caller the active owner of a controller.

    Everything runs inside one ``BEGIN IMMEDIATE`` transaction. A new
    instance row is inserted with an epoch one above the highest epoch
    recorded for the controller and a freshly generated token.

    Args:
        controller_id: Controller to own.
        takeover: When true, an existing active owner is deactivated first;
            when false, an existing owner is an error.
        pid: Process id to record; defaults to the current process.

    Raises:
        SubagentError: ``CONTROLLER_NOT_FOUND`` for an unknown controller,
            ``CONTROLLER_OWNERSHIP_CONFLICT`` when an owner exists and
            ``takeover`` is false.
    """
    owner_pid = os.getpid() if pid is None else pid
    with self.connection() as conn:
        conn.execute("BEGIN IMMEDIATE")

        known = conn.execute(
            "SELECT controller_id FROM controllers WHERE controller_id = ?",
            (controller_id,),
        ).fetchone()
        if known is None:
            raise SubagentError(
                code="CONTROLLER_NOT_FOUND",
                message=f"Controller not found: {controller_id}",
                details={"controllerId": controller_id},
            )

        current_owner = conn.execute(
            """
            SELECT instance_id, epoch, token, pid
            FROM controller_instances
            WHERE controller_id = ? AND is_active = 1
            """,
            (controller_id,),
        ).fetchone()
        if current_owner is not None:
            if not takeover:
                raise SubagentError(
                    code="CONTROLLER_OWNERSHIP_CONFLICT",
                    message="Controller already has an active owner. Use --takeover to replace it.",
                    details={"controllerId": controller_id},
                )
            # Retire the previous owner before inserting the new active row;
            # the schema allows only one active instance per controller.
            conn.execute(
                """
                UPDATE controller_instances
                SET is_active = 0, released_at = ?
                WHERE controller_id = ? AND is_active = 1
                """,
                (utc_now(), controller_id),
            )

        epoch_row = conn.execute(
            "SELECT COALESCE(MAX(epoch), 0) AS max_epoch FROM controller_instances WHERE controller_id = ?",
            (controller_id,),
        ).fetchone()
        next_epoch = 1 if not epoch_row else int(epoch_row["max_epoch"]) + 1

        token = secrets.token_urlsafe(24)
        instance_id = f"ci_{uuid.uuid4().hex[:12]}"
        created_at = utc_now()
        conn.execute(
            """
            INSERT INTO controller_instances(instance_id, controller_id, epoch, token, pid, is_active, created_at)
            VALUES (?, ?, ?, ?, ?, 1, ?)
            """,
            (instance_id, controller_id, next_epoch, token, owner_pid, created_at),
        )

    return ControllerHandle(
        controller_id=controller_id,
        instance_id=instance_id,
        epoch=next_epoch,
        token=token,
        pid=owner_pid,
        created_at=created_at,
    )
435
+
436
+ def validate_handle(self, controller_id: str, epoch: int, token: str) -> bool:
437
+ with self.connection() as conn:
438
+ row = conn.execute(
439
+ """
440
+ SELECT 1
441
+ FROM controller_instances
442
+ WHERE controller_id = ? AND epoch = ? AND token = ? AND is_active = 1
443
+ """,
444
+ (controller_id, epoch, token),
445
+ ).fetchone()
446
+ return row is not None
447
+
448
def release_owner_handle(
    self,
    *,
    controller_id: str,
    epoch: int | None = None,
    token: str | None = None,
    force: bool = False,
) -> dict[str, Any]:
    """Deactivate the controller's active owner instance.

    Lookup, handle validation and release all run inside one
    ``BEGIN IMMEDIATE`` transaction, and the release targets the exact
    instance that was validated. This prevents a concurrent takeover from
    slipping in between the check and the write, which would otherwise let a
    stale handle deactivate the new owner.

    Args:
        controller_id: Controller whose owner is released.
        epoch: Epoch of the caller's handle (required unless ``force``).
        token: Token of the caller's handle (required unless ``force``).
        force: Skip handle validation and release whoever is active.

    Returns:
        ``{"controllerId", "released": False, "reason": "NO_ACTIVE_OWNER"}``
        when nobody is active, otherwise a dict with ``released: True``,
        ``releasedAt``, ``instanceId`` and ``epoch``.

    Raises:
        SubagentError: ``CONTROLLER_NOT_FOUND`` for an unknown controller,
            ``INVALID_CONTROLLER_HANDLE`` when the handle is missing or does
            not match the active instance.
    """
    with self.connection() as conn:
        # Take the write lock before reading so the row validated below is
        # still the active one when it is updated.
        conn.execute("BEGIN IMMEDIATE")

        known = conn.execute(
            "SELECT controller_id FROM controllers WHERE controller_id = ?",
            (controller_id,),
        ).fetchone()
        if known is None:
            raise SubagentError(
                code="CONTROLLER_NOT_FOUND",
                message=f"Controller not found: {controller_id}",
                details={"controllerId": controller_id},
            )

        active = conn.execute(
            """
            SELECT instance_id, epoch, token
            FROM controller_instances
            WHERE controller_id = ? AND is_active = 1
            """,
            (controller_id,),
        ).fetchone()
        if active is None:
            return {
                "controllerId": controller_id,
                "released": False,
                "reason": "NO_ACTIVE_OWNER",
            }

        if not force:
            if epoch is None or token is None:
                raise SubagentError(
                    code="INVALID_CONTROLLER_HANDLE",
                    message="Release requires epoch and token, or use --force.",
                    details={"controllerId": controller_id},
                )
            if int(active["epoch"]) != int(epoch) or str(active["token"]) != str(token):
                raise SubagentError(
                    code="INVALID_CONTROLLER_HANDLE",
                    message="Controller handle is stale or invalid",
                    details={"controllerId": controller_id, "epoch": epoch},
                )

        released_at = utc_now()
        # Release by instance_id: only the instance that was just validated.
        conn.execute(
            """
            UPDATE controller_instances
            SET is_active = 0, released_at = ?
            WHERE instance_id = ? AND is_active = 1
            """,
            (released_at, active["instance_id"]),
        )
        released_instance_id = active["instance_id"]
        released_epoch = active["epoch"]

    return {
        "controllerId": controller_id,
        "released": True,
        "releasedAt": released_at,
        "instanceId": released_instance_id,
        "epoch": released_epoch,
    }
500
+
501
def get_controller_status(self, controller_id: str) -> dict[str, Any]:
    """Summarise a controller and its current owner.

    ``state`` is ``"active"`` when an active instance exists and
    ``"dormant"`` otherwise; ``activeOwner`` carries the owner's instance id,
    epoch, pid and creation time (never the token), or ``None``.

    Raises:
        SubagentError: ``CONTROLLER_NOT_FOUND`` for an unknown controller.
    """
    controller = self.get_controller(controller_id)
    if controller is None:
        raise SubagentError(
            code="CONTROLLER_NOT_FOUND",
            message=f"Controller not found: {controller_id}",
            details={"controllerId": controller_id},
        )

    active = self.get_active_instance(controller_id)
    owner: dict[str, Any] | None = None
    if active:
        owner = {
            "instanceId": active["instance_id"],
            "epoch": active["epoch"],
            "pid": active["pid"],
            "createdAt": active["created_at"],
        }
    return {
        "controllerId": controller["controller_id"],
        "label": controller["label"],
        "workspaceKey": controller["workspace_key"],
        "state": "dormant" if active is None else "active",
        "activeOwner": owner,
    }
525
+
526
def create_worker(
    self,
    *,
    controller_id: str,
    launcher: str,
    profile: str,
    packs: list[str],
    cwd: str,
    label: str,
    session_id: str | None = None,
) -> dict[str, Any]:
    """Create a worker under a controller and return its deserialised row.

    The row is inserted in the ``starting`` state with recovery state
    ``"restartable"`` and moved to ``idle`` within the same transaction, so
    the returned worker is ``idle``. A session id is generated when none is
    supplied.

    Raises:
        SubagentError: ``CONTROLLER_NOT_FOUND`` for an unknown controller.
    """
    if self.get_controller(controller_id) is None:
        raise SubagentError(
            code="CONTROLLER_NOT_FOUND",
            message=f"Controller not found: {controller_id}",
            details={"controllerId": controller_id},
        )

    worker_id = f"w_{uuid.uuid4().hex[:10]}"
    effective_session_id = session_id or f"sess_{uuid.uuid4().hex[:12]}"
    created = utc_now()
    insert_values = (
        worker_id,
        controller_id,
        label,
        launcher,
        profile,
        json.dumps(packs, ensure_ascii=False),
        cwd,
        effective_session_id,
        None,  # runtime_pid
        None,  # runtime_socket
        WORKER_STATE_STARTING,
        "restartable",
        None,  # active_turn_id
        created,
        created,
    )
    with self.connection() as conn:
        conn.execute(
            """
            INSERT INTO workers(
                worker_id, controller_id, label, launcher, profile, packs_json,
                cwd, session_id, runtime_pid, runtime_socket,
                state, recovery_state, active_turn_id, created_at, updated_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            insert_values,
        )
        conn.execute(
            """
            UPDATE workers
            SET state = ?, updated_at = ?
            WHERE worker_id = ?
            """,
            (WORKER_STATE_IDLE, utc_now(), worker_id),
        )
        stored = conn.execute(
            "SELECT * FROM workers WHERE worker_id = ?", (worker_id,)
        ).fetchone()
    return _deserialize_worker_row(_row_to_dict(stored)) or {}
585
+
586
def list_workers(self, *, controller_id: str | None = None) -> list[dict[str, Any]]:
    """Return workers newest first, optionally restricted to one controller."""
    if controller_id is None:
        where_clause, params = "", ()
    else:
        where_clause, params = " WHERE controller_id = ?", (controller_id,)
    sql = "SELECT * FROM workers" + where_clause + " ORDER BY created_at DESC, worker_id DESC"

    with self.connection() as conn:
        fetched = conn.execute(sql, params).fetchall()

    result: list[dict[str, Any]] = []
    for record in fetched:
        worker = _deserialize_worker_row(_row_to_dict(record))
        if worker is not None:
            result.append(worker)
    return result
597
+
598
def get_worker(self, worker_id: str) -> dict[str, Any] | None:
    """Return the deserialised worker row for ``worker_id``, or ``None`` if absent."""
    lookup_sql = "SELECT * FROM workers WHERE worker_id = ?"
    with self.connection() as conn:
        found = conn.execute(lookup_sql, (worker_id,)).fetchone()
    return _deserialize_worker_row(_row_to_dict(found))
602
+
603
def update_worker_state(
    self,
    worker_id: str,
    *,
    next_state: str,
    allow_any_transition: bool = False,
    last_error: str | None = None,
) -> dict[str, Any]:
    """Move a worker to ``next_state`` and return the updated row.

    The current state is read and the new state written inside one
    ``BEGIN IMMEDIATE`` transaction, so two concurrent callers cannot both
    validate against the same stale state.

    Side effects of a successful transition: ``updated_at`` is refreshed,
    ``last_error`` is overwritten with the given value (``None`` clears it),
    ``stopped_at`` is set when entering ``stopped``, and ``active_turn_id``
    is cleared when entering ``idle``, ``stopped`` or ``error``.

    Args:
        worker_id: Worker to update.
        next_state: One of ``WORKER_RUNTIME_STATES``.
        allow_any_transition: Skip the ``WORKER_ALLOWED_TRANSITIONS`` check.
        last_error: Error text to store with the new state.

    Returns:
        The worker row; unchanged when it is already in ``next_state``.

    Raises:
        SubagentError: ``INVALID_WORKER_STATE`` for an unknown target state,
            ``WORKER_NOT_FOUND`` for an unknown worker,
            ``INVALID_WORKER_STATE_TRANSITION`` for a disallowed move.
    """
    if next_state not in WORKER_RUNTIME_STATES:
        raise SubagentError(
            code="INVALID_WORKER_STATE",
            message=f"Unknown worker state: {next_state}",
            details={"state": next_state},
        )

    with self.connection() as conn:
        # Lock first so the state checked below is the state being replaced.
        conn.execute("BEGIN IMMEDIATE")
        current = conn.execute(
            "SELECT * FROM workers WHERE worker_id = ?", (worker_id,)
        ).fetchone()
        if current is None:
            raise SubagentError(
                code="WORKER_NOT_FOUND",
                message=f"Worker not found: {worker_id}",
                details={"workerId": worker_id},
            )

        current_state = str(current["state"])
        if current_state == next_state:
            return _deserialize_worker_row(_row_to_dict(current)) or {}

        # A stored state missing from the table has no allowed targets; this
        # yields the structured transition error instead of a bare KeyError.
        allowed_targets = WORKER_ALLOWED_TRANSITIONS.get(current_state, set())
        if not allow_any_transition and next_state not in allowed_targets:
            raise SubagentError(
                code="INVALID_WORKER_STATE_TRANSITION",
                message=f"Cannot transition worker from {current_state} to {next_state}",
                details={
                    "workerId": worker_id,
                    "currentState": current_state,
                    "nextState": next_state,
                },
            )

        now = utc_now()
        stopped_at = now if next_state == WORKER_STATE_STOPPED else None
        clear_active_turn = next_state in {
            WORKER_STATE_IDLE,
            WORKER_STATE_STOPPED,
            WORKER_STATE_ERROR,
        }
        conn.execute(
            """
            UPDATE workers
            SET state = ?,
                updated_at = ?,
                stopped_at = COALESCE(?, stopped_at),
                last_error = ?,
                active_turn_id = CASE WHEN ? THEN NULL ELSE active_turn_id END
            WHERE worker_id = ?
            """,
            (next_state, now, stopped_at, last_error, int(clear_active_turn), worker_id),
        )
        updated = conn.execute(
            "SELECT * FROM workers WHERE worker_id = ?", (worker_id,)
        ).fetchone()
    return _deserialize_worker_row(_row_to_dict(updated)) or {}
659
+
660
def stop_worker(self, worker_id: str, *, force: bool = False) -> dict[str, Any]:
    """Move a worker to ``stopped`` and return its row.

    A worker that is already stopped is returned unchanged. ``force`` is
    forwarded as ``allow_any_transition`` to ``update_worker_state``.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    existing = self.get_worker(worker_id)
    if existing is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    already_stopped = str(existing["state"]) == WORKER_STATE_STOPPED
    if not already_stopped:
        return self.update_worker_state(
            worker_id,
            next_state=WORKER_STATE_STOPPED,
            allow_any_transition=force,
        )
    return existing
675
+
676
def set_worker_active_turn(self, worker_id: str, turn_id: str | None) -> dict[str, Any]:
    """Store ``turn_id`` as the worker's active turn (``None`` clears it).

    Returns:
        The updated worker row.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    if self.get_worker(worker_id) is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    with self.connection() as conn:
        conn.execute(
            "UPDATE workers SET active_turn_id = ?, updated_at = ? WHERE worker_id = ?",
            (turn_id, utc_now(), worker_id),
        )
        updated = conn.execute(
            "SELECT * FROM workers WHERE worker_id = ?", (worker_id,)
        ).fetchone()
    return _deserialize_worker_row(_row_to_dict(updated)) or {}
691
+
692
def set_worker_session_id(self, worker_id: str, session_id: str) -> dict[str, Any]:
    """Replace the worker's stored session id and return the updated row.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    if self.get_worker(worker_id) is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    with self.connection() as conn:
        conn.execute(
            "UPDATE workers SET session_id = ?, updated_at = ? WHERE worker_id = ?",
            (session_id, utc_now(), worker_id),
        )
        updated = conn.execute(
            "SELECT * FROM workers WHERE worker_id = ?", (worker_id,)
        ).fetchone()
    return _deserialize_worker_row(_row_to_dict(updated)) or {}
707
+
708
def set_worker_runtime_endpoint(
    self,
    worker_id: str,
    *,
    runtime_pid: int | None,
    runtime_socket: str | None,
) -> dict[str, Any]:
    """Record the worker's runtime pid and socket, and return the updated row.

    Passing ``None`` for a value stores NULL in that column.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    if self.get_worker(worker_id) is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    with self.connection() as conn:
        conn.execute(
            """
            UPDATE workers
            SET runtime_pid = ?, runtime_socket = ?, updated_at = ?
            WHERE worker_id = ?
            """,
            (runtime_pid, runtime_socket, utc_now(), worker_id),
        )
        updated = conn.execute(
            "SELECT * FROM workers WHERE worker_id = ?", (worker_id,)
        ).fetchone()
    return _deserialize_worker_row(_row_to_dict(updated)) or {}
733
+
734
def clear_worker_runtime_endpoint(self, worker_id: str) -> dict[str, Any]:
    """Reset the worker's runtime pid and socket to NULL and return the updated row."""
    cleared: dict[str, Any] = {"runtime_pid": None, "runtime_socket": None}
    return self.set_worker_runtime_endpoint(worker_id, **cleared)
740
+
741
+ def _resolve_event_cursor_seq(
742
+ self,
743
+ conn: sqlite3.Connection,
744
+ *,
745
+ worker_id: str,
746
+ from_event_id: str | None,
747
+ ) -> int:
748
+ if from_event_id is None:
749
+ return 0
750
+ row = conn.execute(
751
+ "SELECT event_seq FROM worker_events WHERE worker_id = ? AND event_id = ?",
752
+ (worker_id, from_event_id),
753
+ ).fetchone()
754
+ if row is None:
755
+ raise SubagentError(
756
+ code="EVENT_NOT_FOUND",
757
+ message=f"Event not found: {from_event_id}",
758
+ details={"workerId": worker_id, "eventId": from_event_id},
759
+ )
760
+ return int(row["event_seq"])
761
+
762
def append_worker_event(
    self,
    worker_id: str,
    *,
    event_type: str,
    data: dict[str, Any],
    turn_id: str | None = None,
    raw: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Append an event to a worker's log and return the stored event.

    The next sequence number is computed and the row inserted inside one
    ``BEGIN IMMEDIATE`` transaction. ``data`` (and ``raw`` when given) are
    stored as JSON text.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    if self.get_worker(worker_id) is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    with self.connection() as conn:
        conn.execute("BEGIN IMMEDIATE")
        seq_row = conn.execute(
            "SELECT COALESCE(MAX(event_seq), 0) AS max_seq FROM worker_events WHERE worker_id = ?",
            (worker_id,),
        ).fetchone()
        next_seq = 1 if not seq_row else int(seq_row["max_seq"]) + 1
        event_id = f"ev_{uuid.uuid4().hex[:12]}"
        ts = utc_now()
        data_json = json.dumps(data, ensure_ascii=False)
        raw_json = None if raw is None else json.dumps(raw, ensure_ascii=False)
        conn.execute(
            """
            INSERT INTO worker_events(event_id, worker_id, event_seq, ts, event_type, turn_id, data_json, raw_json)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (event_id, worker_id, next_seq, ts, event_type, turn_id, data_json, raw_json),
        )
        stored = conn.execute(
            "SELECT * FROM worker_events WHERE event_id = ?", (event_id,)
        ).fetchone()
    return _deserialize_event_row(_row_to_dict(stored)) or {}
805
+
806
def list_worker_events(
    self,
    worker_id: str,
    *,
    from_event_id: str | None = None,
    limit: int | None = None,
) -> list[dict[str, Any]]:
    """Return a worker's events in ascending sequence order.

    Args:
        worker_id: Worker whose log is read.
        from_event_id: Exclusive cursor; only events after this one are
            returned. ``None`` starts from the beginning.
        limit: Passed to SQL ``LIMIT`` when given.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker,
            ``EVENT_NOT_FOUND`` for an unknown cursor event.
    """
    if self.get_worker(worker_id) is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    with self.connection() as conn:
        after_seq = self._resolve_event_cursor_seq(
            conn,
            worker_id=worker_id,
            from_event_id=from_event_id,
        )
        sql = (
            "SELECT * FROM worker_events WHERE worker_id = ? AND event_seq > ? "
            "ORDER BY event_seq ASC"
        )
        args: tuple[Any, ...] = (worker_id, after_seq)
        if limit is not None:
            sql, args = sql + " LIMIT ?", args + (limit,)
        fetched = conn.execute(sql, args).fetchall()

    result: list[dict[str, Any]] = []
    for record in fetched:
        event = _deserialize_event_row(_row_to_dict(record))
        if event is not None:
            result.append(event)
    return result
837
+
838
def get_latest_worker_event(self, worker_id: str) -> dict[str, Any] | None:
    """Return the worker's event with the highest sequence number, or ``None``.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    if self.get_worker(worker_id) is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    latest_sql = "SELECT * FROM worker_events WHERE worker_id = ? ORDER BY event_seq DESC LIMIT 1"
    with self.connection() as conn:
        latest = conn.execute(latest_sql, (worker_id,)).fetchone()
    return _deserialize_event_row(_row_to_dict(latest))
852
+
853
def create_approval_request(
    self,
    worker_id: str,
    *,
    turn_id: str | None,
    message: str,
    kind: str = "tool.call",
    options: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Store a pending approval request for a worker and return it.

    When ``options`` is ``None`` or empty, an allow/deny pair is used.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    if self.get_worker(worker_id) is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )

    if options:
        normalized_options = options
    else:
        normalized_options = [
            {"id": "allow", "alias": "allow", "label": "Allow"},
            {"id": "deny", "alias": "deny", "label": "Deny"},
        ]
    request_id = f"ap_{uuid.uuid4().hex[:10]}"
    created_at = utc_now()
    insert_values = (
        request_id,
        worker_id,
        turn_id,
        APPROVAL_STATUS_PENDING,
        kind,
        message,
        json.dumps(normalized_options, ensure_ascii=False),
        created_at,
    )
    with self.connection() as conn:
        conn.execute(
            """
            INSERT INTO approval_requests(
                request_id, worker_id, turn_id, status, kind, message, options_json, created_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            insert_values,
        )
        stored = conn.execute(
            "SELECT * FROM approval_requests WHERE request_id = ?",
            (request_id,),
        ).fetchone()
    return _deserialize_approval_row(_row_to_dict(stored)) or {}
899
+
900
def get_approval_request(self, worker_id: str, request_id: str) -> dict[str, Any] | None:
    """Return the approval request with this id for this worker, or ``None``."""
    key = (worker_id, request_id)
    with self.connection() as conn:
        found = conn.execute(
            """
            SELECT *
            FROM approval_requests
            WHERE worker_id = ? AND request_id = ?
            """,
            key,
        ).fetchone()
    return _deserialize_approval_row(_row_to_dict(found))
911
+
912
def list_pending_approval_requests(self, worker_id: str) -> list[dict[str, Any]]:
    """Return the worker's pending approval requests, oldest first.

    ``created_at`` has one-second resolution, so requests created within the
    same second are further ordered by ``rowid`` to keep the result
    deterministic.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    worker = self.get_worker(worker_id)
    if worker is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    with self.connection() as conn:
        # rowid is the tie-breaker for equal timestamps.
        # NOTE(review): assumes rowid follows insertion order, which holds
        # while rows are never deleted from this table - confirm.
        rows = conn.execute(
            """
            SELECT *
            FROM approval_requests
            WHERE worker_id = ? AND status = ?
            ORDER BY created_at ASC, rowid ASC
            """,
            (worker_id, APPROVAL_STATUS_PENDING),
        ).fetchall()
    requests = [_deserialize_approval_row(_row_to_dict(row)) for row in rows]
    return [request for request in requests if request is not None]
932
+
933
def decide_approval_request(
    self,
    worker_id: str,
    request_id: str,
    *,
    decision: str,
    selected_option_id: str,
    selected_alias: str | None,
    note: str | None = None,
) -> dict[str, Any]:
    """Record the decision for a pending approval request and return the row.

    The pending check and the update run inside one ``BEGIN IMMEDIATE``
    transaction, and the UPDATE itself is restricted to pending rows. Two
    concurrent deciders therefore cannot both succeed: the second one gets
    ``APPROVAL_NOT_PENDING`` instead of overwriting the first decision.

    Raises:
        SubagentError: ``APPROVAL_NOT_FOUND`` when the worker has no such
            request, ``APPROVAL_NOT_PENDING`` when it is no longer pending.
    """
    with self.connection() as conn:
        conn.execute("BEGIN IMMEDIATE")
        current = conn.execute(
            """
            SELECT status
            FROM approval_requests
            WHERE worker_id = ? AND request_id = ?
            """,
            (worker_id, request_id),
        ).fetchone()
        if current is None:
            raise SubagentError(
                code="APPROVAL_NOT_FOUND",
                message=f"Approval request not found: {request_id}",
                details={"workerId": worker_id, "requestId": request_id},
            )
        if str(current["status"]) != APPROVAL_STATUS_PENDING:
            raise SubagentError(
                code="APPROVAL_NOT_PENDING",
                message=f"Approval request is not pending: {request_id}",
                details={"requestId": request_id, "status": current["status"]},
            )

        now = utc_now()
        # The status guard makes the write a no-op on a non-pending row.
        conn.execute(
            """
            UPDATE approval_requests
            SET status = ?, decided_at = ?, decision = ?, selected_option_id = ?, selected_alias = ?, note = ?
            WHERE request_id = ? AND worker_id = ? AND status = ?
            """,
            (
                APPROVAL_STATUS_DECIDED,
                now,
                decision,
                selected_option_id,
                selected_alias,
                note,
                request_id,
                worker_id,
                APPROVAL_STATUS_PENDING,
            ),
        )
        row = conn.execute(
            """
            SELECT *
            FROM approval_requests
            WHERE request_id = ? AND worker_id = ?
            """,
            (request_id, worker_id),
        ).fetchone()
    return _deserialize_approval_row(_row_to_dict(row)) or {}
984
+
985
def register_handoff_snapshot(
    self,
    *,
    worker_id: str,
    source_turn_id: str | None,
    handoff_path: str,
    checkpoint_path: str,
) -> dict[str, Any]:
    """Record a handoff snapshot for a worker and return the stored row.

    The snapshot is linked to the worker's own controller.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    owner = self.get_worker(worker_id)
    if owner is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    snapshot_id = f"hs_{uuid.uuid4().hex[:10]}"
    created_at = utc_now()
    insert_values = (
        snapshot_id,
        worker_id,
        owner["controller_id"],
        source_turn_id,
        handoff_path,
        checkpoint_path,
        created_at,
    )
    with self.connection() as conn:
        conn.execute(
            """
            INSERT INTO handoff_snapshots(
                snapshot_id, worker_id, controller_id, source_turn_id, handoff_path, checkpoint_path, created_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            insert_values,
        )
        stored = conn.execute(
            """
            SELECT *
            FROM handoff_snapshots
            WHERE snapshot_id = ?
            """,
            (snapshot_id,),
        ).fetchone()
    return _row_to_dict(stored) or {}
1029
+
1030
def get_latest_handoff_snapshot(self, worker_id: str) -> dict[str, Any] | None:
    """Return the worker's most recent handoff snapshot, or ``None`` if it has none.

    ``created_at`` has one-second resolution, so snapshots taken within the
    same second are further ordered by ``rowid``; without the tie-breaker
    "latest" would be arbitrary among them.

    Raises:
        SubagentError: ``WORKER_NOT_FOUND`` for an unknown worker.
    """
    worker = self.get_worker(worker_id)
    if worker is None:
        raise SubagentError(
            code="WORKER_NOT_FOUND",
            message=f"Worker not found: {worker_id}",
            details={"workerId": worker_id},
        )
    with self.connection() as conn:
        # NOTE(review): assumes rowid follows insertion order, which holds
        # while rows are never deleted from this table - confirm.
        row = conn.execute(
            """
            SELECT *
            FROM handoff_snapshots
            WHERE worker_id = ?
            ORDER BY created_at DESC, rowid DESC
            LIMIT 1
            """,
            (worker_id,),
        ).fetchone()
    return _row_to_dict(row)