zwarm 2.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/__init__.py +38 -0
- zwarm/adapters/__init__.py +21 -0
- zwarm/adapters/base.py +109 -0
- zwarm/adapters/claude_code.py +357 -0
- zwarm/adapters/codex_mcp.py +1262 -0
- zwarm/adapters/registry.py +69 -0
- zwarm/adapters/test_codex_mcp.py +274 -0
- zwarm/adapters/test_registry.py +68 -0
- zwarm/cli/__init__.py +0 -0
- zwarm/cli/main.py +2503 -0
- zwarm/core/__init__.py +0 -0
- zwarm/core/compact.py +329 -0
- zwarm/core/config.py +344 -0
- zwarm/core/environment.py +173 -0
- zwarm/core/models.py +315 -0
- zwarm/core/state.py +355 -0
- zwarm/core/test_compact.py +312 -0
- zwarm/core/test_config.py +160 -0
- zwarm/core/test_models.py +265 -0
- zwarm/orchestrator.py +683 -0
- zwarm/prompts/__init__.py +10 -0
- zwarm/prompts/orchestrator.py +230 -0
- zwarm/sessions/__init__.py +26 -0
- zwarm/sessions/manager.py +792 -0
- zwarm/test_orchestrator_watchers.py +23 -0
- zwarm/tools/__init__.py +17 -0
- zwarm/tools/delegation.py +784 -0
- zwarm/watchers/__init__.py +31 -0
- zwarm/watchers/base.py +131 -0
- zwarm/watchers/builtin.py +518 -0
- zwarm/watchers/llm_watcher.py +319 -0
- zwarm/watchers/manager.py +181 -0
- zwarm/watchers/registry.py +57 -0
- zwarm/watchers/test_watchers.py +237 -0
- zwarm-2.3.5.dist-info/METADATA +309 -0
- zwarm-2.3.5.dist-info/RECORD +38 -0
- zwarm-2.3.5.dist-info/WHEEL +4 -0
- zwarm-2.3.5.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,792 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Codex Session Manager - Background process management for Codex agents.
|
|
3
|
+
|
|
4
|
+
Architecture:
|
|
5
|
+
- Each session runs `codex exec --json` in a background subprocess
|
|
6
|
+
- Output is streamed to .zwarm/sessions/<session_id>/turns/turn_<n>.jsonl (one file per turn)
|
|
7
|
+
- Session metadata stored in meta.json
|
|
8
|
+
- Can inject follow-up messages by starting new turns with context
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import signal
|
|
16
|
+
import subprocess
|
|
17
|
+
import time
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
from uuid import uuid4
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SessionStatus(str, Enum):
    """Lifecycle states of a codex session.

    The enum mixes in ``str``, so every member compares equal to its
    string value.
    """

    # Session record was created but no process has been started yet.
    PENDING = "pending"
    # The session's process is running.
    RUNNING = "running"
    # The process exited successfully.
    COMPLETED = "completed"
    # The process exited with an error.
    FAILED = "failed"
    # The session was killed manually.
    KILLED = "killed"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class SessionMessage:
    """One entry in a session's message history."""

    role: str  # "user", "assistant", "system", "tool"
    content: str
    timestamp: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the message as a plain dict keyed by field name.

        The ``metadata`` value is the message's own dict, not a copy.
        """
        field_names = ("role", "content", "timestamp", "metadata")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict) -> "SessionMessage":
        """Build a message from ``data``, filling in defaults for absent keys.

        Missing ``role`` becomes ``"unknown"``, missing ``content`` and
        ``timestamp`` become empty strings, and missing ``metadata``
        becomes an empty dict.
        """
        text_defaults = (("role", "unknown"), ("content", ""), ("timestamp", ""))
        kwargs = {name: data.get(name, default) for name, default in text_defaults}
        kwargs["metadata"] = data.get("metadata", {})
        return cls(**kwargs)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class CodexSession:
    """A managed Codex session: its task, process id, status and history.

    Instances are converted to and from plain dicts with :meth:`to_dict`
    and :meth:`from_dict`.
    """

    id: str
    task: str
    status: SessionStatus
    working_dir: Path
    created_at: str  # ISO-8601 string; parsed with datetime.fromisoformat
    updated_at: str
    pid: int | None = None
    exit_code: int | None = None
    model: str = "gpt-5.1-codex-mini"
    turn: int = 1
    messages: list[SessionMessage] = field(default_factory=list)
    token_usage: dict[str, int] = field(default_factory=dict)
    error: str | None = None
    # Source tracking: "user" for direct spawns, "orchestrator:<instance_id>" for delegated
    source: str = "user"
    # Adapter used: "codex", "claude_code", etc.
    adapter: str = "codex"

    def to_dict(self) -> dict:
        """Return a dict of plain values describing this session.

        ``status`` is stored as its string value, ``working_dir`` as a
        string, and each message as the dict from its ``to_dict()``.
        """
        return {
            "id": self.id,
            "task": self.task,
            "status": self.status.value,
            "working_dir": str(self.working_dir),
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "pid": self.pid,
            "exit_code": self.exit_code,
            "model": self.model,
            "turn": self.turn,
            "messages": [m.to_dict() for m in self.messages],
            "token_usage": self.token_usage,
            "error": self.error,
            "source": self.source,
            "adapter": self.adapter,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "CodexSession":
        """Rebuild a session from a dict produced by :meth:`to_dict`.

        Raises:
            KeyError: if ``id``, ``task``, ``status``, ``working_dir``,
                ``created_at`` or ``updated_at`` is missing.
            ValueError: if ``status`` is not a valid ``SessionStatus`` value.
        """
        return cls(
            id=data["id"],
            task=data["task"],
            status=SessionStatus(data["status"]),
            working_dir=Path(data["working_dir"]),
            created_at=data["created_at"],
            updated_at=data["updated_at"],
            pid=data.get("pid"),
            exit_code=data.get("exit_code"),
            model=data.get("model", "gpt-5.1-codex-mini"),
            turn=data.get("turn", 1),
            # ``or`` also covers an explicit null stored for these keys.
            messages=[SessionMessage.from_dict(m) for m in data.get("messages") or []],
            token_usage=data.get("token_usage") or {},
            error=data.get("error"),
            source=data.get("source", "user"),
            adapter=data.get("adapter", "codex"),
        )

    @property
    def is_running(self) -> bool:
        """Check if the session process is still running.

        Returns False when no pid is recorded, when the pid is not a
        positive process id, or when the process has exited.
        """
        pid = self.pid
        # pid 0 and negative pids address process groups in kill()/waitpid(),
        # never a single session process, so they are not probed.
        if pid is None or pid <= 0:
            return False

        # A child that has exited but was never waited on stays in the
        # process table as a zombie, and kill(pid, 0) still succeeds for it.
        # Reap it (non-blocking) so an exited child is reported as stopped.
        wnohang = getattr(os, "WNOHANG", None)  # not available on every platform
        if wnohang is not None:
            try:
                reaped_pid, _ = os.waitpid(pid, wnohang)
            except OSError:
                # Not a child of this process (or already reaped): fall
                # through to the signal-0 probe below.
                reaped_pid = 0
            if reaped_pid == pid:
                return False

        try:
            os.kill(pid, 0)  # Signal 0 just checks if process exists
        except PermissionError:
            # The process exists but we are not allowed to signal it.
            return True
        except OSError:
            return False
        return True

    @property
    def short_id(self) -> str:
        """Get first 8 chars of ID for display."""
        return self.id[:8]

    @property
    def runtime(self) -> str:
        """Get the time elapsed since ``created_at`` in human-readable form.

        Returns whole seconds (``"42s"``) below one minute, whole minutes
        (``"5m"``) below one hour, and hours with one decimal (``"2.5h"``)
        otherwise.
        """
        created = datetime.fromisoformat(self.created_at)
        # Use a "now" with the same tz-awareness as the stored timestamp;
        # subtracting a naive and an aware datetime raises TypeError.
        elapsed = (datetime.now(created.tzinfo) - created).total_seconds()

        if elapsed < 60:
            return f"{int(elapsed)}s"
        if elapsed < 3600:
            return f"{int(elapsed / 60)}m"
        return f"{elapsed / 3600:.1f}h"

    @property
    def source_display(self) -> str:
        """Get short display string for source."""
        if self.source == "user":
            return "you"
        if self.source.startswith("orchestrator:"):
            # Extract instance ID and shorten it
            instance_id = self.source.split(":", 1)[1]
            return f"orch:{instance_id[:4]}"
        return self.source[:8]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class CodexSessionManager:
|
|
165
|
+
"""
|
|
166
|
+
Manages background Codex sessions.
|
|
167
|
+
|
|
168
|
+
Sessions are stored in:
|
|
169
|
+
.zwarm/sessions/<session_id>/
|
|
170
|
+
meta.json - Session metadata
|
|
171
|
+
output.jsonl - Raw JSONL output from codex exec
|
|
172
|
+
turns/
|
|
173
|
+
turn_1.jsonl
|
|
174
|
+
turn_2.jsonl
|
|
175
|
+
...
|
|
176
|
+
"""
|
|
177
|
+
|
|
178
|
+
def __init__(self, state_dir: Path | str = ".zwarm"):
|
|
179
|
+
self.state_dir = Path(state_dir)
|
|
180
|
+
self.sessions_dir = self.state_dir / "sessions"
|
|
181
|
+
self.sessions_dir.mkdir(parents=True, exist_ok=True)
|
|
182
|
+
|
|
183
|
+
def _session_dir(self, session_id: str) -> Path:
|
|
184
|
+
"""Get the directory for a session."""
|
|
185
|
+
return self.sessions_dir / session_id
|
|
186
|
+
|
|
187
|
+
def _meta_path(self, session_id: str) -> Path:
|
|
188
|
+
"""Get the metadata file path for a session."""
|
|
189
|
+
return self._session_dir(session_id) / "meta.json"
|
|
190
|
+
|
|
191
|
+
def _output_path(self, session_id: str, turn: int = 1) -> Path:
|
|
192
|
+
"""Get the output file path for a session turn."""
|
|
193
|
+
session_dir = self._session_dir(session_id)
|
|
194
|
+
turns_dir = session_dir / "turns"
|
|
195
|
+
turns_dir.mkdir(parents=True, exist_ok=True)
|
|
196
|
+
return turns_dir / f"turn_{turn}.jsonl"
|
|
197
|
+
|
|
198
|
+
def _save_session(self, session: CodexSession) -> None:
|
|
199
|
+
"""Save session metadata."""
|
|
200
|
+
session.updated_at = datetime.now().isoformat()
|
|
201
|
+
meta_path = self._meta_path(session.id)
|
|
202
|
+
meta_path.parent.mkdir(parents=True, exist_ok=True)
|
|
203
|
+
meta_path.write_text(json.dumps(session.to_dict(), indent=2))
|
|
204
|
+
|
|
205
|
+
def _load_session(self, session_id: str) -> CodexSession | None:
|
|
206
|
+
"""Load session from disk."""
|
|
207
|
+
meta_path = self._meta_path(session_id)
|
|
208
|
+
if not meta_path.exists():
|
|
209
|
+
return None
|
|
210
|
+
try:
|
|
211
|
+
data = json.loads(meta_path.read_text())
|
|
212
|
+
return CodexSession.from_dict(data)
|
|
213
|
+
except (json.JSONDecodeError, KeyError) as e:
|
|
214
|
+
print(f"Error loading session {session_id}: {e}")
|
|
215
|
+
return None
|
|
216
|
+
|
|
217
|
+
def list_sessions(self, status: SessionStatus | None = None) -> list[CodexSession]:
|
|
218
|
+
"""List all sessions, optionally filtered by status."""
|
|
219
|
+
sessions = []
|
|
220
|
+
if not self.sessions_dir.exists():
|
|
221
|
+
return sessions
|
|
222
|
+
|
|
223
|
+
for session_dir in self.sessions_dir.iterdir():
|
|
224
|
+
if not session_dir.is_dir():
|
|
225
|
+
continue
|
|
226
|
+
session = self._load_session(session_dir.name)
|
|
227
|
+
if session:
|
|
228
|
+
# Update status if process died OR output indicates completion
|
|
229
|
+
# (output check is more reliable than PID check due to PID reuse)
|
|
230
|
+
if session.status == SessionStatus.RUNNING:
|
|
231
|
+
if self._is_output_complete(session.id, session.turn) or not session.is_running:
|
|
232
|
+
self._update_session_status(session)
|
|
233
|
+
|
|
234
|
+
if status is None or session.status == status:
|
|
235
|
+
sessions.append(session)
|
|
236
|
+
|
|
237
|
+
# Sort by created_at descending (newest first)
|
|
238
|
+
sessions.sort(key=lambda s: s.created_at, reverse=True)
|
|
239
|
+
return sessions
|
|
240
|
+
|
|
241
|
+
def get_session(self, session_id: str) -> CodexSession | None:
|
|
242
|
+
"""Get a session by ID (supports partial ID matching)."""
|
|
243
|
+
# Try exact match first
|
|
244
|
+
session = self._load_session(session_id)
|
|
245
|
+
if session:
|
|
246
|
+
if session.status == SessionStatus.RUNNING:
|
|
247
|
+
if self._is_output_complete(session.id, session.turn) or not session.is_running:
|
|
248
|
+
self._update_session_status(session)
|
|
249
|
+
return session
|
|
250
|
+
|
|
251
|
+
# Try partial match
|
|
252
|
+
for session_dir in self.sessions_dir.iterdir():
|
|
253
|
+
if session_dir.name.startswith(session_id):
|
|
254
|
+
session = self._load_session(session_dir.name)
|
|
255
|
+
if session:
|
|
256
|
+
if session.status == SessionStatus.RUNNING:
|
|
257
|
+
if self._is_output_complete(session.id, session.turn) or not session.is_running:
|
|
258
|
+
self._update_session_status(session)
|
|
259
|
+
return session
|
|
260
|
+
|
|
261
|
+
return None
|
|
262
|
+
|
|
263
|
+
def _is_output_complete(self, session_id: str, turn: int) -> bool:
|
|
264
|
+
"""
|
|
265
|
+
Check if output file indicates the task completed.
|
|
266
|
+
|
|
267
|
+
Looks for completion markers like 'turn.completed' or 'task.completed'
|
|
268
|
+
in the JSONL output. This is more reliable than PID checking.
|
|
269
|
+
"""
|
|
270
|
+
output_path = self._output_path(session_id, turn)
|
|
271
|
+
if not output_path.exists():
|
|
272
|
+
return False
|
|
273
|
+
|
|
274
|
+
try:
|
|
275
|
+
content = output_path.read_text()
|
|
276
|
+
for line in content.strip().split("\n"):
|
|
277
|
+
if not line.strip():
|
|
278
|
+
continue
|
|
279
|
+
try:
|
|
280
|
+
event = json.loads(line)
|
|
281
|
+
event_type = event.get("type", "")
|
|
282
|
+
# Check for any completion marker
|
|
283
|
+
if event_type in ("turn.completed", "task.completed", "completed", "done"):
|
|
284
|
+
return True
|
|
285
|
+
# Also check for error as a form of completion
|
|
286
|
+
if event_type == "error":
|
|
287
|
+
return True
|
|
288
|
+
except json.JSONDecodeError:
|
|
289
|
+
continue
|
|
290
|
+
except Exception:
|
|
291
|
+
pass
|
|
292
|
+
|
|
293
|
+
return False
|
|
294
|
+
|
|
295
|
+
def _update_session_status(self, session: CodexSession) -> None:
|
|
296
|
+
"""Update session status after process completion."""
|
|
297
|
+
# Parse output to determine status
|
|
298
|
+
output_path = self._output_path(session.id, session.turn)
|
|
299
|
+
if output_path.exists():
|
|
300
|
+
messages, usage, error = self._parse_output(output_path)
|
|
301
|
+
session.messages = messages
|
|
302
|
+
session.token_usage = usage
|
|
303
|
+
|
|
304
|
+
# Check if we got actual assistant responses
|
|
305
|
+
has_response = any(m.role == "assistant" for m in messages)
|
|
306
|
+
|
|
307
|
+
if error and not has_response:
|
|
308
|
+
# Only mark as failed if we have an error AND no response
|
|
309
|
+
session.status = SessionStatus.FAILED
|
|
310
|
+
session.error = error
|
|
311
|
+
elif error and has_response:
|
|
312
|
+
# Got response but also an error (e.g., network disconnect at end)
|
|
313
|
+
# Treat as completed but note the error
|
|
314
|
+
session.status = SessionStatus.COMPLETED
|
|
315
|
+
session.error = f"Completed with error: {error}"
|
|
316
|
+
else:
|
|
317
|
+
session.status = SessionStatus.COMPLETED
|
|
318
|
+
else:
|
|
319
|
+
session.status = SessionStatus.FAILED
|
|
320
|
+
session.error = "No output file found"
|
|
321
|
+
|
|
322
|
+
self._save_session(session)
|
|
323
|
+
|
|
324
|
+
def start_session(
|
|
325
|
+
self,
|
|
326
|
+
task: str,
|
|
327
|
+
working_dir: Path | None = None,
|
|
328
|
+
model: str = "gpt-5.1-codex-mini",
|
|
329
|
+
sandbox: str = "workspace-write",
|
|
330
|
+
source: str = "user",
|
|
331
|
+
adapter: str = "codex",
|
|
332
|
+
) -> CodexSession:
|
|
333
|
+
"""
|
|
334
|
+
Start a new Codex session in the background.
|
|
335
|
+
|
|
336
|
+
Args:
|
|
337
|
+
task: The task description
|
|
338
|
+
working_dir: Working directory for codex (default: cwd)
|
|
339
|
+
model: Model to use
|
|
340
|
+
sandbox: Sandbox mode
|
|
341
|
+
source: Who spawned this session ("user" or "orchestrator:<id>")
|
|
342
|
+
adapter: Which adapter to use ("codex", "claude_code")
|
|
343
|
+
|
|
344
|
+
Returns:
|
|
345
|
+
The created session
|
|
346
|
+
"""
|
|
347
|
+
session_id = str(uuid4())
|
|
348
|
+
working_dir = working_dir or Path.cwd()
|
|
349
|
+
now = datetime.now().isoformat()
|
|
350
|
+
|
|
351
|
+
session = CodexSession(
|
|
352
|
+
id=session_id,
|
|
353
|
+
task=task,
|
|
354
|
+
status=SessionStatus.PENDING,
|
|
355
|
+
working_dir=working_dir,
|
|
356
|
+
created_at=now,
|
|
357
|
+
updated_at=now,
|
|
358
|
+
model=model,
|
|
359
|
+
turn=1,
|
|
360
|
+
messages=[SessionMessage(role="user", content=task, timestamp=now)],
|
|
361
|
+
source=source,
|
|
362
|
+
adapter=adapter,
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
# Create session directory
|
|
366
|
+
session_dir = self._session_dir(session_id)
|
|
367
|
+
session_dir.mkdir(parents=True, exist_ok=True)
|
|
368
|
+
|
|
369
|
+
# Build command
|
|
370
|
+
cmd = [
|
|
371
|
+
"codex", "exec",
|
|
372
|
+
"--json",
|
|
373
|
+
"--full-auto",
|
|
374
|
+
"--skip-git-repo-check",
|
|
375
|
+
"--model", model,
|
|
376
|
+
"-C", str(working_dir.absolute()),
|
|
377
|
+
]
|
|
378
|
+
|
|
379
|
+
# Add sandbox mode
|
|
380
|
+
if sandbox == "danger-full-access":
|
|
381
|
+
cmd.append("--dangerously-bypass-approvals-and-sandbox")
|
|
382
|
+
elif sandbox == "workspace-write":
|
|
383
|
+
# Default codex behavior
|
|
384
|
+
pass
|
|
385
|
+
|
|
386
|
+
cmd.extend(["--", task])
|
|
387
|
+
|
|
388
|
+
# Start process with output redirected to file
|
|
389
|
+
output_path = self._output_path(session_id, 1)
|
|
390
|
+
output_file = open(output_path, "w")
|
|
391
|
+
|
|
392
|
+
proc = subprocess.Popen(
|
|
393
|
+
cmd,
|
|
394
|
+
cwd=working_dir,
|
|
395
|
+
stdout=output_file,
|
|
396
|
+
stderr=subprocess.STDOUT,
|
|
397
|
+
start_new_session=True, # Detach from parent process group
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
session.pid = proc.pid
|
|
401
|
+
session.status = SessionStatus.RUNNING
|
|
402
|
+
self._save_session(session)
|
|
403
|
+
|
|
404
|
+
return session
|
|
405
|
+
|
|
406
|
+
def inject_message(
|
|
407
|
+
self,
|
|
408
|
+
session_id: str,
|
|
409
|
+
message: str,
|
|
410
|
+
) -> CodexSession | None:
|
|
411
|
+
"""
|
|
412
|
+
Inject a follow-up message into a completed session.
|
|
413
|
+
|
|
414
|
+
This starts a new turn with the conversation context.
|
|
415
|
+
|
|
416
|
+
Args:
|
|
417
|
+
session_id: Session to continue
|
|
418
|
+
message: The follow-up message
|
|
419
|
+
|
|
420
|
+
Returns:
|
|
421
|
+
Updated session or None if not found/not ready
|
|
422
|
+
"""
|
|
423
|
+
session = self.get_session(session_id)
|
|
424
|
+
if not session:
|
|
425
|
+
return None
|
|
426
|
+
|
|
427
|
+
if session.status == SessionStatus.RUNNING:
|
|
428
|
+
# Can't inject while running - would need to implement
|
|
429
|
+
# a more complex IPC mechanism for that
|
|
430
|
+
return None
|
|
431
|
+
|
|
432
|
+
# Build context from previous messages
|
|
433
|
+
context_parts = []
|
|
434
|
+
for msg in session.messages:
|
|
435
|
+
if msg.role == "user":
|
|
436
|
+
context_parts.append(f"USER: {msg.content}")
|
|
437
|
+
elif msg.role == "assistant":
|
|
438
|
+
context_parts.append(f"ASSISTANT: {msg.content}")
|
|
439
|
+
|
|
440
|
+
# Create augmented prompt with context
|
|
441
|
+
augmented_task = f"""Continue the following conversation:
|
|
442
|
+
|
|
443
|
+
{chr(10).join(context_parts)}
|
|
444
|
+
|
|
445
|
+
USER: {message}
|
|
446
|
+
|
|
447
|
+
Continue from where you left off, addressing the user's new message."""
|
|
448
|
+
|
|
449
|
+
# Start new turn
|
|
450
|
+
session.turn += 1
|
|
451
|
+
now = datetime.now().isoformat()
|
|
452
|
+
session.messages.append(SessionMessage(role="user", content=message, timestamp=now))
|
|
453
|
+
|
|
454
|
+
# Build command
|
|
455
|
+
cmd = [
|
|
456
|
+
"codex", "exec",
|
|
457
|
+
"--json",
|
|
458
|
+
"--full-auto",
|
|
459
|
+
"--skip-git-repo-check",
|
|
460
|
+
"--model", session.model,
|
|
461
|
+
"-C", str(session.working_dir.absolute()),
|
|
462
|
+
"--", augmented_task,
|
|
463
|
+
]
|
|
464
|
+
|
|
465
|
+
# Start process
|
|
466
|
+
output_path = self._output_path(session.id, session.turn)
|
|
467
|
+
output_file = open(output_path, "w")
|
|
468
|
+
|
|
469
|
+
proc = subprocess.Popen(
|
|
470
|
+
cmd,
|
|
471
|
+
cwd=session.working_dir,
|
|
472
|
+
stdout=output_file,
|
|
473
|
+
stderr=subprocess.STDOUT,
|
|
474
|
+
start_new_session=True,
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
session.pid = proc.pid
|
|
478
|
+
session.status = SessionStatus.RUNNING
|
|
479
|
+
self._save_session(session)
|
|
480
|
+
|
|
481
|
+
return session
|
|
482
|
+
|
|
483
|
+
def kill_session(self, session_id: str, delete: bool = False) -> bool:
|
|
484
|
+
"""
|
|
485
|
+
Kill a running session.
|
|
486
|
+
|
|
487
|
+
Args:
|
|
488
|
+
session_id: Session to kill
|
|
489
|
+
delete: If True, also delete session data entirely
|
|
490
|
+
|
|
491
|
+
Returns True if killed, False if not found or not running.
|
|
492
|
+
"""
|
|
493
|
+
session = self.get_session(session_id)
|
|
494
|
+
if not session:
|
|
495
|
+
return False
|
|
496
|
+
|
|
497
|
+
if session.pid and session.is_running:
|
|
498
|
+
try:
|
|
499
|
+
# Kill the entire process group
|
|
500
|
+
os.killpg(os.getpgid(session.pid), signal.SIGTERM)
|
|
501
|
+
time.sleep(0.5)
|
|
502
|
+
|
|
503
|
+
# Force kill if still running
|
|
504
|
+
if session.is_running:
|
|
505
|
+
os.killpg(os.getpgid(session.pid), signal.SIGKILL)
|
|
506
|
+
except (OSError, ProcessLookupError):
|
|
507
|
+
pass
|
|
508
|
+
|
|
509
|
+
if delete:
|
|
510
|
+
return self.delete_session(session.id)
|
|
511
|
+
|
|
512
|
+
session.status = SessionStatus.KILLED
|
|
513
|
+
session.error = "Manually killed"
|
|
514
|
+
self._save_session(session)
|
|
515
|
+
return True
|
|
516
|
+
|
|
517
|
+
def delete_session(self, session_id: str) -> bool:
|
|
518
|
+
"""
|
|
519
|
+
Delete a session entirely (removes from disk).
|
|
520
|
+
|
|
521
|
+
Kills the process first if still running.
|
|
522
|
+
|
|
523
|
+
Returns True if deleted, False if not found.
|
|
524
|
+
"""
|
|
525
|
+
import shutil
|
|
526
|
+
|
|
527
|
+
session = self.get_session(session_id)
|
|
528
|
+
if not session:
|
|
529
|
+
return False
|
|
530
|
+
|
|
531
|
+
# Kill if running
|
|
532
|
+
if session.pid and session.is_running:
|
|
533
|
+
try:
|
|
534
|
+
os.killpg(os.getpgid(session.pid), signal.SIGTERM)
|
|
535
|
+
time.sleep(0.3)
|
|
536
|
+
if session.is_running:
|
|
537
|
+
os.killpg(os.getpgid(session.pid), signal.SIGKILL)
|
|
538
|
+
except (OSError, ProcessLookupError):
|
|
539
|
+
pass
|
|
540
|
+
|
|
541
|
+
# Remove session directory
|
|
542
|
+
session_dir = self._session_dir(session.id)
|
|
543
|
+
if session_dir.exists():
|
|
544
|
+
shutil.rmtree(session_dir)
|
|
545
|
+
return True
|
|
546
|
+
|
|
547
|
+
return False
|
|
548
|
+
|
|
549
|
+
def get_output(self, session_id: str, turn: int | None = None) -> str:
|
|
550
|
+
"""Get raw JSONL output for a session."""
|
|
551
|
+
session = self.get_session(session_id)
|
|
552
|
+
if not session:
|
|
553
|
+
return ""
|
|
554
|
+
|
|
555
|
+
if turn is None:
|
|
556
|
+
turn = session.turn
|
|
557
|
+
|
|
558
|
+
output_path = self._output_path(session.id, turn)
|
|
559
|
+
if not output_path.exists():
|
|
560
|
+
return ""
|
|
561
|
+
|
|
562
|
+
return output_path.read_text()
|
|
563
|
+
|
|
564
|
+
def get_messages(self, session_id: str) -> list[SessionMessage]:
|
|
565
|
+
"""Get parsed messages for a session across all turns."""
|
|
566
|
+
session = self.get_session(session_id)
|
|
567
|
+
if not session:
|
|
568
|
+
return []
|
|
569
|
+
|
|
570
|
+
all_messages = []
|
|
571
|
+
|
|
572
|
+
# Get messages from each turn
|
|
573
|
+
for turn in range(1, session.turn + 1):
|
|
574
|
+
output_path = self._output_path(session.id, turn)
|
|
575
|
+
if output_path.exists():
|
|
576
|
+
messages, _, _ = self._parse_output(output_path)
|
|
577
|
+
all_messages.extend(messages)
|
|
578
|
+
|
|
579
|
+
return all_messages
|
|
580
|
+
|
|
581
|
+
def _parse_output(self, output_path: Path) -> tuple[list[SessionMessage], dict[str, int], str | None]:
|
|
582
|
+
"""
|
|
583
|
+
Parse JSONL output from codex exec.
|
|
584
|
+
|
|
585
|
+
Returns:
|
|
586
|
+
(messages, token_usage, error)
|
|
587
|
+
"""
|
|
588
|
+
messages: list[SessionMessage] = []
|
|
589
|
+
usage: dict[str, int] = {}
|
|
590
|
+
error: str | None = None
|
|
591
|
+
|
|
592
|
+
if not output_path.exists():
|
|
593
|
+
return messages, usage, "Output file not found"
|
|
594
|
+
|
|
595
|
+
content = output_path.read_text()
|
|
596
|
+
|
|
597
|
+
for line in content.strip().split("\n"):
|
|
598
|
+
if not line.strip():
|
|
599
|
+
continue
|
|
600
|
+
|
|
601
|
+
try:
|
|
602
|
+
event = json.loads(line)
|
|
603
|
+
except json.JSONDecodeError:
|
|
604
|
+
continue
|
|
605
|
+
|
|
606
|
+
event_type = event.get("type", "")
|
|
607
|
+
|
|
608
|
+
if event_type == "item.completed":
|
|
609
|
+
item = event.get("item", {})
|
|
610
|
+
item_type = item.get("type", "")
|
|
611
|
+
|
|
612
|
+
if item_type == "agent_message":
|
|
613
|
+
text = item.get("text", "")
|
|
614
|
+
if text:
|
|
615
|
+
messages.append(SessionMessage(
|
|
616
|
+
role="assistant",
|
|
617
|
+
content=text,
|
|
618
|
+
timestamp=datetime.now().isoformat(),
|
|
619
|
+
))
|
|
620
|
+
|
|
621
|
+
elif item_type == "reasoning":
|
|
622
|
+
# Could optionally capture reasoning
|
|
623
|
+
pass
|
|
624
|
+
|
|
625
|
+
elif item_type == "function_call":
|
|
626
|
+
# Track tool calls
|
|
627
|
+
func_name = item.get("name", "unknown")
|
|
628
|
+
messages.append(SessionMessage(
|
|
629
|
+
role="tool",
|
|
630
|
+
content=f"[Calling: {func_name}]",
|
|
631
|
+
metadata={"function": func_name},
|
|
632
|
+
))
|
|
633
|
+
|
|
634
|
+
elif item_type == "function_call_output":
|
|
635
|
+
output = item.get("output", "")
|
|
636
|
+
if output and len(output) < 500:
|
|
637
|
+
messages.append(SessionMessage(
|
|
638
|
+
role="tool",
|
|
639
|
+
content=f"[Output]: {output[:500]}",
|
|
640
|
+
))
|
|
641
|
+
|
|
642
|
+
elif event_type == "turn.completed":
|
|
643
|
+
turn_usage = event.get("usage", {})
|
|
644
|
+
for key, value in turn_usage.items():
|
|
645
|
+
usage[key] = usage.get(key, 0) + value
|
|
646
|
+
# Compute total_tokens if not present
|
|
647
|
+
if "total_tokens" not in usage:
|
|
648
|
+
usage["total_tokens"] = usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
|
|
649
|
+
|
|
650
|
+
elif event_type == "error":
|
|
651
|
+
error = event.get("message", str(event))
|
|
652
|
+
|
|
653
|
+
return messages, usage, error
|
|
654
|
+
|
|
655
|
+
def get_trajectory(self, session_id: str, full: bool = False, max_output_len: int = 200) -> list[dict]:
|
|
656
|
+
"""
|
|
657
|
+
Get the full trajectory of a session - all steps in order.
|
|
658
|
+
|
|
659
|
+
Args:
|
|
660
|
+
session_id: Session to get trajectory for
|
|
661
|
+
full: If True, include full untruncated content
|
|
662
|
+
max_output_len: Max length for outputs when full=False
|
|
663
|
+
|
|
664
|
+
Returns a list of step dicts with type, summary, and details.
|
|
665
|
+
This shows the "broad strokes" of what the agent did.
|
|
666
|
+
"""
|
|
667
|
+
if full:
|
|
668
|
+
max_output_len = 999999 # Effectively unlimited
|
|
669
|
+
session = self.get_session(session_id)
|
|
670
|
+
if not session:
|
|
671
|
+
return []
|
|
672
|
+
|
|
673
|
+
trajectory = []
|
|
674
|
+
|
|
675
|
+
for turn in range(1, session.turn + 1):
|
|
676
|
+
output_path = self._output_path(session.id, turn)
|
|
677
|
+
if not output_path.exists():
|
|
678
|
+
continue
|
|
679
|
+
|
|
680
|
+
content = output_path.read_text()
|
|
681
|
+
step_num = 0
|
|
682
|
+
|
|
683
|
+
for line in content.strip().split("\n"):
|
|
684
|
+
if not line.strip():
|
|
685
|
+
continue
|
|
686
|
+
|
|
687
|
+
try:
|
|
688
|
+
event = json.loads(line)
|
|
689
|
+
except json.JSONDecodeError:
|
|
690
|
+
continue
|
|
691
|
+
|
|
692
|
+
event_type = event.get("type", "")
|
|
693
|
+
|
|
694
|
+
if event_type == "item.completed":
|
|
695
|
+
item = event.get("item", {})
|
|
696
|
+
item_type = item.get("type", "")
|
|
697
|
+
step_num += 1
|
|
698
|
+
|
|
699
|
+
if item_type == "reasoning":
|
|
700
|
+
text = item.get("text", "")
|
|
701
|
+
summary_len = max_output_len if full else 100
|
|
702
|
+
trajectory.append({
|
|
703
|
+
"turn": turn,
|
|
704
|
+
"step": step_num,
|
|
705
|
+
"type": "reasoning",
|
|
706
|
+
"summary": text[:summary_len] + ("..." if len(text) > summary_len else ""),
|
|
707
|
+
"full_text": text if full else None,
|
|
708
|
+
})
|
|
709
|
+
|
|
710
|
+
elif item_type == "command_execution":
|
|
711
|
+
cmd = item.get("command", "")
|
|
712
|
+
output = item.get("aggregated_output", "")
|
|
713
|
+
exit_code = item.get("exit_code")
|
|
714
|
+
# Truncate output
|
|
715
|
+
output_preview = output[:max_output_len]
|
|
716
|
+
if len(output) > max_output_len:
|
|
717
|
+
output_preview += "..."
|
|
718
|
+
trajectory.append({
|
|
719
|
+
"turn": turn,
|
|
720
|
+
"step": step_num,
|
|
721
|
+
"type": "command",
|
|
722
|
+
"command": cmd,
|
|
723
|
+
"output": output_preview.strip(),
|
|
724
|
+
"exit_code": exit_code,
|
|
725
|
+
})
|
|
726
|
+
|
|
727
|
+
elif item_type == "function_call":
|
|
728
|
+
func_name = item.get("name", "unknown")
|
|
729
|
+
args = item.get("arguments", {})
|
|
730
|
+
args_str = str(args)
|
|
731
|
+
args_len = max_output_len if full else 100
|
|
732
|
+
trajectory.append({
|
|
733
|
+
"turn": turn,
|
|
734
|
+
"step": step_num,
|
|
735
|
+
"type": "tool_call",
|
|
736
|
+
"tool": func_name,
|
|
737
|
+
"args_preview": args_str[:args_len] + ("..." if len(args_str) > args_len else ""),
|
|
738
|
+
"full_args": args if full else None,
|
|
739
|
+
})
|
|
740
|
+
|
|
741
|
+
elif item_type == "function_call_output":
|
|
742
|
+
output = item.get("output", "")
|
|
743
|
+
output_preview = output[:max_output_len]
|
|
744
|
+
if len(output) > max_output_len:
|
|
745
|
+
output_preview += "..."
|
|
746
|
+
trajectory.append({
|
|
747
|
+
"turn": turn,
|
|
748
|
+
"step": step_num,
|
|
749
|
+
"type": "tool_output",
|
|
750
|
+
"output": output_preview,
|
|
751
|
+
})
|
|
752
|
+
|
|
753
|
+
elif item_type == "agent_message":
|
|
754
|
+
text = item.get("text", "")
|
|
755
|
+
summary_len = max_output_len if full else 200
|
|
756
|
+
trajectory.append({
|
|
757
|
+
"turn": turn,
|
|
758
|
+
"step": step_num,
|
|
759
|
+
"type": "message",
|
|
760
|
+
"summary": text[:summary_len] + ("..." if len(text) > summary_len else ""),
|
|
761
|
+
"full_text": text if full else None,
|
|
762
|
+
"full_length": len(text),
|
|
763
|
+
})
|
|
764
|
+
|
|
765
|
+
return trajectory
|
|
766
|
+
|
|
767
|
+
def cleanup_completed(self, keep_days: int = 7) -> int:
|
|
768
|
+
"""
|
|
769
|
+
Remove old completed/failed/killed sessions.
|
|
770
|
+
|
|
771
|
+
Args:
|
|
772
|
+
keep_days: Keep sessions newer than this many days
|
|
773
|
+
|
|
774
|
+
Returns:
|
|
775
|
+
Number of sessions cleaned up
|
|
776
|
+
"""
|
|
777
|
+
import shutil
|
|
778
|
+
from datetime import timedelta
|
|
779
|
+
|
|
780
|
+
cutoff = datetime.now() - timedelta(days=keep_days)
|
|
781
|
+
cleaned = 0
|
|
782
|
+
|
|
783
|
+
for session in self.list_sessions():
|
|
784
|
+
if session.status in (SessionStatus.COMPLETED, SessionStatus.FAILED, SessionStatus.KILLED):
|
|
785
|
+
created = datetime.fromisoformat(session.created_at)
|
|
786
|
+
if created < cutoff:
|
|
787
|
+
session_dir = self._session_dir(session.id)
|
|
788
|
+
if session_dir.exists():
|
|
789
|
+
shutil.rmtree(session_dir)
|
|
790
|
+
cleaned += 1
|
|
791
|
+
|
|
792
|
+
return cleaned
|