trajectoriz 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trajectoriz/__init__.py +685 -0
- trajectoriz/cli.py +377 -0
- trajectoriz-0.2.0.dist-info/METADATA +85 -0
- trajectoriz-0.2.0.dist-info/RECORD +8 -0
- trajectoriz-0.2.0.dist-info/WHEEL +5 -0
- trajectoriz-0.2.0.dist-info/entry_points.txt +2 -0
- trajectoriz-0.2.0.dist-info/licenses/LICENSE +21 -0
- trajectoriz-0.2.0.dist-info/top_level.txt +1 -0
trajectoriz/__init__.py
ADDED
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
"""trajectoriz: locate and parse agent trajectory files on the local machine."""
|
|
2
|
+
|
|
3
|
+
# Keep in sync with the distribution metadata: this wheel is published as
# trajectoriz 0.2.0, so reporting 0.1.0 here misleads version checks.
__version__ = "0.2.0"
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import sqlite3
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def iter_claude_trajectories(claude_dir=None):
    """Yield every Claude Code trajectory JSONL path, in sorted order.

    Looks recursively under ``<claude_dir>/projects``; ``claude_dir`` defaults
    to ``~/.claude``. Nothing is yielded when the directory does not exist.
    """
    root = Path.home() / ".claude" if not claude_dir else Path(claude_dir)
    if not root.is_dir():
        return
    for jsonl in sorted(root.glob("projects/**/*.jsonl")):
        yield jsonl
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def claude_project_dir(repo_root: str, claude_dir=None) -> Path:
    """Return the Claude Code project directory for a given repo root.

    The directory name is ``repo_root`` with every character outside
    ``[a-zA-Z0-9]`` replaced by ``-``, located under ``<claude_dir>/projects``
    (``claude_dir`` defaults to ``~/.claude``). The path is not checked for
    existence.
    """
    base = Path(claude_dir) if claude_dir else Path.home() / ".claude"
    non_alnum = re.compile(r"[^a-zA-Z0-9]")
    folder_name = non_alnum.sub("-", repo_root)
    return base.joinpath("projects", folder_name)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def iter_claude_project_trajectories(repo_root: str, claude_dir=None):
    """Yield the Claude Code trajectory JSONL paths of one project, sorted.

    Only files directly inside the project directory computed by
    ``claude_project_dir`` are considered; nothing is yielded when that
    directory does not exist.
    """
    project = claude_project_dir(repo_root, claude_dir)
    if not project.is_dir():
        return
    for jsonl in sorted(project.glob("*.jsonl")):
        yield jsonl
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def iter_codex_trajectories(codex_dir=None):
    """Yield every Codex CLI session JSONL path, in sorted order.

    Searches ``<codex_dir>/sessions`` recursively; ``codex_dir`` defaults to
    ``~/.codex``. Nothing is yielded when the sessions directory is missing.
    """
    root = Path.home() / ".codex" if not codex_dir else Path(codex_dir)
    sessions = root / "sessions"
    if not sessions.is_dir():
        return
    for jsonl in sorted(sessions.rglob("*.jsonl")):
        yield jsonl
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def iter_codex_rollout_files(codex_dir=None):
    """Yield Codex CLI rollout paths (``rollout-*.jsonl`` only), sorted.

    Searches ``<codex_dir>/sessions`` recursively; ``codex_dir`` defaults to
    ``~/.codex``. Nothing is yielded when the sessions directory is missing.
    """
    root = Path.home() / ".codex" if not codex_dir else Path(codex_dir)
    sessions = root / "sessions"
    if not sessions.is_dir():
        return
    for rollout in sorted(sessions.rglob("rollout-*.jsonl")):
        yield rollout
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def iter_pi_trajectories(pi_dir=None):
    """Yield every pi coding agent session JSONL path, in sorted order.

    The agent directory is ``pi_dir`` when given, otherwise the
    ``PI_CODING_AGENT_DIR`` environment variable, otherwise ``~/.pi/agent``.
    Its ``sessions`` subdirectory is searched recursively; nothing is yielded
    when that subdirectory does not exist.
    """
    if pi_dir:
        agent_root = Path(pi_dir)
    else:
        override = os.environ.get("PI_CODING_AGENT_DIR")
        agent_root = Path(override) if override else Path.home() / ".pi" / "agent"
    sessions = agent_root / "sessions"
    if not sessions.is_dir():
        return
    for jsonl in sorted(sessions.rglob("*.jsonl")):
        yield jsonl
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def iter_cursor_trajectories(cursor_dir=None):
    """Yield every Cursor trajectory JSONL path exactly once.

    Files under ``<cursor_dir>/sessions`` come first, then those under
    ``<cursor_dir>/projects``; each group is sorted. ``cursor_dir`` defaults
    to ``~/.cursor``. Nothing is yielded when the directory does not exist.
    """
    root = Path.home() / ".cursor" if not cursor_dir else Path(cursor_dir)
    if not root.is_dir():
        return
    emitted = set()
    for pattern in ("sessions/**/*.jsonl", "projects/**/*.jsonl"):
        for candidate in sorted(root.glob(pattern)):
            if candidate in emitted:
                continue
            emitted.add(candidate)
            yield candidate
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def iter_copilot_event_trajectories(copilot_dir=None):
    """Yield Copilot CLI session event logs, in sorted order.

    Matches ``<copilot_dir>/session-state/*/events.jsonl``; ``copilot_dir``
    defaults to ``~/.copilot``. Nothing is yielded when the ``session-state``
    directory does not exist.
    """
    root = Path.home() / ".copilot" if not copilot_dir else Path(copilot_dir)
    state_dir = root / "session-state"
    if not state_dir.is_dir():
        return
    for events in sorted(state_dir.glob("*/events.jsonl")):
        yield events
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def iter_agent_probe_trajectories(agent_probe_dir=None):
    """Yield every agent_probe session JSONL path, in sorted order.

    Matches ``<agent_probe_dir>/*/*/*.jsonl`` (exactly two directory levels);
    ``agent_probe_dir`` defaults to ``~/.local/share/agent_probe``. Nothing is
    yielded when the directory does not exist.
    """
    if agent_probe_dir:
        root = Path(agent_probe_dir)
    else:
        root = Path.home() / ".local" / "share" / "agent_probe"
    if not root.is_dir():
        return
    for jsonl in sorted(root.glob("*/*/*.jsonl")):
        yield jsonl
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def iter_opencode_sessions(opencode_dir=None):
    """Yield (id, updated_at_ms, model_json, directory, first_prompt) from the opencode SQLite store.

    Reads ``<opencode_dir>/opencode.db`` (default directory:
    ``~/.local/share/opencode``), most recently updated session first.
    ``first_prompt`` is the stripped ``text`` of the earliest part of the
    earliest user message, or ``""`` when it cannot be determined. This is
    best effort: a missing database or any error while reading it simply ends
    the iteration.
    """
    if opencode_dir:
        root = Path(opencode_dir)
    else:
        root = Path.home() / ".local" / "share" / "opencode"
    db_file = root / "opencode.db"
    if not db_file.exists():
        return

    def _first_prompt(conn, session_id):
        # Any failure (query error, bad JSON, unexpected shape) degrades to "".
        try:
            hit = conn.execute(
                """
                SELECT p.data
                FROM message m
                JOIN part p ON m.id = p.message_id
                WHERE m.session_id = ? AND json_extract(m.data, '$.role') = 'user'
                ORDER BY m.time_created, p.time_created
                LIMIT 1
                """,
                (session_id,),
            ).fetchone()
            if hit:
                return json.loads(hit[0]).get("text", "").strip()
        except Exception:
            pass
        return ""

    try:
        conn = sqlite3.connect(str(db_file))
        try:
            sessions = conn.execute(
                "SELECT id, time_updated, model, directory FROM session ORDER BY time_updated DESC"
            ).fetchall()
            for session_id, ts_ms, model_json, directory in sessions:
                yield (session_id, ts_ms, model_json, directory, _first_prompt(conn, session_id))
        finally:
            conn.close()
    except Exception:
        return
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def iter_codex_db_sessions(codex_dir=None):
    """Yield (id, updated_at_ms, first_user_message, model_provider, model, cwd) from ~/.codex/state_5.sqlite.

    Rows come from the ``threads`` table, most recently updated first.
    ``codex_dir`` overrides the default ``~/.codex`` directory. Best effort:
    a missing database or any error while reading it ends the iteration.
    """
    root = Path.home() / ".codex" if not codex_dir else Path(codex_dir)
    db_file = root / "state_5.sqlite"
    if not db_file.exists():
        return
    query = (
        "SELECT id, updated_at_ms, first_user_message, model_provider, model, cwd"
        " FROM threads ORDER BY updated_at_ms DESC"
    )
    try:
        conn = sqlite3.connect(str(db_file))
        try:
            for row in conn.execute(query).fetchall():
                yield row
        finally:
            conn.close()
    except Exception:
        return
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _extract_content_text(content) -> str:
|
|
155
|
+
if isinstance(content, str):
|
|
156
|
+
return content.strip()
|
|
157
|
+
if isinstance(content, list):
|
|
158
|
+
return " ".join(
|
|
159
|
+
b.get("text", "")
|
|
160
|
+
for b in content
|
|
161
|
+
if isinstance(b, dict) and b.get("type") == "text"
|
|
162
|
+
).strip()
|
|
163
|
+
return ""
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def get_first_user_message_claude(jsonl_path) -> tuple[str, str]:
    """Return (timestamp, first_user_text) from a Claude Code trajectory JSONL.

    ``timestamp`` is the first non-empty ``timestamp`` value seen in the file.
    ``first_user_text`` is the text of the first ``"user"`` entry that has
    text and is not tied to a meta prompt; it is ``""`` when none is found.

    The scan is best effort: blank lines, invalid JSON, JSON values that are
    not objects and entries whose ``message`` is not an object are skipped,
    and an unreadable or undecodable file yields what was collected so far.
    """
    timestamp = ""
    # promptIds of entries flagged isMeta; user entries sharing one are skipped.
    meta_prompt_ids: set = set()
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON that is not an object (list, string, null, ...)
                # has no fields to read; skip it instead of crashing on .get().
                if not isinstance(d, dict):
                    continue
                if not timestamp:
                    timestamp = d.get("timestamp", "")
                if d.get("isMeta"):
                    meta_prompt_ids.add(d.get("promptId", ""))
                    continue
                if d.get("type") != "user":
                    continue
                if d.get("promptId") in meta_prompt_ids:
                    continue
                # "message" may be missing or null; treat both as no content.
                message = d.get("message")
                content = message.get("content", "") if isinstance(message, dict) else ""
                text = _extract_content_text(content)
                if text:
                    return timestamp, text
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # named explicitly for a corrupt file to stay a soft failure.
        pass
    return timestamp, ""
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def get_first_user_message_copilot(jsonl_path) -> tuple[str, str]:
    """Return (timestamp, first_user_text) from a Copilot events JSONL.

    ``timestamp`` is the first non-empty ``timestamp`` value seen in the file.
    ``first_user_text`` is the text of the first ``"user.message"`` event
    whose ``data.content`` has text; it is ``""`` when none is found.

    The scan is best effort: blank lines, invalid JSON, JSON values that are
    not objects and events whose ``data`` is not an object are skipped, and
    an unreadable or undecodable file yields what was collected so far.
    """
    timestamp = ""
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Non-object JSON has no fields to read; skip it instead of
                # crashing on .get().
                if not isinstance(d, dict):
                    continue
                if not timestamp:
                    timestamp = d.get("timestamp", "")
                if d.get("type") != "user.message":
                    continue
                # "data" may be missing or null; treat both as no content.
                data = d.get("data")
                content = data.get("content", "") if isinstance(data, dict) else ""
                text = _extract_content_text(content)
                if text:
                    return timestamp, text
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # named explicitly for a corrupt file to stay a soft failure.
        pass
    return timestamp, ""
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def get_first_user_message_agent_probe(jsonl_path) -> tuple[str, str]:
    """Return (timestamp, first_user_text) from an agent_probe trajectory JSONL.

    Two event shapes are accepted: ``type == "user"`` with the content under
    ``message.content``, and ``type == "user.message"`` with the content
    under ``data.content``. ``timestamp`` is the first non-empty
    ``timestamp`` value seen in the file; ``first_user_text`` is ``""`` when
    no user event with text is found.

    The scan is best effort: blank lines, invalid JSON, JSON values that are
    not objects and events whose envelope is not an object are skipped, and
    an unreadable or undecodable file yields what was collected so far.
    """
    timestamp = ""
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Non-object JSON has no fields to read; skip it instead of
                # crashing on .get().
                if not isinstance(d, dict):
                    continue
                if not timestamp:
                    timestamp = d.get("timestamp", "")
                event_type = d.get("type")
                if event_type == "user":
                    envelope = d.get("message")
                elif event_type == "user.message":
                    envelope = d.get("data")
                else:
                    continue
                # The envelope may be missing or null; treat both as no content.
                content = envelope.get("content", "") if isinstance(envelope, dict) else ""
                text = _extract_content_text(content)
                if text:
                    return timestamp, text
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # named explicitly for a corrupt file to stay a soft failure.
        pass
    return timestamp, ""
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def get_first_user_message(jsonl_path) -> tuple[str, str]:
    """Return (timestamp, first_user_text), dispatching by trajectory source.

    The reader is picked from where the file lives under the user's home
    directory: ``~/.claude``, ``~/.copilot`` or ``~/.local/share/agent_probe``
    (checked in that order). Paths outside all three yield ``("", "")``.
    """
    path = Path(jsonl_path)
    home = Path.home()
    readers = (
        (home / ".claude", get_first_user_message_claude),
        (home / ".copilot", get_first_user_message_copilot),
        (home / ".local" / "share" / "agent_probe", get_first_user_message_agent_probe),
    )
    for source_root, reader in readers:
        if path.is_relative_to(source_root):
            return reader(path)
    return "", ""
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def get_cwd_from_trajectory(jsonl_path) -> str:
    """Extract the working directory from a JSONL trajectory file.

    Only the first 31 lines are inspected. For each JSON object the keys
    ``cwd``, ``workingDirectory`` and ``working_directory`` are tried, first
    at the top level and then inside a nested ``payload`` or ``data`` object.
    The first non-empty string found is returned.

    Returns ``""`` when nothing is found or when the file cannot be read or
    decoded. Blank lines, invalid JSON and non-object JSON values are skipped.
    """
    cwd_keys = ("cwd", "workingDirectory", "working_directory")
    # Other readers in this module take Codex fields from "payload" and
    # Copilot fields from "data", so the cwd may sit one level down.
    # NOTE(review): confirm against real rollout/event files.
    envelope_keys = ("payload", "data")
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for i, line in enumerate(f):
                if i > 30:
                    break
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Non-object JSON has no keys to read; skip it instead of
                # crashing on .get().
                if not isinstance(d, dict):
                    continue
                # Top level wins, so files that already worked are unaffected.
                candidates = [d]
                candidates.extend(
                    d[name] for name in envelope_keys if isinstance(d.get(name), dict)
                )
                for obj in candidates:
                    for key in cwd_keys:
                        val = obj.get(key)
                        if val and isinstance(val, str):
                            return val
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # named explicitly for a corrupt file to stay a soft failure.
        pass
    return ""
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def iter_copilot_sessions(copilot_dir=None):
    """Yield (session_id, created_at) pairs from the Copilot CLI SQLite store.

    Reads the ``sessions`` table of ``<copilot_dir>/session-store.db``;
    ``copilot_dir`` defaults to ``~/.copilot``. Best effort: a missing
    database or any error while reading it ends the iteration.
    """
    root = Path.home() / ".copilot" if not copilot_dir else Path(copilot_dir)
    db_file = root / "session-store.db"
    if not db_file.exists():
        return
    try:
        conn = sqlite3.connect(str(db_file))
        try:
            for row in conn.execute("SELECT id, created_at FROM sessions").fetchall():
                yield row
        finally:
            conn.close()
    except Exception:
        return
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# ── Trajectory parsing ────────────────────────────────────────────────────────
|
|
303
|
+
|
|
304
|
+
@dataclass
|
|
305
|
+
class ParsedTrajectory:
|
|
306
|
+
steps: list[dict] = field(default_factory=list)
|
|
307
|
+
session_id: str | None = None
|
|
308
|
+
model_name: str | None = None
|
|
309
|
+
agent_version: str | None = None
|
|
310
|
+
total_prompt_tokens: int = 0
|
|
311
|
+
total_completion_tokens: int = 0
|
|
312
|
+
total_cached_tokens: int = 0
|
|
313
|
+
total_tool_calls: int = 0
|
|
314
|
+
extra_agent: dict = field(default_factory=dict)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _truncate(text: str, limit: int = 4000) -> str:
|
|
318
|
+
if len(text) <= limit:
|
|
319
|
+
return text
|
|
320
|
+
return text[:limit] + f"\n… [{len(text) - limit} chars truncated]"
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _cc_extract_text(content: object) -> str:
|
|
324
|
+
if isinstance(content, str):
|
|
325
|
+
return content
|
|
326
|
+
if isinstance(content, list):
|
|
327
|
+
parts: list[str] = []
|
|
328
|
+
for part in content:
|
|
329
|
+
if isinstance(part, str):
|
|
330
|
+
parts.append(part)
|
|
331
|
+
elif isinstance(part, dict):
|
|
332
|
+
if part.get("type") == "text":
|
|
333
|
+
parts.append(part.get("text", ""))
|
|
334
|
+
elif part.get("type") == "image":
|
|
335
|
+
parts.append("[image]")
|
|
336
|
+
return "\n".join(p for p in parts if p)
|
|
337
|
+
return ""
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _cc_tool_result_text(content: object) -> str:
|
|
341
|
+
if isinstance(content, str):
|
|
342
|
+
return content
|
|
343
|
+
if isinstance(content, list):
|
|
344
|
+
return "\n".join(
|
|
345
|
+
p.get("text", "") for p in content
|
|
346
|
+
if isinstance(p, dict) and p.get("type") == "text"
|
|
347
|
+
)
|
|
348
|
+
return str(content) if content is not None else ""
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _cc_is_pure_tool_result(content: object) -> bool:
|
|
352
|
+
return (
|
|
353
|
+
isinstance(content, list)
|
|
354
|
+
and bool(content)
|
|
355
|
+
and all(isinstance(p, dict) and p.get("type") == "tool_result" for p in content)
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def parse_claude_trajectory(jsonl_path: Path, fallback_timestamp: str = "") -> ParsedTrajectory:
    """Parse a Claude Code project JSONL trajectory file.

    Args:
        jsonl_path: Path to the trajectory file (one JSON value per line).
        fallback_timestamp: Timestamp used for steps whose entry has none.

    Returns:
        A ParsedTrajectory holding the user/agent steps, the session id,
        model name and agent version when present, and the token and
        tool-call totals. Blank lines, invalid JSON and JSON values that are
        not objects are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    entries: list[dict] = []
    with Path(jsonl_path).open(encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                entry = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object cannot be a trajectory entry;
            # dropping it here keeps every later .get() safe.
            if isinstance(entry, dict):
                entries.append(entry)

    def _message(entry: dict) -> dict:
        """Return the entry's message object, or {} when absent or not an object."""
        msg = entry.get("message")
        return msg if isinstance(msg, dict) else {}

    # Pass 1: session-level metadata (first occurrence of each wins).
    session_id: str | None = None
    model_name: str | None = None
    agent_version: str | None = None

    for entry in entries:
        if not session_id and "sessionId" in entry:
            session_id = entry["sessionId"]
        if entry.get("type") == "assistant":
            msg = _message(entry)
            if not model_name and msg.get("model"):
                model_name = msg["model"]
        if not agent_version and entry.get("version"):
            agent_version = entry["version"]

    # Pass 2: index tool results by tool_use_id. They arrive in later "user"
    # entries, so they must be collected before the steps are built.
    tool_results: dict[str, str] = {}
    for entry in entries:
        if entry.get("type") != "user":
            continue
        content = _message(entry).get("content", [])
        if not isinstance(content, list):
            continue
        for part in content:
            if isinstance(part, dict) and part.get("type") == "tool_result":
                tid = part.get("tool_use_id", "")
                if tid:
                    tool_results[tid] = _cc_tool_result_text(part.get("content", ""))

    # Pass 3: build the ordered steps and accumulate totals.
    steps: list[dict] = []
    step_id = 0
    total_tool_calls = 0
    total_prompt = total_completion = total_cached = 0

    for entry in entries:
        entry_type = entry.get("type")
        timestamp: str = entry.get("timestamp") or fallback_timestamp

        if entry_type == "user":
            content = _message(entry).get("content", [])
            # Entries that only carry tool results are attached to the agent
            # step that made the call, not emitted as user steps.
            if _cc_is_pure_tool_result(content):
                continue
            text = _cc_extract_text(content).strip()
            if not text:
                continue
            step_id += 1
            steps.append({"step_id": step_id, "timestamp": timestamp,
                          "source": "user", "message": text})

        elif entry_type == "assistant":
            msg = _message(entry)
            content = msg.get("content") or []
            if not isinstance(content, list):
                content = []

            text_parts: list[str] = []
            thinking_parts: list[str] = []
            tool_calls: list[dict] = []

            for part in content:
                if not isinstance(part, dict):
                    continue
                ptype = part.get("type")
                if ptype == "text":
                    # A null "text" would break the join below.
                    text_parts.append(part.get("text") or "")
                elif ptype == "thinking":
                    thinking_parts.append(part.get("thinking", ""))
                elif ptype == "tool_use":
                    tool_calls.append({
                        "tool_call_id": part.get("id", ""),
                        "function_name": part.get("name", ""),
                        "arguments": part.get("input") or {},
                    })
                    total_tool_calls += 1

            # Keep every thinking part; retaining only the last one would
            # silently drop earlier reasoning from the same entry.
            reasoning = "\n".join(
                t for t in thinking_parts if t and isinstance(t, str)
            )

            usage = msg.get("usage") or {}
            cached_tokens = usage.get("cache_read_input_tokens") or 0
            # Prompt tokens = uncached input + cache writes + cache reads.
            prompt_tokens = (
                (usage.get("input_tokens") or 0)
                + (usage.get("cache_creation_input_tokens") or 0)
                + cached_tokens
            )
            completion_tokens = usage.get("output_tokens") or 0
            total_prompt += prompt_tokens
            total_completion += completion_tokens
            total_cached += cached_tokens

            observation: dict | None = None
            if tool_calls:
                results = [
                    {"source_call_id": tc["tool_call_id"],
                     "content": _truncate(tool_results[tc["tool_call_id"]])}
                    for tc in tool_calls
                    if tc["tool_call_id"] in tool_results
                ]
                if results:
                    observation = {"results": results}

            step_id += 1
            step: dict = {
                "step_id": step_id,
                "timestamp": timestamp,
                "source": "agent",
                "message": "\n".join(text_parts).strip(),
            }
            if reasoning:
                step["reasoning_content"] = reasoning
            if tool_calls:
                step["tool_calls"] = tool_calls
            if observation:
                step["observation"] = observation
            if prompt_tokens or completion_tokens:
                step["metrics"] = {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "cached_tokens": cached_tokens,
                }
            steps.append(step)

    return ParsedTrajectory(
        session_id=session_id,
        model_name=model_name,
        agent_version=agent_version,
        steps=steps,
        total_prompt_tokens=total_prompt,
        total_completion_tokens=total_completion,
        total_cached_tokens=total_cached,
        total_tool_calls=total_tool_calls,
    )
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def parse_codex_trajectory(jsonl_path: Path, fallback_timestamp: str = "") -> ParsedTrajectory:
    """Parse a Codex rollout-*.jsonl trajectory file.

    Args:
        jsonl_path: Path to the rollout file (one JSON value per line).
        fallback_timestamp: Timestamp used for steps whose entry has none.

    Returns:
        A ParsedTrajectory holding the user/agent steps, the session id,
        model name and CLI version when present, and the token and tool-call
        totals. Consecutive assistant messages and tool calls are merged into
        one agent step until a user message or end-of-turn event arrives.
        Blank lines, invalid JSON and JSON values that are not objects are
        skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    entries: list[dict] = []
    with Path(jsonl_path).open(encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                entry = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object cannot be a rollout entry;
            # dropping it here keeps every later .get() safe.
            if isinstance(entry, dict):
                entries.append(entry)

    def _payload(entry: dict) -> dict:
        """Return the entry's payload object, or {} when absent or not an object."""
        payload = entry.get("payload")
        return payload if isinstance(payload, dict) else {}

    # Pass 1: session-level metadata.
    session_id: str | None = None
    model_name: str | None = None
    cli_version: str | None = None

    for entry in entries:
        t = entry.get("type", "")
        p = _payload(entry)
        if t == "session_meta":
            session_id = p.get("id")
            cli_version = p.get("cli_version")
        if t == "turn_context" and p.get("model") and not model_name:
            model_name = p["model"]

    # Pass 2: index tool outputs by call_id so they can be attached to the
    # agent step that issued the call.
    tool_results: dict[str, str] = {}
    for entry in entries:
        if entry.get("type") != "response_item":
            continue
        p = _payload(entry)
        if p.get("type", "") in ("function_call_output", "custom_tool_call_output"):
            call_id = p.get("call_id", "")
            if call_id:
                tool_results[call_id] = str(p.get("output", ""))

    # Pass 3: build the ordered steps and accumulate totals.
    steps: list[dict] = []
    step_id = 0
    total_tool_calls = 0
    total_prompt = total_completion = total_cached = 0

    # The agent step currently being assembled, or None between turns.
    pending: dict | None = None

    def _flush_pending() -> None:
        """Finalize the in-progress agent step, if any, and append it to steps."""
        nonlocal pending
        if pending is None:
            return
        tool_calls: list[dict] = pending.get("tool_calls", [])
        if tool_calls:
            results = [
                {"source_call_id": tc["tool_call_id"],
                 "content": _truncate(tool_results[tc["tool_call_id"]])}
                for tc in tool_calls
                if tc["tool_call_id"] in tool_results
            ]
            if results:
                pending["observation"] = {"results": results}
        else:
            # Steps without tool calls do not carry an empty list.
            pending.pop("tool_calls", None)
        steps.append(pending)
        pending = None

    for entry in entries:
        t = entry.get("type", "")
        p = _payload(entry)
        pt = p.get("type", "")
        ts = entry.get("timestamp") or fallback_timestamp

        if t == "event_msg" and pt == "user_message":
            _flush_pending()
            text = (p.get("message") or "").strip()
            if text:
                step_id += 1
                steps.append({"step_id": step_id, "timestamp": ts,
                              "source": "user", "message": text})

        elif t == "response_item" and pt == "message" and p.get("role") == "assistant":
            text = "".join(
                part.get("text", "")
                for part in (p.get("content") or [])
                if isinstance(part, dict) and part.get("type") == "output_text"
            ).strip()
            if pending is None:
                step_id += 1
                pending = {"step_id": step_id, "timestamp": ts,
                           "source": "agent", "message": text, "tool_calls": []}
            elif text:
                existing = pending.get("message", "")
                pending["message"] = (existing + "\n" + text).strip()

        elif t == "response_item" and pt in ("function_call", "custom_tool_call"):
            call_id = p.get("call_id", "")
            name = p.get("name", "")
            if pt == "function_call":
                # Arguments arrive as a JSON string; keep the raw value when
                # it does not parse.
                try:
                    arguments: object = json.loads(p.get("arguments") or "{}")
                except (json.JSONDecodeError, TypeError):
                    arguments = {"raw": p.get("arguments", "")}
            else:
                arguments = {"input": p.get("input", "")}

            if pending is None:
                step_id += 1
                pending = {"step_id": step_id, "timestamp": ts,
                           "source": "agent", "message": "", "tool_calls": []}
            pending["tool_calls"].append({
                "tool_call_id": call_id,
                "function_name": name,
                "arguments": arguments,
            })
            total_tool_calls += 1

        elif t == "event_msg" and pt == "token_count":
            info = p.get("info") or {}
            # total_token_usage is taken as a running total, hence max()
            # rather than a sum.
            tu = info.get("total_token_usage") or {}
            total_prompt = max(total_prompt, tu.get("input_tokens") or 0)
            total_completion = max(total_completion, tu.get("output_tokens") or 0)
            total_cached = max(total_cached, tu.get("cached_input_tokens") or 0)
            if pending is not None:
                lu = info.get("last_token_usage") or {}
                if lu:
                    pending["metrics"] = {
                        "prompt_tokens": lu.get("input_tokens") or 0,
                        "completion_tokens": lu.get("output_tokens") or 0,
                        "cached_tokens": lu.get("cached_input_tokens") or 0,
                    }

        elif t == "event_msg" and pt in ("task_complete", "turn_aborted"):
            _flush_pending()

    # A rollout may end without an explicit end-of-turn event.
    _flush_pending()

    return ParsedTrajectory(
        session_id=session_id,
        model_name=model_name,
        agent_version=cli_version,
        steps=steps,
        total_prompt_tokens=total_prompt,
        total_completion_tokens=total_completion,
        total_cached_tokens=total_cached,
        total_tool_calls=total_tool_calls,
    )
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def parse_copilot_trajectory(db_path: Path, session_id: str, fallback_timestamp: str = "") -> ParsedTrajectory:
    """Parse a GitHub Copilot CLI session from the SQLite session store.

    Args:
        db_path: Path to the session store database.
        session_id: Id of the session to load.
        fallback_timestamp: Timestamp used for turns that have none.

    Returns:
        A ParsedTrajectory whose steps alternate user and agent messages in
        turn order (empty or whitespace-only messages are skipped), with the
        session's repository and summary in ``extra_agent``. An unknown
        session id yields an empty ParsedTrajectory.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        header = conn.execute(
            "SELECT cwd, repository, branch, summary, created_at FROM sessions WHERE id=?",
            (session_id,),
        ).fetchone()
        if not header:
            return ParsedTrajectory()
        # Only two of the five selected columns are used.
        repository, summary = header[1], header[3]

        turns = conn.execute(
            "SELECT turn_index, user_message, assistant_response, timestamp "
            "FROM turns WHERE session_id=? ORDER BY turn_index",
            (session_id,),
        ).fetchall()
    finally:
        conn.close()

    steps: list[dict] = []
    for _turn_idx, user_msg, asst_resp, ts in turns:
        stamp = ts or fallback_timestamp
        for source, body in (("user", user_msg), ("agent", asst_resp)):
            if body and body.strip():
                steps.append({"step_id": len(steps) + 1, "timestamp": stamp,
                              "source": source, "message": body.strip()})

    extras = {
        "copilot_repository": repository or "",
        "copilot_summary": summary or "",
    }
    return ParsedTrajectory(steps=steps, extra_agent=extras)
|
trajectoriz/cli.py
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""trajectoriz-cli: search and browse past agent trajectories."""
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
import math
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Iterator, Optional
|
|
13
|
+
|
|
14
|
+
import trajectoriz as tz
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Pagination defaults.
# Steps per page.
DEFAULT_SHOW_PAGE_SIZE = 20
# Trajectories per page.
DEFAULT_LIST_PAGE_SIZE = 50
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class TrajRecord:
    """One listable trajectory, regardless of which agent produced it."""

    # Short display id: a two-letter source prefix plus a hash fragment.
    id: str
    # Name of the agent/source, e.g. "claude" or "codex".
    agent: str
    # Timestamp as a string; its format depends on the source.
    timestamp: str
    # Text of the first user message ("" when unknown).
    first_msg: str
    # Path for JSONL files; dict for DB sessions.
    source: object
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _short_id(prefix: str, key: str) -> str:
|
|
31
|
+
return f"{prefix}-{hashlib.sha256(key.encode()).hexdigest()[:8]}"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _codex_first_user_message(path: Path) -> tuple[str, str]:
|
|
35
|
+
ts = ""
|
|
36
|
+
try:
|
|
37
|
+
with path.open(encoding="utf-8") as f:
|
|
38
|
+
for line in f:
|
|
39
|
+
line = line.strip()
|
|
40
|
+
if not line:
|
|
41
|
+
continue
|
|
42
|
+
try:
|
|
43
|
+
d = json.loads(line)
|
|
44
|
+
except json.JSONDecodeError:
|
|
45
|
+
continue
|
|
46
|
+
if not ts:
|
|
47
|
+
ts = d.get("timestamp", "")
|
|
48
|
+
if d.get("type") == "event_msg":
|
|
49
|
+
p = d.get("payload") or {}
|
|
50
|
+
if p.get("type") == "user_message":
|
|
51
|
+
msg = (p.get("message") or "").strip()
|
|
52
|
+
if msg:
|
|
53
|
+
return ts, msg
|
|
54
|
+
except OSError:
|
|
55
|
+
pass
|
|
56
|
+
return ts, ""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _cwd_matches(cwd_field: str, target: str) -> bool:
|
|
60
|
+
"""True if cwd_field is target or a subdirectory of target."""
|
|
61
|
+
if not cwd_field:
|
|
62
|
+
return False
|
|
63
|
+
try:
|
|
64
|
+
return Path(cwd_field) == Path(target) or Path(cwd_field).is_relative_to(Path(target))
|
|
65
|
+
except (ValueError, TypeError):
|
|
66
|
+
return False
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _local_records(cwd: str) -> Iterator[TrajRecord]:
    """Yield only trajectories whose working directory is cwd or a subdirectory.

    Sources are visited in a fixed order: Claude project files, Codex
    rollouts, Copilot event logs, agent_probe files, opencode sessions and
    finally Codex DB threads. Claude files are selected through the project
    directory derived from ``cwd``; every other source is filtered on the
    working directory it records.
    """
    for path in tz.iter_claude_project_trajectories(cwd):
        stamp, text = tz.get_first_user_message_claude(path)
        yield TrajRecord(_short_id("cl", str(path)), "claude", stamp, text, path)

    # File-based sources that share the same "read cwd, then filter" shape:
    # (path lister, first-message reader, id prefix, agent name).
    file_sources = (
        (tz.iter_codex_rollout_files, _codex_first_user_message, "cx", "codex"),
        (tz.iter_copilot_event_trajectories, tz.get_first_user_message_copilot, "cp", "copilot"),
        (tz.iter_agent_probe_trajectories, tz.get_first_user_message_agent_probe, "ap", "agent_probe"),
    )
    for list_paths, read_first_message, prefix, agent in file_sources:
        for path in list_paths():
            if not _cwd_matches(tz.get_cwd_from_trajectory(path), cwd):
                continue
            stamp, text = read_first_message(path)
            yield TrajRecord(_short_id(prefix, str(path)), agent, stamp, text, path)

    for session_id, ts_ms, model_json, directory, first_prompt in tz.iter_opencode_sessions():
        if not _cwd_matches(directory, cwd):
            continue
        origin = {"type": "opencode", "session_id": session_id, "model": model_json, "dir": directory}
        yield TrajRecord(_short_id("oc", session_id), "opencode", str(ts_ms), first_prompt, origin)

    for sid, updated_ms, first_msg, _provider, model, rec_cwd in tz.iter_codex_db_sessions():
        if not _cwd_matches(rec_cwd, cwd):
            continue
        origin = {"type": "codex_db", "session_id": sid, "model": model, "cwd": rec_cwd}
        yield TrajRecord(_short_id("cd", str(sid)), "codex_db", str(updated_ms), first_msg or "", origin)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _all_records() -> Iterator[TrajRecord]:
    """Yield a record for every trajectory found, regardless of directory.

    Sources are visited lazily in this order: claude, codex rollout files,
    copilot event files, agent_probe, opencode, codex_db and finally the
    Copilot session database (only when ``~/.copilot/session-store.db``
    exists).  File-based records carry the file path as their source;
    database-backed records carry a small descriptive dict instead.
    """
    for path in tz.iter_claude_trajectories():
        stamp, first = tz.get_first_user_message_claude(path)
        yield TrajRecord(_short_id("cl", str(path)), "claude", stamp, first, path)

    for path in tz.iter_codex_rollout_files():
        stamp, first = _codex_first_user_message(path)
        yield TrajRecord(_short_id("cx", str(path)), "codex", stamp, first, path)

    for path in tz.iter_copilot_event_trajectories():
        stamp, first = tz.get_first_user_message_copilot(path)
        yield TrajRecord(_short_id("cp", str(path)), "copilot", stamp, first, path)

    for path in tz.iter_agent_probe_trajectories():
        stamp, first = tz.get_first_user_message_agent_probe(path)
        yield TrajRecord(_short_id("ap", str(path)), "agent_probe", stamp, first, path)

    for session in tz.iter_opencode_sessions():
        session_id, ts_ms, model_json, directory, first_prompt = session
        yield TrajRecord(
            _short_id("oc", session_id),
            "opencode",
            str(ts_ms),
            first_prompt,
            {"type": "opencode", "session_id": session_id, "model": model_json, "dir": directory},
        )

    # Codex database sessions: the fourth column is not used here.
    for session in tz.iter_codex_db_sessions():
        sid, updated_ms, first_msg, _, model, session_cwd = session
        yield TrajRecord(
            _short_id("cd", str(sid)),
            "codex_db",
            str(updated_ms),
            first_msg or "",
            {"type": "codex_db", "session_id": sid, "model": model, "cwd": session_cwd},
        )

    # Copilot session database: skipped entirely when the file is absent.
    copilot_db = Path.home() / ".copilot" / "session-store.db"
    if not copilot_db.exists():
        return
    for session_id, created_at in tz.iter_copilot_sessions():
        yield TrajRecord(
            _short_id("gh", str(session_id)),
            "copilot_db",
            str(created_at or ""),
            "",
            {"type": "copilot_db", "session_id": session_id, "db_path": str(copilot_db)},
        )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _render_step(step: dict) -> str:
|
|
161
|
+
lines: list[str] = []
|
|
162
|
+
role = "USER" if step["source"] == "user" else "AGENT"
|
|
163
|
+
ts_suffix = f" *{step['timestamp'][:19]}*" if step.get("timestamp") else ""
|
|
164
|
+
lines.append(f"---\n## Step {step['step_id']} — {role}{ts_suffix}\n")
|
|
165
|
+
if step.get("message"):
|
|
166
|
+
lines.append(step["message"])
|
|
167
|
+
lines.append("")
|
|
168
|
+
for tc in step.get("tool_calls", []):
|
|
169
|
+
args_str = json.dumps(tc.get("arguments", {}), indent=2)
|
|
170
|
+
if len(args_str) > 600:
|
|
171
|
+
args_str = args_str[:600] + "\n…"
|
|
172
|
+
lines.append(f"**Tool call:** `{tc['function_name']}`")
|
|
173
|
+
lines.append(f"```json\n{args_str}\n```\n")
|
|
174
|
+
for res in (step.get("observation") or {}).get("results", []):
|
|
175
|
+
content = res.get("content", "")
|
|
176
|
+
if len(content) > 1000:
|
|
177
|
+
content = content[:1000] + "\n…"
|
|
178
|
+
lines.append(f"**Tool result:**\n```\n{content}\n```\n")
|
|
179
|
+
return "\n".join(lines)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _trajectory_header_and_steps(record: TrajRecord) -> tuple[str, list[str]]:
|
|
183
|
+
"""Return (header_markdown, list_of_rendered_steps)."""
|
|
184
|
+
hlines: list[str] = []
|
|
185
|
+
hlines.append(f"# Trajectory `{record.id}`")
|
|
186
|
+
hlines.append(f"**Agent:** {record.agent}")
|
|
187
|
+
if record.timestamp:
|
|
188
|
+
hlines.append(f"**Date:** {record.timestamp[:19]}")
|
|
189
|
+
|
|
190
|
+
if isinstance(record.source, Path):
|
|
191
|
+
if record.agent == "claude":
|
|
192
|
+
traj = tz.parse_claude_trajectory(record.source)
|
|
193
|
+
elif record.agent == "codex":
|
|
194
|
+
traj = tz.parse_codex_trajectory(record.source)
|
|
195
|
+
else:
|
|
196
|
+
hlines.append("\n*Full trajectory parsing not supported for this agent type.*")
|
|
197
|
+
return "\n".join(hlines), []
|
|
198
|
+
elif isinstance(record.source, dict):
|
|
199
|
+
src_type = record.source["type"]
|
|
200
|
+
if src_type == "copilot_db":
|
|
201
|
+
db_path = Path(record.source["db_path"])
|
|
202
|
+
traj = tz.parse_copilot_trajectory(db_path, record.source["session_id"])
|
|
203
|
+
else:
|
|
204
|
+
hlines.append(f"**Session ID:** {record.source.get('session_id', '')}")
|
|
205
|
+
if record.source.get("model"):
|
|
206
|
+
hlines.append(f"**Model:** {record.source['model']}")
|
|
207
|
+
d = record.source.get("dir") or record.source.get("cwd") or ""
|
|
208
|
+
if d:
|
|
209
|
+
hlines.append(f"**Directory:** {d}")
|
|
210
|
+
hlines.append("\n*Full trajectory parsing not available for this agent type.*")
|
|
211
|
+
return "\n".join(hlines), []
|
|
212
|
+
else:
|
|
213
|
+
return "\n".join(hlines), []
|
|
214
|
+
|
|
215
|
+
hlines.append(f"**Steps:** {len(traj.steps)}")
|
|
216
|
+
if traj.model_name:
|
|
217
|
+
hlines.append(f"**Model:** {traj.model_name}")
|
|
218
|
+
if traj.total_tool_calls:
|
|
219
|
+
hlines.append(f"**Tool calls:** {traj.total_tool_calls}")
|
|
220
|
+
if traj.total_prompt_tokens:
|
|
221
|
+
hlines.append(
|
|
222
|
+
f"**Tokens:** {traj.total_prompt_tokens} prompt / "
|
|
223
|
+
f"{traj.total_completion_tokens} completion"
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
return "\n".join(hlines), [_render_step(s) for s in traj.steps]
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _paginate_items(
|
|
230
|
+
items: list[str], page: int, page_size: int, header: str, unit: str, footer: str = ""
|
|
231
|
+
) -> None:
|
|
232
|
+
total = len(items)
|
|
233
|
+
total_pages = max(1, math.ceil(total / page_size))
|
|
234
|
+
if page < 1 or page > total_pages:
|
|
235
|
+
print(f"Error: page {page} out of range (1–{total_pages}).", file=sys.stderr)
|
|
236
|
+
sys.exit(1)
|
|
237
|
+
start = (page - 1) * page_size
|
|
238
|
+
chunk = items[start : start + page_size]
|
|
239
|
+
showing_end = min(start + page_size, total)
|
|
240
|
+
print(
|
|
241
|
+
f"<!-- trajectoriz | page {page}/{total_pages} | "
|
|
242
|
+
f"{unit} {start + 1}–{showing_end} of {total} -->"
|
|
243
|
+
)
|
|
244
|
+
if header:
|
|
245
|
+
print(header)
|
|
246
|
+
print("\n".join(chunk))
|
|
247
|
+
if footer and page == total_pages:
|
|
248
|
+
print(footer)
|
|
249
|
+
if page < total_pages:
|
|
250
|
+
remaining = total_pages - page
|
|
251
|
+
print(f"\n<!-- {remaining} more page(s) — run with --page {page + 1} to continue -->")
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ── Commands ──────────────────────────────────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _record_row(rec: TrajRecord) -> str:
|
|
258
|
+
date = rec.timestamp[:10] if rec.timestamp else "—"
|
|
259
|
+
snippet = (rec.first_msg or "")[:80].replace("|", "\\|").replace("\n", " ")
|
|
260
|
+
return f"| `{rec.id}` | {rec.agent} | {date} | {snippet} |"
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def cmd_list(args) -> None:
    """Print one page of the trajectory listing as a markdown table.

    Every known trajectory is listed when ``args.all`` is set; otherwise only
    those associated with the current working directory.  Records are sorted
    by their timestamp string in descending order, and ``args.page`` /
    ``args.page_size`` select the page that is printed.
    """
    source = _all_records() if args.all else _local_records(os.getcwd())
    # A record may carry no timestamp; map that to "" so the sort never has
    # to compare None with str (which raises TypeError).
    # NOTE(review): ordering is by string comparison, so stringified numeric
    # timestamps (opencode, codex_db) may not interleave chronologically with
    # other formats — confirm whether that matters.
    records = sorted(source, key=lambda r: r.timestamp or "", reverse=True)
    if not records:
        print("No trajectories found.")
        return
    header = (
        f"## All trajectories ({len(records)} total)\n\n"
        "| ID | Agent | Date | First message |\n"
        "|---|---|---|---|"
    )
    rows = [_record_row(r) for r in records]
    _paginate_items(
        rows,
        args.page,
        args.page_size,
        header,
        "trajectories",
        footer="\nUse `trajectoriz-cli show <id>` to view a trajectory.",
    )
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def cmd_search(args) -> None:
    """Print one page of trajectories matching ``args.query``.

    The query is a case-insensitive substring matched against each record's
    first message, id and agent name.  All trajectories are searched when
    ``args.all`` is set; otherwise only those associated with the current
    working directory.  Matches are sorted by timestamp string in descending
    order and printed as a markdown table.
    """
    query = args.query.lower()
    source = _all_records() if args.all else _local_records(os.getcwd())
    records = [
        rec
        for rec in source
        if query in (rec.first_msg or "").lower()
        or query in rec.id.lower()
        or query in rec.agent.lower()
    ]
    # A record may carry no timestamp; map that to "" so the sort never has
    # to compare None with str (which raises TypeError).
    records.sort(key=lambda r: r.timestamp or "", reverse=True)

    if not records:
        print(f"No trajectories found matching `{args.query}`.")
        return
    header = (
        f"## Search: `{args.query}` — {len(records)} result(s)\n\n"
        "| ID | Agent | Date | First message |\n"
        "|---|---|---|---|"
    )
    rows = [_record_row(r) for r in records]
    _paginate_items(
        rows,
        args.page,
        args.page_size,
        header,
        "trajectories",
        footer="\nUse `trajectoriz-cli show <id>` to view a trajectory.",
    )
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def cmd_show(args) -> None:
    """Print one page of the trajectory whose id equals ``args.id``.

    All records are scanned until the first exact id match.  When none is
    found an error is written to stderr and the process exits with status 1;
    otherwise the header and the requested page of rendered steps are
    printed.
    """
    wanted = args.id
    # The generator stops at the first match, so later sources are not read.
    match = next((rec for rec in _all_records() if rec.id == wanted), None)

    if match is None:
        print(f"Error: trajectory `{wanted}` not found.", file=sys.stderr)
        sys.exit(1)

    header, rendered_steps = _trajectory_header_and_steps(match)
    _paginate_items(rendered_steps, args.page, args.page_size, header, "steps")
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# ── Entry point ───────────────────────────────────────────────────────────────
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def main() -> None:
    """Parse the command line and run the selected sub-command.

    Three sub-commands are defined: ``list``, ``search`` and ``show``.  Each
    one stores its handler as ``func`` on the parsed namespace, and that
    handler is then called with the namespace.
    """
    parser = argparse.ArgumentParser(
        description="Search and browse past agent trajectories.",
        prog="trajectoriz-cli",
    )
    commands = parser.add_subparsers(dest="command", metavar="<command>", required=True)

    def add_listing_options(command, all_help: str) -> None:
        # Paging and scope options shared by ``list`` and ``search``.
        command.add_argument("--page", type=int, default=1, metavar="N")
        command.add_argument(
            "--page-size",
            type=int,
            default=DEFAULT_LIST_PAGE_SIZE,
            metavar="N",
            help=f"Trajectories per page (default: {DEFAULT_LIST_PAGE_SIZE})",
        )
        command.add_argument("--all", action="store_true", help=all_help)

    # list
    list_cmd = commands.add_parser(
        "list", help="List trajectories (current directory by default)."
    )
    add_listing_options(list_cmd, "Include all agents/directories.")
    list_cmd.set_defaults(func=cmd_list)

    # search
    search_cmd = commands.add_parser(
        "search",
        help="Search trajectories by keyword (current directory by default).",
    )
    search_cmd.add_argument("query", help="Search term (case-insensitive substring).")
    add_listing_options(search_cmd, "Search across all agents/directories.")
    search_cmd.set_defaults(func=cmd_search)

    # show
    show_cmd = commands.add_parser(
        "show",
        help="Show a trajectory in agent-readable markdown.",
    )
    show_cmd.add_argument("id", help="Trajectory ID (from list or search).")
    show_cmd.add_argument(
        "--page",
        type=int,
        default=1,
        metavar="N",
        help="Page number (default: 1). Increment to scroll.",
    )
    show_cmd.add_argument(
        "--page-size",
        type=int,
        default=DEFAULT_SHOW_PAGE_SIZE,
        metavar="N",
        help=f"Messages (steps) per page (default: {DEFAULT_SHOW_PAGE_SIZE})",
    )
    show_cmd.set_defaults(func=cmd_show)

    namespace = parser.parse_args()
    namespace.func(namespace)
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: trajectoriz
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Locate agent trajectory files on the local machine
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Dynamic: license-file
|
|
10
|
+
|
|
11
|
+
# trajectoriz
|
|
12
|
+
|
|
13
|
+
Locate agent trajectory files on the local machine.
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install trajectoriz
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from trajectoriz import (
|
|
25
|
+
iter_claude_trajectories,
|
|
26
|
+
iter_claude_project_trajectories,
|
|
27
|
+
iter_codex_trajectories,
|
|
28
|
+
iter_codex_rollout_files,
|
|
29
|
+
iter_codex_db_sessions,
|
|
30
|
+
iter_pi_trajectories,
|
|
31
|
+
iter_cursor_trajectories,
|
|
32
|
+
iter_copilot_event_trajectories,
|
|
33
|
+
iter_copilot_sessions,
|
|
34
|
+
iter_agent_probe_trajectories,
|
|
35
|
+
iter_opencode_sessions,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# List all Claude Code trajectory files
|
|
39
|
+
for path in iter_claude_trajectories():
|
|
40
|
+
print(path)
|
|
41
|
+
|
|
42
|
+
# List Claude trajectories for a specific project
|
|
43
|
+
for path in iter_claude_project_trajectories("/path/to/repo"):
|
|
44
|
+
print(path)
|
|
45
|
+
|
|
46
|
+
# List Codex CLI session files
|
|
47
|
+
for path in iter_codex_trajectories():
|
|
48
|
+
print(path)
|
|
49
|
+
|
|
50
|
+
# List Codex CLI rollout files
|
|
51
|
+
for path in iter_codex_rollout_files():
|
|
52
|
+
print(path)
|
|
53
|
+
|
|
54
|
+
# List Codex sessions from SQLite store (~/.codex/state_5.sqlite)
|
|
55
|
+
for session_id, updated_at_ms, first_msg, provider, model, cwd in iter_codex_db_sessions():
|
|
56
|
+
print(session_id, first_msg)
|
|
57
|
+
|
|
58
|
+
# List pi coding agent session files
|
|
59
|
+
for path in iter_pi_trajectories():
|
|
60
|
+
print(path)
|
|
61
|
+
|
|
62
|
+
# List Cursor trajectory files
|
|
63
|
+
for path in iter_cursor_trajectories():
|
|
64
|
+
print(path)
|
|
65
|
+
|
|
66
|
+
# List Copilot CLI session event JSONL files (~/.copilot/session-state/*/events.jsonl)
|
|
67
|
+
for path in iter_copilot_event_trajectories():
|
|
68
|
+
print(path)
|
|
69
|
+
|
|
70
|
+
# List Copilot CLI sessions from SQLite store
|
|
71
|
+
for session_id, created_at in iter_copilot_sessions():
|
|
72
|
+
print(session_id, created_at)
|
|
73
|
+
|
|
74
|
+
# List agent_probe session JSONL files (~/.local/share/agent_probe/*/*/*)
|
|
75
|
+
for path in iter_agent_probe_trajectories():
|
|
76
|
+
print(path)
|
|
77
|
+
|
|
78
|
+
# List opencode sessions from SQLite store (~/.local/share/opencode/opencode.db)
|
|
79
|
+
for session_id, updated_at_ms, model_json, directory, first_prompt in iter_opencode_sessions():
|
|
80
|
+
print(session_id, first_prompt)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## License
|
|
84
|
+
|
|
85
|
+
MIT
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
trajectoriz/__init__.py,sha256=XGa8C_50P3qUJBjrb90TQok0rr5VOoNjyIwRmMCbQCk,24629
|
|
2
|
+
trajectoriz/cli.py,sha256=JkRdARLKxEdU-kKarvxBUgykk3p1-NMxInpqP3DNuSE,14058
|
|
3
|
+
trajectoriz-0.2.0.dist-info/licenses/LICENSE,sha256=Btzdu2kIoMbdSp6OyCLupB1aRgpTCJ_szMimgEnpkkE,1056
|
|
4
|
+
trajectoriz-0.2.0.dist-info/METADATA,sha256=IHfKq-YtLsItgDHRwx44kCWgOiH5vTIDqAMq5bl1fu8,2185
|
|
5
|
+
trajectoriz-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
trajectoriz-0.2.0.dist-info/entry_points.txt,sha256=YV_i-3pgUXiar4sj5R5Sn3PdBT115KMf2ztMRGZz_VY,57
|
|
7
|
+
trajectoriz-0.2.0.dist-info/top_level.txt,sha256=8p6CY8WukAX6dc_kuSkMHlsHjw9b5gh0k8nq0f5OFgs,12
|
|
8
|
+
trajectoriz-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
trajectoriz
|