trajectoriz 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,685 @@
1
+ """trajectoriz: locate and parse agent trajectory files on the local machine."""
2
+
3
# NOTE(review): the wheel this file ships in is named 0.2.0, but the value
# below is still "0.1.0" — confirm which one is authoritative.
__version__ = "0.1.0"
4
+
5
+ import json
6
+ from dataclasses import dataclass, field
7
+ import os
8
+ import re
9
+ import sqlite3
10
+ from pathlib import Path
11
+
12
+
13
def iter_claude_trajectories(claude_dir=None):
    """Yield every Claude Code trajectory JSONL path, in sorted order.

    The search covers ``projects/**/*.jsonl`` below ``claude_dir``, which
    defaults to ``~/.claude``. Nothing is yielded when that directory does
    not exist.
    """
    root = Path.home() / ".claude" if not claude_dir else Path(claude_dir)
    if not root.is_dir():
        return
    for trajectory in sorted(root.glob("projects/**/*.jsonl")):
        yield trajectory
18
+
19
+
20
def claude_project_dir(repo_root: str, claude_dir=None) -> Path:
    """Return the Claude Code project directory that belongs to ``repo_root``.

    The directory name is ``repo_root`` with every character outside
    ``[a-zA-Z0-9]`` replaced by ``-``; it lives under
    ``<claude_dir>/projects`` (``claude_dir`` defaults to ``~/.claude``).
    The path is only computed, not checked for existence.
    """
    base = Path.home() / ".claude" if not claude_dir else Path(claude_dir)
    project_slug = re.sub(r"[^a-zA-Z0-9]", "-", repo_root)
    return base.joinpath("projects", project_slug)
25
+
26
+
27
def iter_claude_project_trajectories(repo_root: str, claude_dir=None):
    """Yield the Claude Code trajectory JSONL paths of a single project.

    Only files directly inside the project directory computed by
    ``claude_project_dir`` are considered; they are yielded in sorted order.
    Nothing is yielded when that directory does not exist.
    """
    project_dir = claude_project_dir(repo_root, claude_dir)
    if not project_dir.is_dir():
        return
    for trajectory in sorted(project_dir.glob("*.jsonl")):
        yield trajectory
32
+
33
+
34
def iter_codex_trajectories(codex_dir=None):
    """Yield every Codex CLI session JSONL path, in sorted order.

    Files are searched recursively below ``<codex_dir>/sessions``;
    ``codex_dir`` defaults to ``~/.codex``. Nothing is yielded when the
    sessions directory does not exist.
    """
    codex_home = Path.home() / ".codex" if not codex_dir else Path(codex_dir)
    sessions = codex_home / "sessions"
    if not sessions.is_dir():
        return
    for session_file in sorted(sessions.rglob("*.jsonl")):
        yield session_file
40
+
41
+
42
def iter_codex_rollout_files(codex_dir=None):
    """Yield Codex CLI rollout files (``rollout-*.jsonl`` only), sorted.

    Files are searched recursively below ``<codex_dir>/sessions``;
    ``codex_dir`` defaults to ``~/.codex``. Nothing is yielded when the
    sessions directory does not exist.
    """
    codex_home = Path.home() / ".codex" if not codex_dir else Path(codex_dir)
    sessions = codex_home / "sessions"
    if not sessions.is_dir():
        return
    for rollout in sorted(sessions.rglob("rollout-*.jsonl")):
        yield rollout
48
+
49
+
50
def iter_pi_trajectories(pi_dir=None):
    """Yield every pi coding agent session JSONL path, in sorted order.

    The agent directory is ``pi_dir`` when given, otherwise the
    ``PI_CODING_AGENT_DIR`` environment variable, otherwise
    ``~/.pi/agent``. Session files are searched recursively below its
    ``sessions`` sub-directory.
    """
    if pi_dir:
        agent_root = Path(pi_dir)
    else:
        override = os.environ.get("PI_CODING_AGENT_DIR")
        agent_root = Path(override) if override else Path.home() / ".pi" / "agent"
    sessions = agent_root / "sessions"
    if not sessions.is_dir():
        return
    for session_file in sorted(sessions.rglob("*.jsonl")):
        yield session_file
59
+
60
+
61
def iter_cursor_trajectories(cursor_dir=None):
    """Yield every Cursor trajectory JSONL path, without duplicates.

    Matches under ``sessions/`` are yielded first (sorted), then matches
    under ``projects/`` (sorted). ``cursor_dir`` defaults to ``~/.cursor``;
    nothing is yielded when it does not exist.
    """
    root = Path.home() / ".cursor" if not cursor_dir else Path(cursor_dir)
    if not root.is_dir():
        return
    emitted = set()
    for pattern in ("sessions/**/*.jsonl", "projects/**/*.jsonl"):
        for candidate in sorted(root.glob(pattern)):
            if candidate in emitted:
                continue
            emitted.add(candidate)
            yield candidate
72
+
73
+
74
def iter_copilot_event_trajectories(copilot_dir=None):
    """Yield Copilot CLI event logs (``session-state/*/events.jsonl``), sorted.

    ``copilot_dir`` defaults to ``~/.copilot``. Only ``events.jsonl`` files
    located exactly one directory below ``session-state`` are matched.
    """
    copilot_home = Path.home() / ".copilot" if not copilot_dir else Path(copilot_dir)
    state_dir = copilot_home / "session-state"
    if not state_dir.is_dir():
        return
    for events_file in sorted(state_dir.glob("*/events.jsonl")):
        yield events_file
80
+
81
+
82
def iter_agent_probe_trajectories(agent_probe_dir=None):
    """Yield agent_probe session JSONL paths (``*/*/*.jsonl``), sorted.

    ``agent_probe_dir`` defaults to ``~/.local/share/agent_probe``. Only
    files exactly two directory levels below it are matched.
    """
    if agent_probe_dir:
        root = Path(agent_probe_dir)
    else:
        root = Path.home() / ".local" / "share" / "agent_probe"
    if not root.is_dir():
        return
    for session_file in sorted(root.glob("*/*/*.jsonl")):
        yield session_file
91
+
92
+
93
def iter_opencode_sessions(opencode_dir=None):
    """Yield one tuple per session stored in the opencode SQLite database.

    Each tuple is ``(id, updated_at_ms, model_json, directory, first_prompt)``
    and sessions come newest first. ``first_prompt`` is the stripped ``text``
    of the earliest part of the earliest user message, or ``""`` when it
    cannot be determined. ``opencode_dir`` defaults to
    ``~/.local/share/opencode``.

    This is best effort: a missing database yields nothing, and any error
    while reading it silently ends the iteration.
    """
    if opencode_dir:
        store = Path(opencode_dir)
    else:
        store = Path.home() / ".local" / "share" / "opencode"
    db_file = store / "opencode.db"
    if not db_file.exists():
        return

    first_prompt_sql = """
        SELECT p.data
        FROM message m
        JOIN part p ON m.id = p.message_id
        WHERE m.session_id = ? AND json_extract(m.data, '$.role') = 'user'
        ORDER BY m.time_created, p.time_created
        LIMIT 1
        """
    try:
        connection = sqlite3.connect(str(db_file))
        try:
            sessions = connection.execute(
                "SELECT id, time_updated, model, directory FROM session ORDER BY time_updated DESC"
            ).fetchall()
            for session_id, ts_ms, model_json, directory in sessions:
                prompt = ""
                try:
                    first_part = connection.execute(first_prompt_sql, (session_id,)).fetchone()
                    if first_part:
                        prompt = json.loads(first_part[0]).get("text", "").strip()
                except Exception:
                    # The prompt is optional; keep the session without it.
                    pass
                yield (session_id, ts_ms, model_json, directory, prompt)
        finally:
            connection.close()
    except Exception:
        return
132
+
133
+
134
def iter_codex_db_sessions(codex_dir=None):
    """Yield one row per thread stored in ``<codex_dir>/state_5.sqlite``.

    Each row is ``(id, updated_at_ms, first_user_message, model_provider,
    model, cwd)`` and rows come newest first. ``codex_dir`` defaults to
    ``~/.codex``.

    This is best effort: a missing database yields nothing, and any error
    while reading it silently ends the iteration.
    """
    codex_home = Path.home() / ".codex" if not codex_dir else Path(codex_dir)
    db_file = codex_home / "state_5.sqlite"
    if not db_file.exists():
        return
    query = (
        "SELECT id, updated_at_ms, first_user_message, model_provider, model, cwd"
        " FROM threads ORDER BY updated_at_ms DESC"
    )
    try:
        connection = sqlite3.connect(str(db_file))
        try:
            for thread_row in connection.execute(query).fetchall():
                yield thread_row
        finally:
            connection.close()
    except Exception:
        return
152
+
153
+
154
+ def _extract_content_text(content) -> str:
155
+ if isinstance(content, str):
156
+ return content.strip()
157
+ if isinstance(content, list):
158
+ return " ".join(
159
+ b.get("text", "")
160
+ for b in content
161
+ if isinstance(b, dict) and b.get("type") == "text"
162
+ ).strip()
163
+ return ""
164
+
165
+
166
def get_first_user_message_claude(jsonl_path) -> tuple[str, str]:
    """Return ``(timestamp, first_user_text)`` from a Claude Code trajectory.

    ``timestamp`` is the first truthy ``timestamp`` value seen on any JSON
    object line. ``first_user_text`` is the text of the first ``user`` entry
    that has text, skipping entries flagged ``isMeta`` and later user
    entries sharing a ``promptId`` with a meta entry; it is ``""`` when no
    such entry exists.

    The scan is best effort. Blank lines, lines that are not valid JSON,
    lines whose JSON is not an object, and entries whose ``message`` is not
    an object are skipped. An unreadable or undecodable file ends the scan,
    returning whatever timestamp was found so far.
    """
    timestamp = ""
    meta_prompt_ids: set = set()
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # json.loads may return a list/str/number; only objects are entries.
                if not isinstance(d, dict):
                    continue
                if not timestamp:
                    timestamp = d.get("timestamp", "")
                if d.get("isMeta"):
                    meta_prompt_ids.add(d.get("promptId", ""))
                    continue
                if d.get("type") != "user":
                    continue
                if d.get("promptId") in meta_prompt_ids:
                    continue
                message = d.get("message")
                if not isinstance(message, dict):
                    continue
                text = _extract_content_text(message.get("content", ""))
                if text:
                    return timestamp, text
    except (OSError, UnicodeDecodeError):
        # Unreadable or non-UTF-8 file: fall through with what we have.
        pass
    return timestamp, ""
194
+
195
+
196
def get_first_user_message_copilot(jsonl_path) -> tuple[str, str]:
    """Return ``(timestamp, first_user_text)`` from a Copilot events JSONL.

    ``timestamp`` is the first truthy ``timestamp`` value seen on any JSON
    object line. ``first_user_text`` is the text of the first
    ``user.message`` event whose ``data.content`` has text, or ``""``.

    The scan is best effort. Blank lines, lines that are not valid JSON,
    lines whose JSON is not an object, and events whose ``data`` is not an
    object are skipped. An unreadable or undecodable file ends the scan,
    returning whatever timestamp was found so far.
    """
    timestamp = ""
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # json.loads may return a list/str/number; only objects are events.
                if not isinstance(d, dict):
                    continue
                if not timestamp:
                    timestamp = d.get("timestamp", "")
                if d.get("type") != "user.message":
                    continue
                data = d.get("data")
                if not isinstance(data, dict):
                    continue
                text = _extract_content_text(data.get("content", ""))
                if text:
                    return timestamp, text
    except (OSError, UnicodeDecodeError):
        # Unreadable or non-UTF-8 file: fall through with what we have.
        pass
    return timestamp, ""
218
+
219
+
220
def get_first_user_message_agent_probe(jsonl_path) -> tuple[str, str]:
    """Return ``(timestamp, first_user_text)`` from an agent_probe JSONL.

    Two event shapes are accepted: ``type == "user"`` with the text under
    ``message.content``, and ``type == "user.message"`` with the text under
    ``data.content``. ``timestamp`` is the first truthy ``timestamp`` value
    seen on any JSON object line.

    The scan is best effort. Blank lines, lines that are not valid JSON,
    lines whose JSON is not an object, and events whose container
    (``message`` / ``data``) is not an object are skipped. An unreadable or
    undecodable file ends the scan, returning whatever timestamp was found.
    """
    # Event type -> key of the object that carries ``content``.
    container_key = {"user": "message", "user.message": "data"}
    timestamp = ""
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # json.loads may return a list/str/number; only objects are events.
                if not isinstance(d, dict):
                    continue
                if not timestamp:
                    timestamp = d.get("timestamp", "")
                event_type = d.get("type")
                if event_type not in ("user", "user.message"):
                    continue
                container = d.get(container_key[event_type])
                if not isinstance(container, dict):
                    continue
                text = _extract_content_text(container.get("content", ""))
                if text:
                    return timestamp, text
    except (OSError, UnicodeDecodeError):
        # Unreadable or non-UTF-8 file: fall through with what we have.
        pass
    return timestamp, ""
248
+
249
+
250
def get_first_user_message(jsonl_path) -> tuple[str, str]:
    """Return ``(timestamp, first_user_text)`` using the reader for the path.

    The reader is picked from the directory the file lives under:
    ``~/.claude``, ``~/.copilot`` or ``~/.local/share/agent_probe``. Paths
    outside those locations give ``("", "")``.
    """
    path = Path(jsonl_path)
    home = Path.home()
    if path.is_relative_to(home / ".claude"):
        reader = get_first_user_message_claude
    elif path.is_relative_to(home / ".copilot"):
        reader = get_first_user_message_copilot
    elif path.is_relative_to(home / ".local" / "share" / "agent_probe"):
        reader = get_first_user_message_agent_probe
    else:
        return "", ""
    return reader(path)
260
+
261
+
262
def get_cwd_from_trajectory(jsonl_path) -> str:
    """Extract the working directory recorded in a JSONL trajectory file.

    Only the first 31 lines are inspected. On each JSON object line the keys
    ``cwd``, ``workingDirectory`` and ``working_directory`` are tried, first
    on the object itself and then on its nested ``payload`` object (Codex
    rollout entries keep their fields under ``payload``). The first
    non-empty string wins.

    Returns ``""`` when nothing is found or the file cannot be read or
    decoded. Blank lines, invalid JSON and non-object JSON lines are skipped.
    """
    cwd_keys = ("cwd", "workingDirectory", "working_directory")
    try:
        with open(Path(jsonl_path), encoding="utf-8") as f:
            for i, line in enumerate(f):
                if i > 30:
                    break
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # json.loads may return a list/str/number; only objects carry keys.
                if not isinstance(d, dict):
                    continue
                # Top-level keys take precedence over the nested payload.
                for holder in (d, d.get("payload")):
                    if not isinstance(holder, dict):
                        continue
                    for key in cwd_keys:
                        val = holder.get(key)
                        if val and isinstance(val, str):
                            return val
    except (OSError, UnicodeDecodeError):
        # Unreadable or non-UTF-8 file: report "unknown".
        pass
    return ""
283
+
284
+
285
def iter_copilot_sessions(copilot_dir=None):
    """Yield ``(session_id, created_at)`` rows from the Copilot CLI store.

    The rows come from the ``sessions`` table of
    ``<copilot_dir>/session-store.db``; ``copilot_dir`` defaults to
    ``~/.copilot``.

    This is best effort: a missing database yields nothing, and any error
    while reading it silently ends the iteration.
    """
    copilot_home = Path.home() / ".copilot" if not copilot_dir else Path(copilot_dir)
    db_file = copilot_home / "session-store.db"
    if not db_file.exists():
        return
    try:
        connection = sqlite3.connect(str(db_file))
        try:
            for session_row in connection.execute("SELECT id, created_at FROM sessions").fetchall():
                yield session_row
        finally:
            connection.close()
    except Exception:
        return
300
+
301
+
302
+ # ── Trajectory parsing ────────────────────────────────────────────────────────
303
+
304
+ @dataclass
305
+ class ParsedTrajectory:
306
+ steps: list[dict] = field(default_factory=list)
307
+ session_id: str | None = None
308
+ model_name: str | None = None
309
+ agent_version: str | None = None
310
+ total_prompt_tokens: int = 0
311
+ total_completion_tokens: int = 0
312
+ total_cached_tokens: int = 0
313
+ total_tool_calls: int = 0
314
+ extra_agent: dict = field(default_factory=dict)
315
+
316
+
317
+ def _truncate(text: str, limit: int = 4000) -> str:
318
+ if len(text) <= limit:
319
+ return text
320
+ return text[:limit] + f"\n… [{len(text) - limit} chars truncated]"
321
+
322
+
323
+ def _cc_extract_text(content: object) -> str:
324
+ if isinstance(content, str):
325
+ return content
326
+ if isinstance(content, list):
327
+ parts: list[str] = []
328
+ for part in content:
329
+ if isinstance(part, str):
330
+ parts.append(part)
331
+ elif isinstance(part, dict):
332
+ if part.get("type") == "text":
333
+ parts.append(part.get("text", ""))
334
+ elif part.get("type") == "image":
335
+ parts.append("[image]")
336
+ return "\n".join(p for p in parts if p)
337
+ return ""
338
+
339
+
340
+ def _cc_tool_result_text(content: object) -> str:
341
+ if isinstance(content, str):
342
+ return content
343
+ if isinstance(content, list):
344
+ return "\n".join(
345
+ p.get("text", "") for p in content
346
+ if isinstance(p, dict) and p.get("type") == "text"
347
+ )
348
+ return str(content) if content is not None else ""
349
+
350
+
351
+ def _cc_is_pure_tool_result(content: object) -> bool:
352
+ return (
353
+ isinstance(content, list)
354
+ and bool(content)
355
+ and all(isinstance(p, dict) and p.get("type") == "tool_result" for p in content)
356
+ )
357
+
358
+
359
def parse_claude_trajectory(jsonl_path: Path, fallback_timestamp: str = "") -> ParsedTrajectory:
    """Parse a Claude Code project JSONL trajectory file.

    Args:
        jsonl_path: Path of the trajectory file (one JSON object per line).
        fallback_timestamp: Timestamp used for steps whose entry has none.

    Returns:
        A ``ParsedTrajectory`` with one step per non-empty user entry and one
        per assistant entry. Tool results found in user entries are attached
        as ``observation`` to the agent step that issued the matching
        ``tool_use``. Token totals are summed over all assistant entries;
        prompt tokens include cache-creation and cache-read tokens.

    Raises:
        OSError: If the file cannot be opened.

    Blank lines, lines that are not valid JSON, and lines whose JSON is not
    an object are skipped. A ``message`` that is not an object is treated as
    empty.
    """
    entries: list[dict] = []
    with Path(jsonl_path).open(encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                parsed = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # json.loads may return a list/str/number; only objects are entries.
            if isinstance(parsed, dict):
                entries.append(parsed)

    def _message(entry: dict) -> dict:
        """Return the entry's ``message`` object, or ``{}`` if it is not one."""
        msg = entry.get("message")
        return msg if isinstance(msg, dict) else {}

    session_id: str | None = None
    model_name: str | None = None
    agent_version: str | None = None

    # Pass 1: metadata — the first value seen for each field wins.
    for entry in entries:
        if not session_id and "sessionId" in entry:
            session_id = entry["sessionId"]
        if entry.get("type") == "assistant":
            msg = _message(entry)
            if not model_name and msg.get("model"):
                model_name = msg["model"]
        if not agent_version and entry.get("version"):
            agent_version = entry["version"]

    # Pass 2: index tool results by the id of the tool call they answer.
    tool_results: dict[str, str] = {}
    for entry in entries:
        if entry.get("type") != "user":
            continue
        content = _message(entry).get("content", [])
        if not isinstance(content, list):
            continue
        for part in content:
            if isinstance(part, dict) and part.get("type") == "tool_result":
                tid = part.get("tool_use_id", "")
                if tid:
                    tool_results[tid] = _cc_tool_result_text(part.get("content", ""))

    # Pass 3: build the steps.
    steps: list[dict] = []
    step_id = 0
    total_tool_calls = 0
    total_prompt = total_completion = total_cached = 0

    for entry in entries:
        entry_type = entry.get("type")
        timestamp: str = entry.get("timestamp") or fallback_timestamp

        if entry_type == "user":
            content = _message(entry).get("content", [])
            # Entries that only carry tool results are not user turns.
            if _cc_is_pure_tool_result(content):
                continue
            text = _cc_extract_text(content)
            if not text.strip():
                continue
            step_id += 1
            steps.append({"step_id": step_id, "timestamp": timestamp,
                          "source": "user", "message": text.strip()})

        elif entry_type == "assistant":
            msg = _message(entry)
            content = msg.get("content") or []
            if not isinstance(content, list):
                content = []

            text_parts: list[str] = []
            reasoning_parts: list[str] = []
            tool_calls: list[dict] = []

            for part in content:
                if not isinstance(part, dict):
                    continue
                ptype = part.get("type")
                if ptype == "text":
                    # A null ``text`` would break the join below.
                    text_parts.append(part.get("text") or "")
                elif ptype == "thinking":
                    # Keep every thinking block instead of only the last one.
                    thought = part.get("thinking")
                    if thought:
                        reasoning_parts.append(thought)
                elif ptype == "tool_use":
                    tool_calls.append({
                        "tool_call_id": part.get("id", ""),
                        "function_name": part.get("name", ""),
                        "arguments": part.get("input") or {},
                    })
                    total_tool_calls += 1

            usage = msg.get("usage") or {}
            prompt_tokens = (
                (usage.get("input_tokens") or 0)
                + (usage.get("cache_creation_input_tokens") or 0)
                + (usage.get("cache_read_input_tokens") or 0)
            )
            completion_tokens = usage.get("output_tokens") or 0
            cached_tokens = usage.get("cache_read_input_tokens") or 0
            total_prompt += prompt_tokens
            total_completion += completion_tokens
            total_cached += cached_tokens

            results = [
                {"source_call_id": tc["tool_call_id"],
                 "content": _truncate(tool_results[tc["tool_call_id"]])}
                for tc in tool_calls
                if tc["tool_call_id"] in tool_results
            ]

            step_id += 1
            step: dict = {
                "step_id": step_id,
                "timestamp": timestamp,
                "source": "agent",
                "message": "\n".join(text_parts).strip(),
            }
            if reasoning_parts:
                step["reasoning_content"] = "\n\n".join(reasoning_parts)
            if tool_calls:
                step["tool_calls"] = tool_calls
            if results:
                step["observation"] = {"results": results}
            if prompt_tokens or completion_tokens:
                step["metrics"] = {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "cached_tokens": cached_tokens,
                }
            steps.append(step)

    return ParsedTrajectory(
        session_id=session_id,
        model_name=model_name,
        agent_version=agent_version,
        steps=steps,
        total_prompt_tokens=total_prompt,
        total_completion_tokens=total_completion,
        total_cached_tokens=total_cached,
        total_tool_calls=total_tool_calls,
    )
500
+
501
+
502
def parse_codex_trajectory(jsonl_path: Path, fallback_timestamp: str = "") -> ParsedTrajectory:
    """Parse a Codex rollout-*.jsonl trajectory file.

    Args:
        jsonl_path: Path of the rollout file (one JSON object per line).
        fallback_timestamp: Timestamp used for steps whose entry has none.

    Returns:
        A ``ParsedTrajectory``. ``user_message`` events become user steps.
        Consecutive assistant messages and tool calls are merged into one
        agent step, which is closed by the next user message, by a
        ``task_complete`` / ``turn_aborted`` event, or by the end of the file.
        Tool outputs are attached to the step that issued the call. Token
        totals are the largest cumulative ``total_token_usage`` values seen.

    Raises:
        OSError: If the file cannot be opened.

    Blank lines, lines that are not valid JSON, and lines whose JSON is not
    an object are skipped. A ``payload`` that is not an object is treated as
    empty.
    """
    entries: list[dict] = []
    with Path(jsonl_path).open(encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                parsed = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # json.loads may return a list/str/number; only objects are entries.
            if isinstance(parsed, dict):
                entries.append(parsed)

    def _payload(entry: dict) -> dict:
        """Return the entry's ``payload`` object, or ``{}`` if it is not one."""
        payload = entry.get("payload")
        return payload if isinstance(payload, dict) else {}

    session_id: str | None = None
    model_name: str | None = None
    cli_version: str | None = None

    # Pass 1: metadata. The last session_meta wins; the first model wins.
    for entry in entries:
        t = entry.get("type", "")
        p = _payload(entry)
        if t == "session_meta":
            session_id = p.get("id")
            cli_version = p.get("cli_version")
        if t == "turn_context" and p.get("model") and not model_name:
            model_name = p["model"]

    # Pass 2: index tool outputs by call id.
    tool_results: dict[str, str] = {}
    for entry in entries:
        if entry.get("type") != "response_item":
            continue
        p = _payload(entry)
        if p.get("type", "") in ("function_call_output", "custom_tool_call_output"):
            call_id = p.get("call_id", "")
            if call_id:
                tool_results[call_id] = str(p.get("output", ""))

    steps: list[dict] = []
    step_id = 0
    total_tool_calls = 0
    total_prompt = total_completion = total_cached = 0

    # Agent step currently being assembled, if any.
    pending: dict | None = None

    def _flush_pending() -> None:
        """Finish the pending agent step and append it to ``steps``."""
        nonlocal pending
        if pending is None:
            return
        tool_calls: list[dict] = pending.get("tool_calls", [])
        if tool_calls:
            results = [
                {"source_call_id": tc["tool_call_id"],
                 "content": _truncate(tool_results[tc["tool_call_id"]])}
                for tc in tool_calls
                if tc["tool_call_id"] in tool_results
            ]
            if results:
                pending["observation"] = {"results": results}
        else:
            # Do not emit an empty tool_calls list.
            pending.pop("tool_calls", None)
        steps.append(pending)
        pending = None

    # Pass 3: build the steps.
    for entry in entries:
        t = entry.get("type", "")
        p = _payload(entry)
        pt = p.get("type", "")
        ts = entry.get("timestamp") or fallback_timestamp

        if t == "event_msg" and pt == "user_message":
            _flush_pending()
            text = (p.get("message") or "").strip()
            if text:
                step_id += 1
                steps.append({"step_id": step_id, "timestamp": ts,
                              "source": "user", "message": text})

        elif t == "response_item" and pt == "message" and p.get("role") == "assistant":
            text = "".join(
                part.get("text", "")
                for part in (p.get("content") or [])
                if isinstance(part, dict) and part.get("type") == "output_text"
            ).strip()
            if pending is None:
                step_id += 1
                pending = {"step_id": step_id, "timestamp": ts,
                           "source": "agent", "message": text, "tool_calls": []}
            elif text:
                existing = pending.get("message", "")
                pending["message"] = (existing + "\n" + text).strip()

        elif t == "response_item" and pt in ("function_call", "custom_tool_call"):
            call_id = p.get("call_id", "")
            name = p.get("name", "")
            if pt == "function_call":
                # function_call carries its arguments as a JSON string.
                try:
                    arguments: object = json.loads(p.get("arguments") or "{}")
                except (json.JSONDecodeError, TypeError):
                    arguments = {"raw": p.get("arguments", "")}
            else:
                arguments = {"input": p.get("input", "")}

            if pending is None:
                step_id += 1
                pending = {"step_id": step_id, "timestamp": ts,
                           "source": "agent", "message": "", "tool_calls": []}
            pending["tool_calls"].append({
                "tool_call_id": call_id,
                "function_name": name,
                "arguments": arguments,
            })
            total_tool_calls += 1

        elif t == "event_msg" and pt == "token_count":
            info = p.get("info")
            if not isinstance(info, dict):
                info = {}
            tu = info.get("total_token_usage") or {}
            # The usage totals are cumulative, so keep the maximum seen.
            total_prompt = max(total_prompt, tu.get("input_tokens") or 0)
            total_completion = max(total_completion, tu.get("output_tokens") or 0)
            total_cached = max(total_cached, tu.get("cached_input_tokens") or 0)
            if pending is not None:
                lu = info.get("last_token_usage") or {}
                if lu:
                    pending["metrics"] = {
                        "prompt_tokens": lu.get("input_tokens") or 0,
                        "completion_tokens": lu.get("output_tokens") or 0,
                        "cached_tokens": lu.get("cached_input_tokens") or 0,
                    }

        elif t == "event_msg" and pt in ("task_complete", "turn_aborted"):
            _flush_pending()

    _flush_pending()

    return ParsedTrajectory(
        session_id=session_id,
        model_name=model_name,
        agent_version=cli_version,
        steps=steps,
        total_prompt_tokens=total_prompt,
        total_completion_tokens=total_completion,
        total_cached_tokens=total_cached,
        total_tool_calls=total_tool_calls,
    )
643
+
644
+
645
def parse_copilot_trajectory(db_path: Path, session_id: str, fallback_timestamp: str = "") -> ParsedTrajectory:
    """Parse a GitHub Copilot CLI session from the SQLite session store.

    Each row of the ``turns`` table contributes a user step and/or an agent
    step, depending on which of its messages is non-blank; both share the
    turn's timestamp (or ``fallback_timestamp``). The session's repository
    and summary are returned in ``extra_agent``. An unknown ``session_id``
    gives an empty ``ParsedTrajectory``.
    """
    connection = sqlite3.connect(str(db_path))
    try:
        session_row = connection.execute(
            "SELECT cwd, repository, branch, summary, created_at FROM sessions WHERE id=?",
            (session_id,),
        ).fetchone()
        if not session_row:
            return ParsedTrajectory()
        # Columns: cwd, repository, branch, summary, created_at.
        repository, summary = session_row[1], session_row[3]

        turns = connection.execute(
            "SELECT turn_index, user_message, assistant_response, timestamp "
            "FROM turns WHERE session_id=? ORDER BY turn_index",
            (session_id,),
        ).fetchall()
    finally:
        connection.close()

    steps: list[dict] = []
    for _turn_index, user_msg, asst_resp, turn_ts in turns:
        stamp = turn_ts or fallback_timestamp
        for source, body in (("user", user_msg), ("agent", asst_resp)):
            if body and body.strip():
                steps.append({"step_id": len(steps) + 1, "timestamp": stamp,
                              "source": source, "message": body.strip()})

    return ParsedTrajectory(
        steps=steps,
        extra_agent={
            "copilot_repository": repository or "",
            "copilot_summary": summary or "",
        },
    )
trajectoriz/cli.py ADDED
@@ -0,0 +1,377 @@
1
+ #!/usr/bin/env python3
2
+ """trajectoriz-cli: search and browse past agent trajectories."""
3
+
4
+ import argparse
5
+ import hashlib
6
+ import json
7
+ import math
8
+ import os
9
+ import sys
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import Iterator, Optional
13
+
14
+ import trajectoriz as tz
15
+
16
+
17
# Default page sizes for the paginated CLI output.
DEFAULT_SHOW_PAGE_SIZE = 20  # steps per page
DEFAULT_LIST_PAGE_SIZE = 50  # trajectories per page
19
+
20
+
21
@dataclass
class TrajRecord:
    """One trajectory as listed by the CLI, whichever agent produced it."""

    # Short identifier shown in listings and accepted by ``show``.
    id: str
    # Name of the agent/store the record came from (e.g. "claude", "codex").
    agent: str
    # Timestamp as text; may be empty.
    timestamp: str
    # First user message, used as the listing snippet; may be empty.
    first_msg: str
    # Where the full trajectory lives.
    source: object  # Path for JSONL files; dict for DB sessions
28
+
29
+
30
+ def _short_id(prefix: str, key: str) -> str:
31
+ return f"{prefix}-{hashlib.sha256(key.encode()).hexdigest()[:8]}"
32
+
33
+
34
+ def _codex_first_user_message(path: Path) -> tuple[str, str]:
35
+ ts = ""
36
+ try:
37
+ with path.open(encoding="utf-8") as f:
38
+ for line in f:
39
+ line = line.strip()
40
+ if not line:
41
+ continue
42
+ try:
43
+ d = json.loads(line)
44
+ except json.JSONDecodeError:
45
+ continue
46
+ if not ts:
47
+ ts = d.get("timestamp", "")
48
+ if d.get("type") == "event_msg":
49
+ p = d.get("payload") or {}
50
+ if p.get("type") == "user_message":
51
+ msg = (p.get("message") or "").strip()
52
+ if msg:
53
+ return ts, msg
54
+ except OSError:
55
+ pass
56
+ return ts, ""
57
+
58
+
59
+ def _cwd_matches(cwd_field: str, target: str) -> bool:
60
+ """True if cwd_field is target or a subdirectory of target."""
61
+ if not cwd_field:
62
+ return False
63
+ try:
64
+ return Path(cwd_field) == Path(target) or Path(cwd_field).is_relative_to(Path(target))
65
+ except (ValueError, TypeError):
66
+ return False
67
+
68
+
69
def _local_records(cwd: str) -> Iterator[TrajRecord]:
    """Yield only trajectories whose working directory is cwd or a subdirectory.

    Claude trajectories are selected through the project directory derived
    from ``cwd``. File-based Codex, Copilot and agent_probe trajectories are
    selected by the working directory found inside each file. opencode and
    Codex database sessions are selected by their stored directory.

    Timestamps of database sessions are stored as epoch milliseconds; they
    are converted to ISO-8601 UTC text so that they display as dates and
    sort together with the ISO timestamps of file-based trajectories.
    """
    from datetime import datetime, timezone

    def _ms_to_iso(value: object) -> str:
        """Convert epoch milliseconds to ISO-8601 UTC text.

        ``None`` gives ``""``; a value that cannot be converted is returned
        as ``str(value)``.
        """
        if value is None:
            return ""
        try:
            moment = datetime.fromtimestamp(float(value) / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            return str(value)
        return f"{moment:%Y-%m-%dT%H:%M:%S}.{moment.microsecond // 1000:03d}Z"

    for p in tz.iter_claude_project_trajectories(cwd):
        ts, msg = tz.get_first_user_message_claude(p)
        yield TrajRecord(_short_id("cl", str(p)), "claude", ts, msg, p)

    for p in tz.iter_codex_rollout_files():
        if _cwd_matches(tz.get_cwd_from_trajectory(p), cwd):
            ts, msg = _codex_first_user_message(p)
            yield TrajRecord(_short_id("cx", str(p)), "codex", ts, msg, p)

    for p in tz.iter_copilot_event_trajectories():
        if _cwd_matches(tz.get_cwd_from_trajectory(p), cwd):
            ts, msg = tz.get_first_user_message_copilot(p)
            yield TrajRecord(_short_id("cp", str(p)), "copilot", ts, msg, p)

    for p in tz.iter_agent_probe_trajectories():
        if _cwd_matches(tz.get_cwd_from_trajectory(p), cwd):
            ts, msg = tz.get_first_user_message_agent_probe(p)
            yield TrajRecord(_short_id("ap", str(p)), "agent_probe", ts, msg, p)

    for session_id, ts_ms, model_json, directory, first_prompt in tz.iter_opencode_sessions():
        if _cwd_matches(directory, cwd):
            yield TrajRecord(
                _short_id("oc", session_id),
                "opencode",
                _ms_to_iso(ts_ms),
                first_prompt,
                {"type": "opencode", "session_id": session_id, "model": model_json, "dir": directory},
            )

    for row in tz.iter_codex_db_sessions():
        sid, updated_ms, first_msg, _, model, rec_cwd = row
        if _cwd_matches(rec_cwd, cwd):
            yield TrajRecord(
                _short_id("cd", str(sid)),
                "codex_db",
                _ms_to_iso(updated_ms),
                first_msg or "",
                {"type": "codex_db", "session_id": sid, "model": model, "cwd": rec_cwd},
            )
110
+
111
+
112
def _all_records() -> Iterator[TrajRecord]:
    """Yield a record for every trajectory found on this machine.

    Sources, in order: Claude JSONL files, Codex rollout files, Copilot event
    logs, agent_probe files, opencode sessions, Codex database threads and
    Copilot database sessions.

    Timestamps of opencode and Codex database sessions are stored as epoch
    milliseconds; they are converted to ISO-8601 UTC text so that they
    display as dates and sort together with the ISO timestamps of file-based
    trajectories.
    """
    from datetime import datetime, timezone

    def _ms_to_iso(value: object) -> str:
        """Convert epoch milliseconds to ISO-8601 UTC text.

        ``None`` gives ``""``; a value that cannot be converted is returned
        as ``str(value)``.
        """
        if value is None:
            return ""
        try:
            moment = datetime.fromtimestamp(float(value) / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            return str(value)
        return f"{moment:%Y-%m-%dT%H:%M:%S}.{moment.microsecond // 1000:03d}Z"

    for p in tz.iter_claude_trajectories():
        ts, msg = tz.get_first_user_message_claude(p)
        yield TrajRecord(_short_id("cl", str(p)), "claude", ts, msg, p)

    for p in tz.iter_codex_rollout_files():
        ts, msg = _codex_first_user_message(p)
        yield TrajRecord(_short_id("cx", str(p)), "codex", ts, msg, p)

    for p in tz.iter_copilot_event_trajectories():
        ts, msg = tz.get_first_user_message_copilot(p)
        yield TrajRecord(_short_id("cp", str(p)), "copilot", ts, msg, p)

    for p in tz.iter_agent_probe_trajectories():
        ts, msg = tz.get_first_user_message_agent_probe(p)
        yield TrajRecord(_short_id("ap", str(p)), "agent_probe", ts, msg, p)

    for session_id, ts_ms, model_json, directory, first_prompt in tz.iter_opencode_sessions():
        yield TrajRecord(
            _short_id("oc", session_id),
            "opencode",
            _ms_to_iso(ts_ms),
            first_prompt,
            {"type": "opencode", "session_id": session_id, "model": model_json, "dir": directory},
        )

    for row in tz.iter_codex_db_sessions():
        sid, updated_ms, first_msg, _, model, cwd = row
        yield TrajRecord(
            _short_id("cd", str(sid)),
            "codex_db",
            _ms_to_iso(updated_ms),
            first_msg or "",
            {"type": "codex_db", "session_id": sid, "model": model, "cwd": cwd},
        )

    copilot_db = Path.home() / ".copilot" / "session-store.db"
    if copilot_db.exists():
        for session_id, created_at in tz.iter_copilot_sessions():
            yield TrajRecord(
                _short_id("gh", str(session_id)),
                "copilot_db",
                str(created_at or ""),
                "",
                {"type": "copilot_db", "session_id": session_id, "db_path": str(copilot_db)},
            )
158
+
159
+
160
+ def _render_step(step: dict) -> str:
161
+ lines: list[str] = []
162
+ role = "USER" if step["source"] == "user" else "AGENT"
163
+ ts_suffix = f" *{step['timestamp'][:19]}*" if step.get("timestamp") else ""
164
+ lines.append(f"---\n## Step {step['step_id']} — {role}{ts_suffix}\n")
165
+ if step.get("message"):
166
+ lines.append(step["message"])
167
+ lines.append("")
168
+ for tc in step.get("tool_calls", []):
169
+ args_str = json.dumps(tc.get("arguments", {}), indent=2)
170
+ if len(args_str) > 600:
171
+ args_str = args_str[:600] + "\n…"
172
+ lines.append(f"**Tool call:** `{tc['function_name']}`")
173
+ lines.append(f"```json\n{args_str}\n```\n")
174
+ for res in (step.get("observation") or {}).get("results", []):
175
+ content = res.get("content", "")
176
+ if len(content) > 1000:
177
+ content = content[:1000] + "\n…"
178
+ lines.append(f"**Tool result:**\n```\n{content}\n```\n")
179
+ return "\n".join(lines)
180
+
181
+
182
def _trajectory_header_and_steps(record: TrajRecord) -> tuple[str, list[str]]:
    """Return ``(header_markdown, rendered_steps)`` for one record.

    Full parsing is done for Claude and Codex JSONL files and for Copilot
    database sessions. Every other record gets a header that says parsing is
    unavailable (with session id, model and directory for database records)
    and an empty step list.
    """
    header: list[str] = [f"# Trajectory `{record.id}`", f"**Agent:** {record.agent}"]
    if record.timestamp:
        header.append(f"**Date:** {record.timestamp[:19]}")

    origin = record.source
    if isinstance(origin, Path):
        if record.agent == "claude":
            traj = tz.parse_claude_trajectory(origin)
        elif record.agent == "codex":
            traj = tz.parse_codex_trajectory(origin)
        else:
            header.append("\n*Full trajectory parsing not supported for this agent type.*")
            return "\n".join(header), []
    elif isinstance(origin, dict):
        if origin["type"] != "copilot_db":
            # Database sessions without a parser: show what the record knows.
            header.append(f"**Session ID:** {origin.get('session_id', '')}")
            if origin.get("model"):
                header.append(f"**Model:** {origin['model']}")
            directory = origin.get("dir") or origin.get("cwd") or ""
            if directory:
                header.append(f"**Directory:** {directory}")
            header.append("\n*Full trajectory parsing not available for this agent type.*")
            return "\n".join(header), []
        traj = tz.parse_copilot_trajectory(Path(origin["db_path"]), origin["session_id"])
    else:
        return "\n".join(header), []

    header.append(f"**Steps:** {len(traj.steps)}")
    if traj.model_name:
        header.append(f"**Model:** {traj.model_name}")
    if traj.total_tool_calls:
        header.append(f"**Tool calls:** {traj.total_tool_calls}")
    if traj.total_prompt_tokens:
        header.append(
            f"**Tokens:** {traj.total_prompt_tokens} prompt / "
            f"{traj.total_completion_tokens} completion"
        )

    rendered = [_render_step(step) for step in traj.steps]
    return "\n".join(header), rendered
227
+
228
+
229
+ def _paginate_items(
230
+ items: list[str], page: int, page_size: int, header: str, unit: str, footer: str = ""
231
+ ) -> None:
232
+ total = len(items)
233
+ total_pages = max(1, math.ceil(total / page_size))
234
+ if page < 1 or page > total_pages:
235
+ print(f"Error: page {page} out of range (1–{total_pages}).", file=sys.stderr)
236
+ sys.exit(1)
237
+ start = (page - 1) * page_size
238
+ chunk = items[start : start + page_size]
239
+ showing_end = min(start + page_size, total)
240
+ print(
241
+ f"<!-- trajectoriz | page {page}/{total_pages} | "
242
+ f"{unit} {start + 1}–{showing_end} of {total} -->"
243
+ )
244
+ if header:
245
+ print(header)
246
+ print("\n".join(chunk))
247
+ if footer and page == total_pages:
248
+ print(footer)
249
+ if page < total_pages:
250
+ remaining = total_pages - page
251
+ print(f"\n<!-- {remaining} more page(s) — run with --page {page + 1} to continue -->")
252
+
253
+
254
+ # ── Commands ──────────────────────────────────────────────────────────────────
255
+
256
+
257
def _record_row(rec: TrajRecord) -> str:
    """Format a record as one row of the Markdown listing table.

    The date is the first 10 characters of the timestamp (an em dash when
    there is none). The snippet is the first 80 characters of the first
    message, with pipes escaped and newlines turned into spaces so the row
    stays on one table line.
    """
    date = "—" if not rec.timestamp else rec.timestamp[:10]
    snippet = (rec.first_msg or "")[:80]
    snippet = snippet.replace("|", "\\|").replace("\n", " ")
    return f"| `{rec.id}` | {rec.agent} | {date} | {snippet} |"
261
+
262
+
263
def cmd_list(args) -> None:
    """Print a paginated Markdown table of trajectories, newest first.

    With ``args.all`` every known trajectory is listed; otherwise only those
    belonging to the current working directory. ``args.page`` and
    ``args.page_size`` select the page.
    """
    if args.all:
        candidates = _all_records()
    else:
        candidates = _local_records(os.getcwd())
    records = list(candidates)
    records.sort(key=lambda rec: rec.timestamp, reverse=True)
    if not records:
        print("No trajectories found.")
        return

    table_header = (
        f"## All trajectories ({len(records)} total)\n\n"
        "| ID | Agent | Date | First message |\n"
        "|---|---|---|---|"
    )
    _paginate_items(
        [_record_row(rec) for rec in records],
        args.page,
        args.page_size,
        table_header,
        "trajectories",
        footer="\nUse `trajectoriz-cli show <id>` to view a trajectory.",
    )
277
+
278
+
279
def cmd_search(args) -> None:
    """Handle the ``search`` sub-command: print records matching a keyword.

    A record matches when ``args.query`` is a case-insensitive substring of
    its first message, its id, or its agent name. Reads ``args.all`` (search
    every known record instead of only those for the current working
    directory), ``args.page`` and ``args.page_size``. Matches are shown newest
    first, ordered by their timestamp string.
    """
    query = args.query.lower()
    source = _all_records() if args.all else _local_records(os.getcwd())
    records = [
        rec
        for rec in source
        if query in (rec.first_msg or "").lower()
        or query in rec.id.lower()
        or query in rec.agent.lower()
    ]
    # _record_row treats the timestamp as possibly empty/missing, so the sort
    # key must too: comparing None with str would raise TypeError. Falling
    # back to "" places such records last under the descending order.
    records.sort(key=lambda r: r.timestamp or "", reverse=True)

    if not records:
        print(f"No trajectories found matching `{args.query}`.")
        return
    header = (
        f"## Search: `{args.query}` — {len(records)} result(s)\n\n"
        "| ID | Agent | Date | First message |\n"
        "|---|---|---|---|"
    )
    rows = [_record_row(r) for r in records]
    _paginate_items(rows, args.page, args.page_size, header, "trajectories",
                    footer="\nUse `trajectoriz-cli show <id>` to view a trajectory.")
302
+
303
+
304
def cmd_show(args) -> None:
    """Handle the ``show`` sub-command: print one trajectory, page by page.

    Looks up the record whose id equals ``args.id`` exactly among all known
    records. If none matches, writes an error to stderr and exits with
    status 1. Otherwise the rendered steps are paginated using ``args.page``
    and ``args.page_size``.
    """
    wanted = args.id
    # Stop at the first exact id match; None signals "not found".
    found = next((rec for rec in _all_records() if rec.id == wanted), None)

    if found is None:
        print(f"Error: trajectory `{wanted}` not found.", file=sys.stderr)
        sys.exit(1)

    header, steps = _trajectory_header_and_steps(found)
    _paginate_items(steps, args.page, args.page_size, header, "steps")
318
+
319
+
320
+ # ── Entry point ───────────────────────────────────────────────────────────────
321
+
322
+
323
def main() -> None:
    """Build the ``trajectoriz-cli`` argument parser and run the chosen command.

    Defines the ``list``, ``search`` and ``show`` sub-commands (one is
    required), parses ``sys.argv``, and calls the handler stored under
    ``func`` with the parsed namespace.
    """
    cli = argparse.ArgumentParser(
        prog="trajectoriz-cli",
        description="Search and browse past agent trajectories.",
    )
    commands = cli.add_subparsers(dest="command", metavar="<command>", required=True)

    # ``list`` and ``search`` share the same page-size help text.
    listing_page_size_help = f"Trajectories per page (default: {DEFAULT_LIST_PAGE_SIZE})"

    # list
    list_cmd = commands.add_parser(
        "list", help="List trajectories (current directory by default)."
    )
    list_cmd.add_argument("--page", type=int, default=1, metavar="N")
    list_cmd.add_argument(
        "--page-size",
        type=int,
        default=DEFAULT_LIST_PAGE_SIZE,
        metavar="N",
        help=listing_page_size_help,
    )
    list_cmd.add_argument("--all", action="store_true", help="Include all agents/directories.")
    list_cmd.set_defaults(func=cmd_list)

    # search
    search_cmd = commands.add_parser(
        "search",
        help="Search trajectories by keyword (current directory by default).",
    )
    search_cmd.add_argument("query", help="Search term (case-insensitive substring).")
    search_cmd.add_argument("--page", type=int, default=1, metavar="N")
    search_cmd.add_argument(
        "--page-size",
        type=int,
        default=DEFAULT_LIST_PAGE_SIZE,
        metavar="N",
        help=listing_page_size_help,
    )
    search_cmd.add_argument(
        "--all", action="store_true", help="Search across all agents/directories."
    )
    search_cmd.set_defaults(func=cmd_search)

    # show
    show_cmd = commands.add_parser(
        "show",
        help="Show a trajectory in agent-readable markdown.",
    )
    show_cmd.add_argument("id", help="Trajectory ID (from list or search).")
    show_cmd.add_argument(
        "--page",
        type=int,
        default=1,
        metavar="N",
        help="Page number (default: 1). Increment to scroll.",
    )
    show_cmd.add_argument(
        "--page-size",
        type=int,
        default=DEFAULT_SHOW_PAGE_SIZE,
        metavar="N",
        help=f"Messages (steps) per page (default: {DEFAULT_SHOW_PAGE_SIZE})",
    )
    show_cmd.set_defaults(func=cmd_show)

    namespace = cli.parse_args()
    # Each sub-parser registered its handler under ``func`` via set_defaults.
    namespace.func(namespace)


if __name__ == "__main__":
    main()
@@ -0,0 +1,85 @@
1
+ Metadata-Version: 2.4
2
+ Name: trajectoriz
3
+ Version: 0.2.0
4
+ Summary: Locate agent trajectory files on the local machine
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Dynamic: license-file
10
+
11
+ # trajectoriz
12
+
13
+ Locate and parse agent trajectory files on the local machine, and browse them from the terminal with the `trajectoriz-cli` command.
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pip install trajectoriz
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ ```python
24
+ from trajectoriz import (
25
+ iter_claude_trajectories,
26
+ iter_claude_project_trajectories,
27
+ iter_codex_trajectories,
28
+ iter_codex_rollout_files,
29
+ iter_codex_db_sessions,
30
+ iter_pi_trajectories,
31
+ iter_cursor_trajectories,
32
+ iter_copilot_event_trajectories,
33
+ iter_copilot_sessions,
34
+ iter_agent_probe_trajectories,
35
+ iter_opencode_sessions,
36
+ )
37
+
38
+ # List all Claude Code trajectory files
39
+ for path in iter_claude_trajectories():
40
+ print(path)
41
+
42
+ # List Claude trajectories for a specific project
43
+ for path in iter_claude_project_trajectories("/path/to/repo"):
44
+ print(path)
45
+
46
+ # List Codex CLI session files
47
+ for path in iter_codex_trajectories():
48
+ print(path)
49
+
50
+ # List Codex CLI rollout files
51
+ for path in iter_codex_rollout_files():
52
+ print(path)
53
+
54
+ # List Codex sessions from SQLite store (~/.codex/state_5.sqlite)
55
+ for session_id, updated_at_ms, first_msg, provider, model, cwd in iter_codex_db_sessions():
56
+ print(session_id, first_msg)
57
+
58
+ # List pi coding agent session files
59
+ for path in iter_pi_trajectories():
60
+ print(path)
61
+
62
+ # List Cursor trajectory files
63
+ for path in iter_cursor_trajectories():
64
+ print(path)
65
+
66
+ # List Copilot CLI session event JSONL files (~/.copilot/session-state/*/events.jsonl)
67
+ for path in iter_copilot_event_trajectories():
68
+ print(path)
69
+
70
+ # List Copilot CLI sessions from SQLite store
71
+ for session_id, created_at in iter_copilot_sessions():
72
+ print(session_id, created_at)
73
+
74
+ # List agent_probe session JSONL files (~/.local/share/agent_probe/*/*/*)
75
+ for path in iter_agent_probe_trajectories():
76
+ print(path)
77
+
78
+ # List opencode sessions from SQLite store (~/.local/share/opencode/opencode.db)
79
+ for session_id, updated_at_ms, model_json, directory, first_prompt in iter_opencode_sessions():
80
+ print(session_id, first_prompt)
81
+ ```
82
+
83
+ ## License
84
+
85
+ MIT
@@ -0,0 +1,8 @@
1
+ trajectoriz/__init__.py,sha256=XGa8C_50P3qUJBjrb90TQok0rr5VOoNjyIwRmMCbQCk,24629
2
+ trajectoriz/cli.py,sha256=JkRdARLKxEdU-kKarvxBUgykk3p1-NMxInpqP3DNuSE,14058
3
+ trajectoriz-0.2.0.dist-info/licenses/LICENSE,sha256=Btzdu2kIoMbdSp6OyCLupB1aRgpTCJ_szMimgEnpkkE,1056
4
+ trajectoriz-0.2.0.dist-info/METADATA,sha256=IHfKq-YtLsItgDHRwx44kCWgOiH5vTIDqAMq5bl1fu8,2185
5
+ trajectoriz-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ trajectoriz-0.2.0.dist-info/entry_points.txt,sha256=YV_i-3pgUXiar4sj5R5Sn3PdBT115KMf2ztMRGZz_VY,57
7
+ trajectoriz-0.2.0.dist-info/top_level.txt,sha256=8p6CY8WukAX6dc_kuSkMHlsHjw9b5gh0k8nq0f5OFgs,12
8
+ trajectoriz-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ trajectoriz-cli = trajectoriz.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ trajectoriz