threadkeeper 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,178 @@
1
+ """Claude Code adapter.
2
+
3
+ Claude Code stores conversation transcripts as JSONL files under
4
+ ~/.claude/projects/<slug>/<conversation-id>.jsonl. MCP servers are
5
+ registered in ~/.claude.json under "mcpServers".
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import shutil
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Iterator
15
+
16
+ from .base import CLIAdapter, NormalizedMessage
17
+
18
+
19
+ def _ts(s: str) -> int:
20
+ try:
21
+ return int(datetime.fromisoformat(s.replace("Z", "+00:00")).timestamp())
22
+ except Exception:
23
+ import time
24
+ return int(time.time())
25
+
26
+
27
+ def _extract_text(msg: dict) -> str:
28
+ """Pull searchable text from a message; skip tool_use args,
29
+ cap tool_results. Matches the legacy behavior pre-adapter."""
30
+ content = msg.get("content", "")
31
+ if isinstance(content, str):
32
+ return content
33
+ if not isinstance(content, list):
34
+ return ""
35
+ parts: list[str] = []
36
+ for block in content:
37
+ if not isinstance(block, dict):
38
+ continue
39
+ t = block.get("type")
40
+ if t == "text":
41
+ parts.append(block.get("text", ""))
42
+ elif t == "thinking":
43
+ parts.append(f"[thinking] {block.get('thinking', '')}")
44
+ elif t == "tool_result":
45
+ tr = block.get("content", "")
46
+ if isinstance(tr, list):
47
+ tr = " ".join(b.get("text", "") for b in tr if isinstance(b, dict))
48
+ if isinstance(tr, str) and tr:
49
+ parts.append(f"[tool_result] {tr[:800]}")
50
+ return "\n".join(p for p in parts if p)
51
+
52
+
53
class ClaudeCodeAdapter(CLIAdapter):
    """Adapter for the Claude Code CLI.

    Covers detection, MCP server registration in ``~/.claude.json``, hook
    installation through ``~/.claude/settings.json``, and iteration over
    the JSONL transcripts stored under the projects directory.
    """

    name = "claude-code"

    def __init__(self) -> None:
        self.projects_dir = Path(
            os.environ.get("CLAUDE_PROJECTS_DIR", "~/.claude/projects")
        ).expanduser()
        self.config_path = Path("~/.claude.json").expanduser()
        self._instructions = Path("~/.claude/CLAUDE.md").expanduser()
        # Hooks live in the same settings.json that controls other
        # editor preferences. Each entry under "hooks" is keyed by event
        # name (SessionStart, PostToolUse, ...).
        self._settings_path = Path("~/.claude/settings.json").expanduser()
        # Claude auto-discovers SKILL.md files under this directory via
        # frontmatter description scanning at session start. The canonical
        # Anthropic skills format.
        self._skills_dir = Path(
            os.environ.get("CLAUDE_SKILLS_DIR", "~/.claude/skills")
        ).expanduser()

    def skills_dir(self):
        """Return the directory SKILL.md files are mirrored into."""
        return self._skills_dir

    def instructions_path(self):
        """Return the path of the global CLAUDE.md instructions file."""
        return self._instructions

    def hooks_supported(self) -> bool:
        """Claude Code supports settings.json hooks."""
        return True

    def register_hooks(self, specs, dry_run=False) -> str:
        """Install ``specs`` into settings.json; returns a status string."""
        from ._hook_helpers import install_claude_style_hooks
        return install_claude_style_hooks(
            self._settings_path, specs, dry_run=dry_run,
        )

    # ----------------------------- detection -----------------------------
    def is_installed(self) -> bool:
        """True when Claude Code appears to be present on this machine."""
        # Either the projects dir exists (user has used Claude Code at
        # least once) OR the executable is on PATH.
        if self.projects_dir.exists():
            return True
        return shutil.which("claude") is not None

    # ----------------------------- mcp -----------------------------------
    def _load_config(self):
        """Read ~/.claude.json as UTF-8.

        Returns the parsed top-level dict, or ``None`` when the file is not
        valid UTF-8 JSON or its top level is not an object. JSON is UTF-8 by
        definition; relying on the locale encoding could mis-decode the
        file (notably on Windows) and then write the damage back.
        """
        try:
            cfg = json.loads(self.config_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            return None
        return cfg if isinstance(cfg, dict) else None

    def register_mcp_server(
        self, name, command, args, env, dry_run=False
    ) -> str:
        """Add or update the stdio MCP server entry ``name``.

        Returns a human-readable status string. Refuses (without writing)
        when the existing config cannot be parsed. With ``dry_run`` the
        status describes what would happen and nothing is written.
        """
        cfg: dict
        if self.config_path.exists():
            loaded = self._load_config()
            if loaded is None:
                return "claude-code: malformed ~/.claude.json — refused"
            cfg = loaded
        else:
            cfg = {}
        servers = cfg.get("mcpServers")
        if servers is None:
            # Key absent or explicitly null: start a fresh mapping.
            servers = cfg["mcpServers"] = {}
        elif not isinstance(servers, dict):
            return "claude-code: malformed ~/.claude.json — refused"
        entry = {
            "type": "stdio",
            "command": command,
            "args": list(args),
            "env": dict(env),
        }
        existing = servers.get(name)
        if existing == entry:
            return "claude-code: already current"
        servers[name] = entry
        if not dry_run:
            self.config_path.write_text(
                json.dumps(cfg, indent=2), encoding="utf-8"
            )
        return f"claude-code: {'would ' if dry_run else ''}{'update' if existing else 'add'}"

    def unregister_mcp_server(self, name, dry_run=False) -> str:
        """Remove the MCP server entry ``name`` if present.

        Returns a status string; refuses when the config is malformed,
        mirroring ``register_mcp_server``.
        """
        if not self.config_path.exists():
            return "claude-code: nothing to remove"
        cfg = self._load_config()
        if cfg is None:
            return "claude-code: malformed ~/.claude.json — refused"
        servers = cfg.get("mcpServers") or {}
        if not isinstance(servers, dict) or name not in servers:
            return "claude-code: not present"
        if dry_run:
            return f"claude-code: would remove {name}"
        servers.pop(name)
        self.config_path.write_text(
            json.dumps(cfg, indent=2), encoding="utf-8"
        )
        return f"claude-code: removed {name}"

    # ----------------------------- transcripts ---------------------------
    def session_dir(self):
        """Return the root directory holding transcript files."""
        return self.projects_dir

    def transcript_files(self) -> list[Path]:
        """List every ``*.jsonl`` file under the projects directory."""
        if not self.projects_dir.exists():
            return []
        return list(self.projects_dir.glob("**/*.jsonl"))

    def iter_messages(self, fp: Path) -> Iterator[NormalizedMessage]:
        """Yield one ``NormalizedMessage`` per user/assistant line in ``fp``.

        Blank lines, lines that are not JSON objects, lines without a
        ``uuid`` and lines whose role is neither ``user`` nor ``assistant``
        are skipped. An unreadable file yields nothing.
        """
        try:
            with fp.open("r", encoding="utf-8", errors="replace") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        obj = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(obj, dict):
                        # Valid JSON but not an envelope (list, number...).
                        continue
                    uuid = obj.get("uuid")
                    if not uuid:
                        continue
                    msg = obj.get("message", {})
                    if not isinstance(msg, dict):
                        # null / string payloads carry no role or content.
                        msg = {}
                    role = msg.get("role") or obj.get("type")
                    if role not in ("user", "assistant"):
                        continue
                    text = _extract_text(msg)
                    created = _ts(obj.get("timestamp", ""))
                    yield NormalizedMessage(
                        uuid=uuid,
                        session_id=obj.get("sessionId") or "",
                        role=role,
                        content=text,
                        model=msg.get("model") or "",
                        created_at=created,
                        raw=msg,
                    )
        except OSError:
            return


ADAPTER = ClaudeCodeAdapter()
@@ -0,0 +1,128 @@
1
+ """Claude Desktop adapter.
2
+
3
+ Claude Desktop is the Electron app — distinct from Claude Code (the CLI).
4
+ The two share a vendor but not a config or transcript location:
5
+
6
+ * Claude Code (CLI): ~/.claude.json , ~/.claude/projects/**/*.jsonl
7
+ * Claude Desktop: ~/Library/Application Support/Claude/
8
+ claude_desktop_config.json on macOS;
9
+ %APPDATA%/Claude/... on Windows;
10
+ ~/.config/Claude/... on Linux.
11
+
12
+ Config shape mirrors Gemini/Copilot:
13
+
14
+ {"mcpServers": {"<name>": {"command": "...", "args": [...], "env": {...}}}}
15
+
16
+ Claude Desktop has no shell-style hook mechanism and no global per-user
17
+ instructions file analogous to ~/.claude/CLAUDE.md (style + memory live
18
+ inside the app's GUI settings, not on disk). Conversations are stored
19
+ in Electron's IndexedDB (a leveldb on disk), which is fragile to parse
20
+ without browser tooling — we skip transcript ingest. MCP registration
21
+ alone gets thread-keeper's tools available inside Claude Desktop chats,
22
+ which is the integration users actually ask for.
23
+ """
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ import os
28
+ import sys
29
+ from pathlib import Path
30
+ from typing import Iterator
31
+
32
+ from .base import CLIAdapter, NormalizedMessage
33
+
34
+
35
+ def _default_config_path() -> Path:
36
+ """Per-OS default location for claude_desktop_config.json.
37
+
38
+ Overridable via CLAUDE_DESKTOP_CONFIG env var (used by tests)."""
39
+ env = os.environ.get("CLAUDE_DESKTOP_CONFIG")
40
+ if env:
41
+ return Path(env).expanduser()
42
+ if sys.platform == "darwin":
43
+ return Path(
44
+ "~/Library/Application Support/Claude/claude_desktop_config.json"
45
+ ).expanduser()
46
+ if sys.platform == "win32":
47
+ appdata = os.environ.get("APPDATA") or "~/AppData/Roaming"
48
+ return Path(appdata).expanduser() / "Claude" / "claude_desktop_config.json"
49
+ # linux / freebsd / others — follow XDG-ish convention used by other
50
+ # Electron apps shipped under "Claude".
51
+ return Path("~/.config/Claude/claude_desktop_config.json").expanduser()
52
+
53
+
54
+ def _app_bundle_present() -> bool:
55
+ """On macOS, detect Claude Desktop without requiring its config file
56
+ to exist yet (fresh install hasn't launched once)."""
57
+ if sys.platform == "darwin":
58
+ return Path("/Applications/Claude.app").exists()
59
+ return False
60
+
61
+
62
class ClaudeDesktopAdapter(CLIAdapter):
    """Adapter for the Claude Desktop Electron app.

    Only MCP registration is supported: the adapter edits the
    ``mcpServers`` mapping in claude_desktop_config.json and exposes no
    transcripts.
    """

    name = "claude-desktop"

    def __init__(self) -> None:
        self.config_path = _default_config_path()

    # ----------------------------- detection -----------------------------
    def is_installed(self) -> bool:
        """True when the config file or (on macOS) the app bundle exists."""
        return self.config_path.exists() or _app_bundle_present()

    # ----------------------------- mcp -----------------------------------
    def _load_config(self):
        """Read the config as UTF-8 JSON.

        Returns the top-level dict, or ``None`` when the file is not valid
        UTF-8 JSON or its top level is not an object. The encoding is
        explicit because the locale default (e.g. cp1252 on Windows) can
        mis-decode non-ASCII content that would then be written back.
        """
        try:
            cfg = json.loads(self.config_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            return None
        return cfg if isinstance(cfg, dict) else None

    def register_mcp_server(
        self, name, command, args, env, dry_run=False
    ) -> str:
        """Add or update the MCP server entry ``name``.

        Returns a status string. Refuses without writing when the existing
        config cannot be parsed; with ``dry_run`` nothing is written.
        """
        cfg: dict
        if self.config_path.exists():
            loaded = self._load_config()
            if loaded is None:
                return "claude-desktop: malformed config — refused"
            cfg = loaded
        else:
            cfg = {}
        servers = cfg.get("mcpServers")
        if servers is None:
            # Key absent or explicitly null: start a fresh mapping.
            servers = cfg["mcpServers"] = {}
        elif not isinstance(servers, dict):
            return "claude-desktop: malformed config — refused"
        entry: dict = {
            "command": command,
            "args": list(args),
        }
        if env:
            entry["env"] = dict(env)
        existing = servers.get(name)
        if existing == entry:
            return "claude-desktop: already current"
        servers[name] = entry
        if not dry_run:
            self.config_path.parent.mkdir(parents=True, exist_ok=True)
            self.config_path.write_text(
                json.dumps(cfg, indent=2), encoding="utf-8"
            )
        return f"claude-desktop: {'would ' if dry_run else ''}{'update' if existing else 'add'}"

    def unregister_mcp_server(self, name, dry_run=False) -> str:
        """Remove the MCP server entry ``name`` if present.

        Returns a status string; refuses when the config is malformed.
        """
        if not self.config_path.exists():
            return "claude-desktop: nothing to remove"
        cfg = self._load_config()
        if cfg is None:
            return "claude-desktop: malformed config — refused"
        servers = cfg.get("mcpServers") or {}
        if not isinstance(servers, dict) or name not in servers:
            return "claude-desktop: not present"
        if dry_run:
            return f"claude-desktop: would remove {name}"
        servers.pop(name)
        self.config_path.write_text(
            json.dumps(cfg, indent=2), encoding="utf-8"
        )
        return f"claude-desktop: removed {name}"

    # ----------------------------- transcripts ---------------------------
    # Claude Desktop stores chats inside Electron IndexedDB (leveldb on
    # disk). Parsing that without Chromium/Electron tooling is brittle,
    # so we don't expose any transcripts here — MCP registration alone is
    # the win. dialog_search() across other CLIs still works normally.
    def transcript_files(self) -> list[Path]:
        """Always empty: no transcripts are exposed for Claude Desktop."""
        return []

    def iter_messages(self, fp: Path) -> Iterator[NormalizedMessage]:
        """Always an empty iterator, regardless of ``fp``."""
        return iter(())


ADAPTER = ClaudeDesktopAdapter()
@@ -0,0 +1,259 @@
1
+ """OpenAI Codex CLI adapter.
2
+
3
+ Codex stores configuration in ~/.codex/config.toml with sections
4
+ `[mcp_servers.<name>]`. Conversation transcripts are JSONL files at
5
+ ~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl with envelopes like:
6
+
7
+ {"timestamp": "...", "type": "session_meta", "payload": {...}}
8
+ {"timestamp": "...", "type": "event_msg", "payload": {...}}
9
+ {"timestamp": "...", "type": "response_item","payload": {"type": "message", "role": ..., "content": [...]}}
10
+
11
+ We pick `type=response_item` and `payload.type=message` as turns.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import re
18
+ import shutil
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+ from typing import Iterator
22
+
23
+ from .base import CLIAdapter, NormalizedMessage
24
+
25
+
26
+ def _ts(s: str) -> int:
27
+ try:
28
+ return int(datetime.fromisoformat(s.replace("Z", "+00:00")).timestamp())
29
+ except Exception:
30
+ import time
31
+ return int(time.time())
32
+
33
+
34
+ def _extract_text(payload: dict) -> str:
35
+ """Codex content blocks: input_text/output_text/tool_call/etc.
36
+ We collect the text-flavored ones, cap tool_call payloads."""
37
+ content = payload.get("content", [])
38
+ if isinstance(content, str):
39
+ return content
40
+ if not isinstance(content, list):
41
+ return ""
42
+ parts: list[str] = []
43
+ for block in content:
44
+ if not isinstance(block, dict):
45
+ continue
46
+ t = block.get("type")
47
+ if t in ("input_text", "output_text", "text"):
48
+ parts.append(block.get("text", ""))
49
+ elif t == "thinking":
50
+ parts.append(f"[thinking] {block.get('text', '')}")
51
+ elif t == "tool_call_output":
52
+ out = block.get("output", "")
53
+ if isinstance(out, str) and out:
54
+ parts.append(f"[tool_result] {out[:800]}")
55
+ return "\n".join(p for p in parts if p)
56
+
57
+
58
+ # --- minimal TOML R/W ---------------------------------------------------
59
+ # We don't want to depend on tomllib for writes (Python's stdlib has
60
+ # tomllib for reads only). The shape we touch is one section:
61
+ # `[mcp_servers.<name>]` with key=value lines. Implement just enough.
62
+
63
+ def _read_toml(fp: Path) -> dict:
64
+ if not fp.exists():
65
+ return {}
66
+ try:
67
+ import tomllib # py3.11+
68
+ except ImportError:
69
+ # Fallback: VERY narrow parser — only used in environments
70
+ # without tomllib. Returns empty (caller treats as "no MCP").
71
+ return {}
72
+ try:
73
+ return tomllib.loads(fp.read_text())
74
+ except Exception:
75
+ return {}
76
+
77
+
78
+ def _serialize_mcp_section(name: str, command: str,
79
+ args: list[str], env: dict[str, str]) -> str:
80
+ """Produce the `[mcp_servers.<name>]` TOML block as a string."""
81
+ lines = [f"[mcp_servers.{name}]"]
82
+ lines.append(f"command = {json.dumps(command)}")
83
+ args_str = "[" + ", ".join(json.dumps(a) for a in args) + "]"
84
+ lines.append(f"args = {args_str}")
85
+ if env:
86
+ lines.append("[mcp_servers." + name + ".env]")
87
+ for k, v in env.items():
88
+ lines.append(f"{k} = {json.dumps(v)}")
89
+ return "\n".join(lines) + "\n"
90
+
91
+
92
+ _SECTION_HEADER_RE = re.compile(
93
+ r"^\[(mcp_servers\.[A-Za-z0-9_\-]+)(?:\.[A-Za-z0-9_\-]+)?\]\s*$",
94
+ re.MULTILINE,
95
+ )
96
+
97
+
98
+ def _replace_or_append_mcp_block(
99
+ body: str, name: str, new_block: str
100
+ ) -> str:
101
+ """Strip every TOML section beginning with `[mcp_servers.<name>...]`
102
+ (including nested `.env`), then append the new block at end.
103
+ Other sections are preserved as-is."""
104
+ out: list[str] = []
105
+ current_section = ""
106
+ target_prefix = f"mcp_servers.{name}"
107
+ skip_current = False
108
+ for line in body.splitlines(keepends=True):
109
+ m = _SECTION_HEADER_RE.match(line.rstrip("\n"))
110
+ if m:
111
+ section_full = m.group(0).strip("[]")
112
+ current_section = section_full
113
+ skip_current = (
114
+ section_full == target_prefix
115
+ or section_full.startswith(target_prefix + ".")
116
+ )
117
+ if skip_current:
118
+ continue
119
+ if skip_current:
120
+ # still inside the target section — drop the line
121
+ continue
122
+ out.append(line)
123
+ result = "".join(out).rstrip() + "\n\n" + new_block
124
+ return result
125
+
126
+
127
+ # --- adapter ------------------------------------------------------------
128
+
129
class CodexAdapter(CLIAdapter):
    """Adapter for the OpenAI Codex CLI.

    Registers MCP servers in ``config.toml`` and iterates over the
    ``rollout-*.jsonl`` session transcripts. Every path is rooted at
    ``$CODEX_HOME`` when that variable is set and non-empty, otherwise at
    ``~/.codex``.
    """

    name = "codex"

    def __init__(self) -> None:
        # Resolve the Codex home once so config, sessions, instructions
        # and skills all follow the same CODEX_HOME override.
        codex_home = Path(
            os.environ.get("CODEX_HOME") or "~/.codex"
        ).expanduser()
        self.config_path = codex_home / "config.toml"
        self.sessions_dir = codex_home / "sessions"
        # Codex loads AGENTS.md from cwd → parents → ~. We manage the
        # home-level fallback so it's always present even outside a
        # project tree.
        self._instructions = codex_home / "AGENTS.md"
        # Codex auto-discovers skills under $CODEX_HOME/skills/ — same
        # Anthropic-style SKILL.md format Claude uses. Multi-mirror in
        # skill_manage propagates SKILL.md here so the same skill is
        # available in Codex's own session.
        self._skills_dir = codex_home / "skills"

    def skills_dir(self):
        """Return the directory SKILL.md files are mirrored into."""
        return self._skills_dir

    def instructions_path(self):
        """Return the path of the home-level AGENTS.md file."""
        return self._instructions

    def is_installed(self) -> bool:
        """True when a Codex config/sessions dir exists or `codex` is on PATH."""
        if self.config_path.exists() or self.sessions_dir.exists():
            return True
        return shutil.which("codex") is not None

    # ----- MCP registration ---------------------------------------------
    def register_mcp_server(
        self, name, command, args, env, dry_run=False
    ) -> str:
        """Create or refresh the `[mcp_servers.<name>]` section.

        Returns a status string. With ``dry_run`` nothing is written. The
        config is read and written as UTF-8, the encoding TOML mandates.
        """
        block = _serialize_mcp_section(name, command, list(args), dict(env))
        if not self.config_path.exists():
            if dry_run:
                return "codex: would create config.toml with mcp section"
            self.config_path.parent.mkdir(parents=True, exist_ok=True)
            self.config_path.write_text(block, encoding="utf-8")
            return "codex: created config.toml"
        try:
            body = self.config_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not UTF-8, hence not valid TOML; rewriting would corrupt it.
            return "codex: malformed config.toml — refused"
        # Check if already current (cheap normalization compare)
        already = _read_toml(self.config_path).get("mcp_servers", {}).get(name)
        if isinstance(already, dict):
            want = {"command": command, "args": list(args)}
            if env:
                want["env"] = dict(env)
            if already == want:
                return "codex: already current"
        new_body = _replace_or_append_mcp_block(body, name, block)
        if new_body == body:
            return "codex: already current"
        if dry_run:
            return "codex: would update config.toml"
        self.config_path.write_text(new_body, encoding="utf-8")
        return "codex: updated config.toml"

    def unregister_mcp_server(self, name, dry_run=False) -> str:
        """Remove the `[mcp_servers.<name>]` section (and its `.env` table).

        Returns a status string; with ``dry_run`` nothing is written.
        """
        if not self.config_path.exists():
            return "codex: nothing to remove"
        try:
            body = self.config_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            return "codex: malformed config.toml — refused"
        new_body = _replace_or_append_mcp_block(body, name, "").rstrip() + "\n"
        if new_body.rstrip() == body.rstrip():
            return "codex: not present"
        if dry_run:
            return f"codex: would remove {name}"
        self.config_path.write_text(new_body, encoding="utf-8")
        return f"codex: removed {name}"

    # ----- Transcript ingestion -----------------------------------------
    def session_dir(self):
        """Return the root directory holding rollout files."""
        return self.sessions_dir

    def transcript_files(self) -> list[Path]:
        """List every ``rollout-*.jsonl`` file under the sessions directory."""
        if not self.sessions_dir.exists():
            return []
        return list(self.sessions_dir.glob("**/rollout-*.jsonl"))

    def iter_messages(self, fp: Path) -> Iterator[NormalizedMessage]:
        """Yield one ``NormalizedMessage`` per user/assistant turn in ``fp``.

        Only ``response_item`` envelopes whose payload has
        ``type == "message"`` count as turns. The session id comes from the
        most recent ``session_meta`` envelope seen so far. Blank lines,
        invalid JSON and non-object lines are skipped; an unreadable file
        yields nothing.
        """
        sess_id = ""
        try:
            with fp.open("r", encoding="utf-8", errors="replace") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        envelope = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(envelope, dict):
                        # Valid JSON but not an envelope (list, number...).
                        continue
                    typ = envelope.get("type")
                    payload = envelope.get("payload") or {}
                    if typ == "session_meta" and isinstance(payload, dict):
                        sess_id = payload.get("id") or sess_id
                        continue
                    if typ != "response_item":
                        continue
                    if not isinstance(payload, dict):
                        continue
                    if payload.get("type") != "message":
                        continue
                    role = payload.get("role")
                    if role == "developer":
                        # Codex injects a developer turn with permission
                        # instructions etc. Skip — not user dialog.
                        continue
                    if role not in ("user", "assistant"):
                        continue
                    text = _extract_text(payload)
                    # Per-line id: use payload.id when present, else fall
                    # back to file name + envelope timestamp.
                    # NOTE(review): two id-less turns sharing a timestamp
                    # get the same fallback id; the format is kept as-is
                    # because changing it would change ids already emitted.
                    uuid = payload.get("id") or f"codex:{fp.name}:{envelope.get('timestamp', '')}"
                    yield NormalizedMessage(
                        uuid=uuid,
                        session_id=sess_id,
                        role=role,
                        content=text,
                        model=payload.get("model") or "",
                        created_at=_ts(envelope.get("timestamp", "")),
                        raw=payload,
                    )
        except OSError:
            return

    def project_label(self, fp: Path) -> str:
        """Return a coarse label for ``fp``: ``codex-<year>``."""
        # rollout files are in YYYY/MM/DD subdirs — three levels up from
        # the file is the year directory, which gives a coarse but
        # meaningful label.
        return f"codex-{fp.parent.parent.parent.name}"  # year


ADAPTER = CodexAdapter()