zeno-cli 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. zeno_adapters/__init__.py +17 -0
  2. zeno_adapters/_common.py +38 -0
  3. zeno_adapters/anthropic.py +68 -0
  4. zeno_adapters/claude_code.py +101 -0
  5. zeno_adapters/crewai.py +92 -0
  6. zeno_adapters/langgraph.py +49 -0
  7. zeno_adapters/openai.py +108 -0
  8. zeno_cli/__init__.py +1 -0
  9. zeno_cli/_hooks/cc_bridge.py +1016 -0
  10. zeno_cli/doctor.py +535 -0
  11. zeno_cli/hook_install.py +269 -0
  12. zeno_cli/hud/__init__.py +1 -0
  13. zeno_cli/hud/hud_install.py +652 -0
  14. zeno_cli/hud/zeno_attention.py +288 -0
  15. zeno_cli/hud/zeno_cognition.py +457 -0
  16. zeno_cli/hud/zeno_hud.py +496 -0
  17. zeno_cli/interview_invites.py +342 -0
  18. zeno_cli/login.py +241 -0
  19. zeno_cli/main.py +2534 -0
  20. zeno_cli/onboard.py +206 -0
  21. zeno_cli/outreach.py +456 -0
  22. zeno_cli/version.py +67 -0
  23. zeno_cli-0.3.4.dist-info/METADATA +161 -0
  24. zeno_cli-0.3.4.dist-info/RECORD +69 -0
  25. zeno_cli-0.3.4.dist-info/WHEEL +4 -0
  26. zeno_cli-0.3.4.dist-info/entry_points.txt +4 -0
  27. zeno_core/__init__.py +67 -0
  28. zeno_core/analytics.py +193 -0
  29. zeno_core/rtlx_s.py +460 -0
  30. zeno_core/streak.py +178 -0
  31. zeno_core/tlx_s.py +192 -0
  32. zeno_sdk/__init__.py +6 -0
  33. zeno_sdk/_generated/__init__.py +6 -0
  34. zeno_sdk/_generated/client.py +819 -0
  35. zeno_sdk/_migrations/alembic/env.py +33 -0
  36. zeno_sdk/_migrations/alembic/script.py.mako +18 -0
  37. zeno_sdk/_migrations/alembic/versions/0001_initial.py +79 -0
  38. zeno_sdk/_migrations/alembic/versions/0002_cognition_samples.py +53 -0
  39. zeno_sdk/_migrations/alembic/versions/0003_cognition_drivers.py +41 -0
  40. zeno_sdk/_migrations/alembic/versions/0004_transcript_intelligence.py +248 -0
  41. zeno_sdk/_migrations/alembic.ini +35 -0
  42. zeno_sdk/_runtime.py +12 -0
  43. zeno_sdk/adapters/__init__.py +15 -0
  44. zeno_sdk/adapters/anthropic.py +5 -0
  45. zeno_sdk/adapters/claude_code.py +5 -0
  46. zeno_sdk/adapters/crewai.py +5 -0
  47. zeno_sdk/adapters/langgraph.py +5 -0
  48. zeno_sdk/adapters/openai.py +5 -0
  49. zeno_sdk/auth.py +25 -0
  50. zeno_sdk/client.py +87 -0
  51. zeno_sdk/config.py +61 -0
  52. zeno_sdk/daemon.py +72 -0
  53. zeno_sdk/privacy.py +46 -0
  54. zeno_sdk/session.py +179 -0
  55. zeno_sdk/storage.py +487 -0
  56. zeno_sdk/types/__init__.py +121 -0
  57. zeno_session_intel/__init__.py +19 -0
  58. zeno_session_intel/analytics.py +588 -0
  59. zeno_session_intel/compression.py +123 -0
  60. zeno_session_intel/ingest.py +376 -0
  61. zeno_session_intel/model.py +129 -0
  62. zeno_session_intel/parsers/__init__.py +31 -0
  63. zeno_session_intel/parsers/claude_code.py +169 -0
  64. zeno_session_intel/parsers/codex.py +265 -0
  65. zeno_session_intel/parsers/cursor.py +198 -0
  66. zeno_session_intel/prices.py +281 -0
  67. zeno_session_intel/schema.py +277 -0
  68. zeno_session_intel/signals.py +319 -0
  69. zeno_session_intel/taxonomy.py +71 -0
@@ -0,0 +1,169 @@
1
+ """Claude Code transcript parser (JSONL, the format zeno already hooks - verified).
2
+
3
+ Layout: ``~/.claude/projects/<project-slug>/<sessionId>.jsonl``, one JSON object per
4
+ line. Lines are ``user`` / ``assistant`` / ``system`` / ``summary`` records carrying a
5
+ ``message`` object, plus ``uuid`` / ``parentUuid`` / ``timestamp`` / ``sessionId`` /
6
+ ``cwd`` / ``gitBranch`` / ``isSidechain``. Assistant ``message.usage`` is the cache-aware
7
+ token block; assistant ``message.content`` is a list of text/thinking/tool_use blocks;
8
+ user content is a string or a list including ``tool_result`` blocks.
9
+
10
+ Read-only: the file is opened ``"r"`` and never modified. Never raises - a malformed line
11
+ is skipped.
12
+
13
+ CC gotcha: ``cache_read_input_tokens`` is large and recurs every turn (prompt caching),
14
+ so each assistant message carries its own usage as ONE ledger event (deduped by message
15
+ id); the rollup uses the ledger, never a naive per-line sum.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import os
22
+ from pathlib import Path
23
+
24
+ from ..model import ParsedMessage, ParsedSession, ToolResult, ToolUse, to_int
25
+
26
+
27
def default_root() -> Path:
    """Return the directory under which Claude Code keeps per-project transcripts.

    The result is ``<home>/.claude/projects``. The path is only constructed here;
    nothing checks that it exists.
    """
    home = os.path.expanduser("~")
    return Path(home, ".claude", "projects")
29
+
30
+
31
def discover(root: Path) -> list[Path]:
    """List every ``*.jsonl`` file beneath *root* (recursively), sorted by path.

    A *root* that does not exist produces an empty list instead of an error.
    """
    return sorted(root.glob("**/*.jsonl")) if root.exists() else []
35
+
36
+
37
def _flatten_content(content: object) -> tuple[str, str, list[ToolUse], list[ToolResult]]:
    """Break a Claude Code ``message.content`` value into its four parts.

    Returns ``(text, thinking, tool_uses, tool_results)``:

    * a plain string is returned as the text with everything else empty;
    * a list is walked block by block - bare strings and ``text`` blocks feed the
      text, ``thinking`` blocks feed the thinking, ``tool_use`` / ``tool_result``
      blocks become ``ToolUse`` / ``ToolResult`` objects; other blocks are ignored;
    * any other value yields four empty results.

    Empty text fragments are dropped before joining with newlines; thinking
    fragments are joined as-is.
    """
    if isinstance(content, str):
        return content, "", [], []

    texts: list[str] = []
    thoughts: list[str] = []
    calls: list[ToolUse] = []
    results: list[ToolResult] = []

    for item in content if isinstance(content, list) else ():
        if isinstance(item, str):
            texts.append(item)
            continue
        if not isinstance(item, dict):
            continue

        kind = item.get("type")
        if kind == "text":
            texts.append(str(item.get("text", "")))
        elif kind == "thinking":
            thoughts.append(str(item.get("thinking", "")))
        elif kind == "tool_use":
            call = ToolUse(
                name=str(item.get("name", "")),
                # sort_keys gives a canonical string for the same arguments
                input_json=json.dumps(item.get("input", {}), sort_keys=True),
                id=str(item.get("id", "")),
            )
            calls.append(call)
        elif kind == "tool_result":
            payload = item.get("content", "")
            if isinstance(payload, list):
                # A result may itself be a list of blocks; collapse it to one line.
                pieces = [
                    str(part.get("text", "")) if isinstance(part, dict) else str(part)
                    for part in payload
                ]
                payload = " ".join(pieces)
            outcome = ToolResult(
                tool_use_id=str(item.get("tool_use_id", "")),
                content=str(payload),
                is_error=bool(item.get("is_error", False)),
            )
            results.append(outcome)

    text = "\n".join(filter(None, texts))
    thinking = "\n".join(thoughts)
    return text, thinking, calls, results
78
+
79
+
80
def parse_file(path: Path) -> ParsedSession | None:
    """Parse one Claude Code JSONL transcript into a ``ParsedSession``.

    Every non-blank line is decoded as JSON. A line is skipped when it fails to
    decode, is not a JSON object, has no ``message`` object, or has a role other
    than ``user`` / ``assistant`` / ``system``. ``summary`` records only contribute
    the display name (the first non-empty summary wins).

    Session-level fields: ``cwd`` and ``gitBranch`` keep the first non-empty value
    seen, ``sessionId`` keeps the last one (falling back to the file stem), and
    ``started_at`` / ``ended_at`` are the first / last string ``timestamp``.

    Args:
        path: Transcript file. It is read, never written.

    Returns:
        The parsed session, or ``None`` if the file cannot be read or contains no
        usable message.
    """
    try:
        # JSON text is UTF-8 (RFC 8259). Pin the codec instead of inheriting the
        # locale default, which is not UTF-8 on every platform; undecodable bytes
        # are replaced so a damaged file still parses.
        raw = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None

    session_id = path.stem
    messages: list[ParsedMessage] = []
    cwd = ""
    git_branch = ""
    started_at: str | None = None
    ended_at: str | None = None
    display_name: str | None = None
    ordinal = 0

    # Split on "\n" only. ``str.splitlines`` also treats U+0085, U+2028 and U+2029
    # as line boundaries, yet those may appear unescaped inside a JSON string;
    # splitting there would cut one record into two undecodable halves and the
    # message would be silently lost. (``read_text`` already normalizes "\r\n" and
    # "\r" to "\n".)
    for line in raw.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except Exception:
            # Deliberately broad: a malformed line must never abort the parse.
            continue
        if not isinstance(obj, dict):
            continue

        rtype = obj.get("type")
        if rtype == "summary":
            display_name = display_name or str(obj.get("summary", "")) or None
            continue

        msg = obj.get("message")
        if not isinstance(msg, dict):
            continue
        # Prefer the role inside the message; fall back to the record type.
        role = str(msg.get("role") or rtype or "")
        if role not in ("user", "assistant", "system"):
            continue

        ts = obj.get("timestamp")
        if isinstance(ts, str):
            started_at = started_at or ts
            ended_at = ts
        cwd = cwd or str(obj.get("cwd", "") or "")
        git_branch = git_branch or str(obj.get("gitBranch", "") or "")
        sid = obj.get("sessionId")
        if isinstance(sid, str) and sid:
            session_id = sid

        text, thinking, tool_uses, tool_results = _flatten_content(msg.get("content"))
        usage = msg.get("usage") if isinstance(msg.get("usage"), dict) else {}
        messages.append(
            ParsedMessage(
                ordinal=ordinal,
                role=role,
                content=text,
                thinking_text=thinking,
                timestamp=ts if isinstance(ts, str) else None,
                model=str(msg.get("model", "") or ""),
                input_tokens=to_int(usage.get("input_tokens")),
                output_tokens=to_int(usage.get("output_tokens")),
                cache_creation_input_tokens=to_int(usage.get("cache_creation_input_tokens")),
                cache_read_input_tokens=to_int(usage.get("cache_read_input_tokens")),
                raw_usage=json.dumps(usage, sort_keys=True) if usage else "",
                source_uuid=str(obj.get("uuid", "") or ""),
                source_parent_uuid=str(obj.get("parentUuid", "") or ""),
                is_sidechain=bool(obj.get("isSidechain", False)),
                tool_uses=tool_uses,
                tool_results=tool_results,
                # Key used to dedupe usage: the message id, else the record uuid.
                usage_dedup_key=str(msg.get("id") or obj.get("uuid") or ""),
            )
        )
        ordinal += 1

    if not messages:
        return None

    first_user = next((m.content for m in messages if m.role == "user" and m.content), None)
    try:
        mtime = int(path.stat().st_mtime)
    except OSError:
        mtime = None
    return ParsedSession(
        id=session_id,
        agent="claude",
        messages=messages,
        # The project is taken from the transcript's parent directory name.
        project=path.parent.name,
        cwd=cwd,
        git_branch=git_branch,
        display_name=display_name or (first_user[:80] if first_user else None),
        first_message=first_user,
        started_at=started_at,
        ended_at=ended_at,
        file_path=str(path),
        file_mtime=mtime,
    )
@@ -0,0 +1,265 @@
1
+ """Codex (OpenAI) rollout parser - JSONL. EXPERIMENTAL: fixture-verified only.
2
+
3
+ STATUS: experimental. zeno is Claude-Code-first; Codex is NOT a first-class capture path
4
+ and is NOT installed on this box. This parser is validated against a synthesized fixture
5
+ (``tests/fixtures/session_intel/codex_rollout.jsonl``) built from the public rollout format
6
+ only - it is **fixture-verified, needs real-data confirmation**. The ingester gates Codex
7
+ behind an explicit ``--tools`` opt-in so a wrong guess never corrupts real Claude Code data.
8
+ See docs/TOOLS_STATUS.md and docs/adapters/codex.md.
9
+
10
+ Layout: ``~/.codex/sessions/<YYYY>/<MM>/<DD>/rollout-<ts>-<uuid>.jsonl``. Record types:
11
+ - ``session_meta`` -> id / timestamp / cwd (payload-nested or flat)
12
+ - ``response_item`` ``message`` -> a turn (role + content[].text)
13
+ - ``response_item`` ``function_call`` -> a tool call (name/arguments/call_id)
14
+ - ``response_item`` ``function_call_output`` -> a tool result (call_id/output, exit code)
15
+ - ``event_msg`` ``token_count`` / ``usage`` -> token usage, attributed to the last turn
16
+
17
+ Read-only; never raises (malformed lines skipped).
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import os
24
+ from pathlib import Path
25
+
26
+ from ..model import ParsedMessage, ParsedSession, ToolResult, ToolUse, to_int
27
+
28
+ _DEFAULT_MODEL = "gpt-5.3-codex"
29
+
30
+
31
def default_root() -> Path:
    """Return the directory under which Codex rollout files are expected.

    The result is ``<home>/.codex/sessions``; the path is built without checking
    that it exists.
    """
    home = Path(os.path.expanduser("~"))
    return home.joinpath(".codex", "sessions")
33
+
34
+
35
def discover(root: Path) -> list[Path]:
    """List every ``rollout-*.jsonl`` file beneath *root* (recursively), sorted.

    Returns an empty list when *root* does not exist.
    """
    if root.exists():
        return sorted(root.glob("**/rollout-*.jsonl"))
    return []
39
+
40
+
41
def _payload(obj: dict) -> dict:
    """Return the record's nested ``payload`` dict, or the record itself.

    Lets callers read fields the same way whether a record is payload-nested or
    flat: a ``payload`` value that is missing or not a dict falls back to *obj*.
    """
    nested = obj.get("payload")
    if isinstance(nested, dict):
        return nested
    return obj
44
+
45
+
46
def _text_from_content(content: object) -> str:
    """Flatten a Codex message ``content`` value into one newline-joined string.

    A string is returned unchanged. For a list, each dict contributes its ``text``
    value and each bare string contributes itself; other items and empty fragments
    are dropped. Any other input yields ``""``.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    fragments = [
        str(item.get("text", "")) if isinstance(item, dict) else item
        for item in content
        if isinstance(item, (dict, str))
    ]
    return "\n".join(filter(None, fragments))
58
+
59
+
60
def _block(src: dict) -> dict:
    """Normalize one raw usage mapping into the four token counters used here.

    Two spellings are accepted for the cached-input and reasoning counters; the
    first-listed key wins whenever it is present in *src*, even if its value is
    ``None``. Each value goes through ``to_int``.
    """
    if "cached_input_tokens" in src:
        cached = src["cached_input_tokens"]
    else:
        cached = src.get("cache_read_input_tokens")

    if "reasoning_output_tokens" in src:
        reasoning = src["reasoning_output_tokens"]
    else:
        reasoning = src.get("reasoning_tokens")

    normalized = {}
    normalized["input_tokens"] = to_int(src.get("input_tokens"))
    normalized["output_tokens"] = to_int(src.get("output_tokens"))
    normalized["cache_read_input_tokens"] = to_int(cached)
    normalized["reasoning_tokens"] = to_int(reasoning)
    return normalized
69
+
70
+
71
def _usage_block(info: dict) -> tuple[dict, bool]:
    """Pick the usage mapping out of a Codex token_count record.

    Returns ``(usage, is_cumulative)``. Candidates are tried in priority order and
    the first one that is a dict is normalized with ``_block``:

    1. ``last_token_usage``  - treated as a per-turn figure (not cumulative)
    2. ``total_token_usage`` - treated as a running session total (cumulative);
       the caller turns it into a per-turn delta
    3. ``usage``             - not cumulative

    When none is present, *info* itself is normalized and reported as not
    cumulative.
    """
    candidates = (
        ("last_token_usage", False),
        ("total_token_usage", True),
        ("usage", False),
    )
    for key, cumulative in candidates:
        nested = info.get(key)
        if isinstance(nested, dict):
            return _block(nested), cumulative
    return _block(info), False
86
+
87
+
88
def parse_file(path: Path) -> ParsedSession | None:
    """Parse one Codex rollout JSONL file into a ``ParsedSession``.

    Record handling, by ``type``:

    * ``session_meta``  - session id, ``cwd`` and ``model`` (first non-empty wins
      for the latter two), read from the payload or the flat record.
    * ``response_item`` - a ``message`` becomes a turn; a ``function_call`` is
      attached to the most recent assistant turn (one is synthesized if none
      exists yet); a ``function_call_output`` is attached to the most recent
      assistant turn and dropped if there is none.
    * ``event_msg`` / ``token_count`` - token usage, written onto the most recent
      assistant turn. Cumulative totals are first converted to per-turn deltas.

    Lines that are blank, fail to decode, or are not JSON objects are skipped.

    Args:
        path: Rollout file. It is read, never written.

    Returns:
        The parsed session, or ``None`` if the file cannot be read or yields no
        messages.
    """
    try:
        # JSON text is UTF-8 (RFC 8259). Pin the codec instead of inheriting the
        # locale default, which is not UTF-8 on every platform; undecodable bytes
        # are replaced so a damaged file still parses.
        raw = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None

    session_id = path.stem
    cwd = ""
    model = ""
    started_at: str | None = None
    ended_at: str | None = None
    messages: list[ParsedMessage] = []
    ordinal = 0
    # running cumulative, to turn Codex's cumulative token_count totals into per-turn deltas
    prev_cum = {
        "input_tokens": 0,
        "output_tokens": 0,
        "cache_read_input_tokens": 0,
        "reasoning_tokens": 0,
    }

    def last_assistant() -> ParsedMessage | None:
        """Return the most recently appended assistant message, if any."""
        for m in reversed(messages):
            if m.role == "assistant":
                return m
        return None

    # Split on "\n" only. ``str.splitlines`` also treats U+0085, U+2028 and U+2029
    # as line boundaries, yet those may appear unescaped inside a JSON string;
    # splitting there would cut one record into two undecodable halves and the
    # record would be silently lost. (``read_text`` already normalizes "\r\n" and
    # "\r" to "\n".)
    for line in raw.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except Exception:
            # Deliberately broad: a malformed line must never abort the parse.
            continue
        if not isinstance(obj, dict):
            continue

        rtype = obj.get("type")
        ts = obj.get("timestamp")
        if isinstance(ts, str):
            started_at = started_at or ts
            ended_at = ts

        if rtype == "session_meta":
            p = _payload(obj)
            sid = p.get("id") or obj.get("id")
            if isinstance(sid, str) and sid:
                session_id = sid
            cwd = cwd or str(p.get("cwd", "") or "")
            model = model or str(p.get("model", "") or "")
            continue

        if rtype == "response_item":
            p = _payload(obj)
            ptype = p.get("type")
            if ptype == "message":
                role = str(p.get("role", "") or "")
                if role not in ("user", "assistant", "system"):
                    continue
                messages.append(
                    ParsedMessage(
                        ordinal=ordinal,
                        role=role,
                        content=_text_from_content(p.get("content")),
                        timestamp=ts if isinstance(ts, str) else None,
                        # Only assistant turns carry a model id.
                        model=(model or _DEFAULT_MODEL) if role == "assistant" else "",
                    )
                )
                ordinal += 1
            elif ptype == "function_call":
                tu = ToolUse(
                    name=str(p.get("name", "") or ""),
                    input_json=_canon_args(p.get("arguments")),
                    id=str(p.get("call_id", "") or ""),
                )
                target = last_assistant()
                if target is None:
                    # A tool call with no preceding assistant turn: synthesize one
                    # so the call has somewhere to live.
                    target = ParsedMessage(ordinal=ordinal, role="assistant", model=model)
                    messages.append(target)
                    ordinal += 1
                target.tool_uses.append(tu)
            elif ptype == "function_call_output":
                out = p.get("output")
                content, is_err = _decode_output(out)
                tr = ToolResult(
                    tool_use_id=str(p.get("call_id", "") or ""),
                    content=content,
                    is_error=is_err,
                )
                target = last_assistant()
                if target is not None:
                    target.tool_results.append(tr)
            continue

        if rtype in ("event_msg", "token_count"):
            p = _payload(obj)
            info = p.get("info") if isinstance(p.get("info"), dict) else p
            if p.get("type") in ("token_count", None) or rtype == "token_count":
                usage, is_cumulative = _usage_block(info)
                if is_cumulative:
                    # cumulative -> per-turn delta, then advance the running total. A
                    # context-window reset / compaction makes the cumulative drop BELOW the
                    # prior total (non-monotonic); a plain ``max(0, cur-prev)`` would clamp
                    # that turn to 0 and silently drop its real tokens. On a reset the new
                    # lower cumulative IS that turn's usage, so take the current value itself.
                    delta = {
                        k: (
                            usage[k]
                            if usage[k] < prev_cum.get(k, 0)
                            else usage[k] - prev_cum.get(k, 0)
                        )
                        for k in usage
                    }
                    prev_cum = dict(usage)
                    usage = delta
                target = last_assistant()
                if target is not None and any(usage.values()):
                    # NOTE(review): each counter is overwritten (a zero keeps the old
                    # value), not accumulated, so a second token_count landing on the
                    # same assistant turn replaces the first - confirm this is intended.
                    target.input_tokens = usage["input_tokens"] or target.input_tokens
                    target.output_tokens = usage["output_tokens"] or target.output_tokens
                    target.cache_read_input_tokens = (
                        usage["cache_read_input_tokens"] or target.cache_read_input_tokens
                    )
                    target.reasoning_tokens = usage["reasoning_tokens"] or target.reasoning_tokens
                    target.raw_usage = json.dumps(usage, sort_keys=True)
                    target.model = target.model or model or _DEFAULT_MODEL
                    target.usage_dedup_key = f"{session_id}:tok:{target.ordinal}"

    if not messages:
        return None

    first_user = next((m.content for m in messages if m.role == "user" and m.content), None)
    try:
        mtime = int(path.stat().st_mtime)
    except OSError:
        mtime = None
    return ParsedSession(
        id=session_id,
        agent="codex",
        messages=messages,
        # Project is the last component of cwd, else the rollout's parent directory.
        project=Path(cwd).name if cwd else path.parent.name,
        cwd=cwd,
        display_name=(first_user[:80] if first_user else None),
        first_message=first_user,
        started_at=started_at,
        ended_at=ended_at,
        file_path=str(path),
        file_mtime=mtime,
    )
235
+
236
+
237
def _canon_args(arguments: object) -> str:
    """Render tool-call arguments as a canonical (key-sorted) JSON string.

    * dict  -> dumped with sorted keys
    * str   -> decoded and re-dumped with sorted keys; returned unchanged when it
               cannot be round-tripped as JSON
    * other -> ``""``
    """
    if isinstance(arguments, dict):
        return json.dumps(arguments, sort_keys=True)
    if not isinstance(arguments, str):
        return ""
    try:
        canonical = json.dumps(json.loads(arguments), sort_keys=True)
    except Exception:
        # Not JSON: keep the raw text rather than lose the arguments.
        return arguments
    return canonical
246
+
247
+
248
def _decode_output(out: object) -> tuple[str, bool]:
    """Turn a ``function_call_output`` value into ``(text, is_error)``.

    *out* may be a dict, or a string that is tried as JSON. A string that is not
    JSON is returned verbatim with no error; anything that is neither yields
    ``("", False)``. For a decoded object, the text is ``output`` (else
    ``content``) and the exit code comes from ``metadata.exit_code`` when
    ``metadata`` is a dict, otherwise from a top-level ``exit_code``. The result
    is an error when that code is truthy and is not the string/number zero.
    A decoded value that is not an object is stringified with no error.
    """
    if isinstance(out, dict):
        decoded = out
    elif isinstance(out, str):
        try:
            decoded = json.loads(out)
        except Exception:
            return out, False
    else:
        return "", False

    if not isinstance(decoded, dict):
        return str(decoded), False

    body = decoded.get("output", decoded.get("content", ""))
    meta = decoded.get("metadata")
    if isinstance(meta, dict):
        code = meta.get("exit_code")
    else:
        code = decoded.get("exit_code")
    # bool(code) screens out None / 0 / ""; the string check screens out "0".
    failed = bool(code) and str(code) != "0"
    return str(body), failed
@@ -0,0 +1,198 @@
1
+ """Cursor chat parser - a SQLite ``state.vscdb`` (verified against real local data).
2
+
3
+ Unlike Claude Code / Codex (one JSONL file == one session), one Cursor ``state.vscdb``
4
+ holds MANY composers (each composer == one session), so this module exposes
5
+ ``parse_sessions(path) -> list[ParsedSession]`` and the ingester iterates it.
6
+
7
+ Layout (verified on dctrl's Mac, 2026-06-17):
8
+ ``~/Library/Application Support/Cursor/User/globalStorage/state.vscdb`` (+ per-workspace
9
+ ``workspaceStorage/<hash>/state.vscdb``), a ``cursorDiskKV(key, value)`` table where:
10
+ - ``composerData:<composerId>`` -> JSON header: ``composerId``, ``name``, ``createdAt``
11
+ (epoch ms), ``fullConversationHeadersOnly`` = ordered ``[{bubbleId, type}]`` (type
12
+ 1 = user, 2 = assistant).
13
+ - ``bubbleId:<composerId>:<bubbleId>`` -> JSON message: ``text``, ``type``,
14
+ ``toolFormerData`` (``.name`` e.g. ``read_file``), ``tokenCount`` (often ``0/0``).
15
+
16
+ Quirks handled: only the most-recently-active composer has its bubbles materialized (the
17
+ rest are header-only -> tolerate composers with zero retrievable bubbles); Cursor may hold
18
+ the DB open -> **copy-then-read** the file (+ WAL sidecars) to a temp dir to dodge
19
+ ``database is locked``. Token counts are best-effort (often 0), so Cursor cost is usually
20
+ unpriced - never alert on it.
21
+
22
+ Read-only on the source; never raises (a bad row is skipped). py3.9-safe.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import json
28
+ import os
29
+ import shutil
30
+ import sqlite3
31
+ import tempfile
32
+ from datetime import datetime, timezone
33
+ from pathlib import Path
34
+
35
+ from ..model import ParsedMessage, ParsedSession, ToolResult, ToolUse, to_int
36
+
37
+
38
def default_root() -> Path:
    """Return Cursor's per-user data directory in its macOS location.

    The result is ``<home>/Library/Application Support/Cursor/User``; the path is
    built without checking that it exists.
    """
    home = Path(os.path.expanduser("~"))
    return home.joinpath("Library", "Application Support", "Cursor", "User")
40
+
41
+
42
def discover(root: Path) -> list[Path]:
    """Locate the Cursor ``state.vscdb`` databases under *root*.

    The global database (``globalStorage/state.vscdb``) is listed first when it
    exists, followed by every ``workspaceStorage/<dir>/state.vscdb`` in sorted
    order. A missing *root* yields an empty list.
    """
    if not root.exists():
        return []
    global_db = root / "globalStorage/state.vscdb"
    databases = [global_db] if global_db.exists() else []
    databases += sorted((root / "workspaceStorage").glob("*/state.vscdb"))
    return databases
52
+
53
+
54
def _epoch_ms_to_iso(ms: object) -> str | None:
    """Convert epoch milliseconds to an ISO-8601 UTC timestamp string.

    Returns ``None`` when *ms* cannot be converted to a float or falls outside the
    range ``datetime.fromtimestamp`` accepts.
    """
    try:
        seconds = float(ms) / 1000.0
        moment = datetime.fromtimestamp(seconds, tz=timezone.utc)
    except (ValueError, TypeError, OverflowError, OSError):
        return None
    return moment.isoformat()
59
+
60
+
61
def _open_copy(path: Path):
    """Copy the vscdb (+ -wal/-shm) to a temp dir and open the copy.

    Working on a private copy avoids contending with a process that holds the
    original open. The sidecar files are copied only when they exist, and the
    connection is switched to ``query_only`` so the copy is not written to.

    Args:
        path: Location of the source ``state.vscdb``.

    Returns:
        ``(connection, tempdir)`` on success or ``(None, tempdir)`` on failure.
        The temp dir is created in both cases and the caller must remove it; on
        success the caller must also close the connection.
    """
    d = tempfile.mkdtemp(prefix="zeno-cursor-")
    dst = os.path.join(d, "state.vscdb")
    con = None
    try:
        for ext in ("", "-wal", "-shm"):
            src = str(path) + ext
            if os.path.exists(src):
                shutil.copy(src, dst + ext)
        con = sqlite3.connect(dst)
        con.execute("PRAGMA query_only=1")
        return con, d
    except Exception:
        # If the connection was opened before the failure, close it here: the
        # caller only receives None and would have no handle to release.
        if con is not None:
            try:
                con.close()
            except sqlite3.Error:
                pass
        return None, d
76
+
77
+
78
def parse_sessions(path: Path) -> list[ParsedSession]:
    """Parse every composer stored in one Cursor ``state.vscdb`` into sessions.

    The database is read through a temporary copy, which is removed before
    returning. Each ``composerData:*`` row is handed to ``_parse_composer``;
    composers that yield nothing are left out. If the copy cannot be opened the
    result is empty; if reading fails part-way, the sessions collected so far are
    returned.
    """
    sessions: list[ParsedSession] = []
    con, tmpdir = _open_copy(path)
    try:
        if con is not None:
            rows = con.execute(
                "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"
            ).fetchall()
            for _key, raw in rows:
                parsed = _parse_composer(con, raw, path)
                if parsed is not None:
                    sessions.append(parsed)
    except Exception:
        # Best-effort by design: keep whatever was parsed before the failure.
        pass
    finally:
        if con is not None:
            con.close()
        shutil.rmtree(tmpdir, ignore_errors=True)
    return sessions
98
+
99
+
100
def _parse_composer(con: sqlite3.Connection, raw: str, path: Path) -> ParsedSession | None:
    """Build one ``ParsedSession`` from a ``composerData`` JSON header.

    The header's ``fullConversationHeadersOnly`` list gives the ordered bubble
    ids; each bubble is fetched from ``cursorDiskKV`` by its
    ``bubbleId:<composerId>:<bubbleId>`` key. Bubbles that are missing, fail to
    decode, or are not JSON objects are skipped.

    Args:
        con: Open connection to the (copied) database.
        raw: JSON text of the composer header.
        path: Source database path, recorded on the session as ``file_path``.

    Returns:
        The session, or ``None`` when the header is unusable (bad JSON, no
        composer id, no bubble list) or no bubble could be retrieved.
    """
    try:
        header = json.loads(raw)
    except Exception:
        return None
    if not isinstance(header, dict):
        return None

    composer_id = str(header.get("composerId") or "")
    bubble_index = header.get("fullConversationHeadersOnly")
    if not (composer_id and isinstance(bubble_index, list) and bubble_index):
        return None  # header-only / empty composer -> tolerate (skip)

    messages: list[ParsedMessage] = []
    ordinal = 0
    last_ms = None
    for entry in bubble_index:
        if not isinstance(entry, dict):
            continue
        bubble_id = entry.get("bubbleId")
        if not bubble_id:
            continue

        row = con.execute(
            "SELECT value FROM cursorDiskKV WHERE key = ?",
            (f"bubbleId:{composer_id}:{bubble_id}",),
        ).fetchone()
        if not row:
            continue  # bubble not materialized (common for older composers)
        try:
            bubble = json.loads(row[0])
        except Exception:
            continue
        if not isinstance(bubble, dict):
            continue

        # Header type 1 is a user bubble, 2 an assistant bubble; anything else
        # is filed under "system".
        kind = entry.get("type")
        if kind == 1:
            role = "user"
        elif kind == 2:
            role = "assistant"
        else:
            role = "system"

        tool_uses: list[ToolUse] = []
        tool_results: list[ToolResult] = []
        tool = bubble.get("toolFormerData")
        if isinstance(tool, dict) and tool.get("name"):
            # A tool bubble carries both the call and its result, so emit a
            # ToolUse / ToolResult pair keyed by the same bubble id.
            ref = str(bubble.get("bubbleId") or bubble_id)
            params = tool.get("params")
            tool_uses.append(
                ToolUse(
                    name=str(tool.get("name")),
                    input_json=json.dumps(params, sort_keys=True) if params else "",
                    id=ref,
                )
            )
            status = str(tool.get("status", "")).lower()
            tool_results.append(
                ToolResult(
                    tool_use_id=ref,
                    content=str(tool.get("result", "")),
                    is_error=status in ("error", "failed", "cancelled"),
                )
            )

        token_count = bubble.get("tokenCount")
        if not isinstance(token_count, dict):
            token_count = {}
        created = bubble.get("createdAt")
        if created is not None:
            last_ms = created  # the last bubble in header order wins

        messages.append(
            ParsedMessage(
                ordinal=ordinal,
                role=role,
                content=str(bubble.get("text") or ""),
                thinking_text="",
                timestamp=_epoch_ms_to_iso(created),
                model="",  # Cursor does not expose a per-bubble model id -> unpriced
                input_tokens=to_int(token_count.get("inputTokens")),
                output_tokens=to_int(token_count.get("outputTokens")),
                raw_usage=json.dumps(token_count, sort_keys=True) if token_count else "",
                source_uuid=str(bubble_id),
                tool_uses=tool_uses,
                tool_results=tool_results,
                usage_dedup_key=str(bubble_id),
            )
        )
        ordinal += 1

    if not messages:
        return None

    first_user = next((m.content for m in messages if m.role == "user" and m.content), None)
    composer_name = header.get("name")
    # Fall back to the header's lastUpdatedAt when no bubble carried createdAt.
    end_ms = last_ms if last_ms is not None else header.get("lastUpdatedAt")
    return ParsedSession(
        id=composer_id,
        agent="cursor",
        messages=messages,
        project=str(composer_name or "") or "cursor",
        display_name=(str(composer_name) if composer_name else None),
        first_message=first_user,
        started_at=_epoch_ms_to_iso(header.get("createdAt")),
        ended_at=_epoch_ms_to_iso(end_ms),
        file_path=str(path),
    )
192
+
193
+
194
def parse_file(path: Path) -> ParsedSession | None:
    """Return only the first session found in *path*, or ``None`` if there is none.

    A convenience wrapper for single-session callers; ``parse_sessions`` returns
    all of them, since one vscdb can hold many composers.
    """
    sessions = parse_sessions(path)
    if not sessions:
        return None
    return sessions[0]