zeno-cli 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zeno_adapters/__init__.py +17 -0
- zeno_adapters/_common.py +38 -0
- zeno_adapters/anthropic.py +68 -0
- zeno_adapters/claude_code.py +101 -0
- zeno_adapters/crewai.py +92 -0
- zeno_adapters/langgraph.py +49 -0
- zeno_adapters/openai.py +108 -0
- zeno_cli/__init__.py +1 -0
- zeno_cli/_hooks/cc_bridge.py +1016 -0
- zeno_cli/doctor.py +535 -0
- zeno_cli/hook_install.py +269 -0
- zeno_cli/hud/__init__.py +1 -0
- zeno_cli/hud/hud_install.py +652 -0
- zeno_cli/hud/zeno_attention.py +288 -0
- zeno_cli/hud/zeno_cognition.py +457 -0
- zeno_cli/hud/zeno_hud.py +496 -0
- zeno_cli/interview_invites.py +342 -0
- zeno_cli/login.py +241 -0
- zeno_cli/main.py +2534 -0
- zeno_cli/onboard.py +206 -0
- zeno_cli/outreach.py +456 -0
- zeno_cli/version.py +67 -0
- zeno_cli-0.3.4.dist-info/METADATA +161 -0
- zeno_cli-0.3.4.dist-info/RECORD +69 -0
- zeno_cli-0.3.4.dist-info/WHEEL +4 -0
- zeno_cli-0.3.4.dist-info/entry_points.txt +4 -0
- zeno_core/__init__.py +67 -0
- zeno_core/analytics.py +193 -0
- zeno_core/rtlx_s.py +460 -0
- zeno_core/streak.py +178 -0
- zeno_core/tlx_s.py +192 -0
- zeno_sdk/__init__.py +6 -0
- zeno_sdk/_generated/__init__.py +6 -0
- zeno_sdk/_generated/client.py +819 -0
- zeno_sdk/_migrations/alembic/env.py +33 -0
- zeno_sdk/_migrations/alembic/script.py.mako +18 -0
- zeno_sdk/_migrations/alembic/versions/0001_initial.py +79 -0
- zeno_sdk/_migrations/alembic/versions/0002_cognition_samples.py +53 -0
- zeno_sdk/_migrations/alembic/versions/0003_cognition_drivers.py +41 -0
- zeno_sdk/_migrations/alembic/versions/0004_transcript_intelligence.py +248 -0
- zeno_sdk/_migrations/alembic.ini +35 -0
- zeno_sdk/_runtime.py +12 -0
- zeno_sdk/adapters/__init__.py +15 -0
- zeno_sdk/adapters/anthropic.py +5 -0
- zeno_sdk/adapters/claude_code.py +5 -0
- zeno_sdk/adapters/crewai.py +5 -0
- zeno_sdk/adapters/langgraph.py +5 -0
- zeno_sdk/adapters/openai.py +5 -0
- zeno_sdk/auth.py +25 -0
- zeno_sdk/client.py +87 -0
- zeno_sdk/config.py +61 -0
- zeno_sdk/daemon.py +72 -0
- zeno_sdk/privacy.py +46 -0
- zeno_sdk/session.py +179 -0
- zeno_sdk/storage.py +487 -0
- zeno_sdk/types/__init__.py +121 -0
- zeno_session_intel/__init__.py +19 -0
- zeno_session_intel/analytics.py +588 -0
- zeno_session_intel/compression.py +123 -0
- zeno_session_intel/ingest.py +376 -0
- zeno_session_intel/model.py +129 -0
- zeno_session_intel/parsers/__init__.py +31 -0
- zeno_session_intel/parsers/claude_code.py +169 -0
- zeno_session_intel/parsers/codex.py +265 -0
- zeno_session_intel/parsers/cursor.py +198 -0
- zeno_session_intel/prices.py +281 -0
- zeno_session_intel/schema.py +277 -0
- zeno_session_intel/signals.py +319 -0
- zeno_session_intel/taxonomy.py +71 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Claude Code transcript parser (JSONL, the format zeno already hooks - verified).
|
|
2
|
+
|
|
3
|
+
Layout: ``~/.claude/projects/<project-slug>/<sessionId>.jsonl``, one JSON object per
|
|
4
|
+
line. Lines are ``user`` / ``assistant`` / ``system`` / ``summary`` records carrying a
|
|
5
|
+
``message`` object, plus ``uuid`` / ``parentUuid`` / ``timestamp`` / ``sessionId`` /
|
|
6
|
+
``cwd`` / ``gitBranch`` / ``isSidechain``. Assistant ``message.usage`` is the cache-aware
|
|
7
|
+
token block; assistant ``message.content`` is a list of text/thinking/tool_use blocks;
|
|
8
|
+
user content is a string or a list including ``tool_result`` blocks.
|
|
9
|
+
|
|
10
|
+
Read-only: the file is opened ``"r"`` and never modified. Never raises - a malformed line
|
|
11
|
+
is skipped.
|
|
12
|
+
|
|
13
|
+
CC gotcha: ``cache_read_input_tokens`` is large and recurs every turn (prompt caching),
|
|
14
|
+
so each assistant message carries its own usage as ONE ledger event (deduped by message
|
|
15
|
+
id); the rollup uses the ledger, never a naive per-line sum.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
from ..model import ParsedMessage, ParsedSession, ToolResult, ToolUse, to_int
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def default_root() -> Path:
    """Return the default Claude Code transcript root, ``~/.claude/projects``."""
    home = os.path.expanduser("~")
    return Path(home).joinpath(".claude", "projects")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def discover(root: Path) -> list[Path]:
    """List every ``*.jsonl`` file found recursively under *root*, sorted by path.

    A *root* that does not exist yields an empty list.
    """
    if root.exists():
        transcripts = root.glob("**/*.jsonl")
        return sorted(transcripts)
    return []
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _flatten_content(content: object) -> tuple[str, str, list[ToolUse], list[ToolResult]]:
|
|
38
|
+
"""Return (text, thinking, tool_uses, tool_results) from a CC message.content."""
|
|
39
|
+
if isinstance(content, str):
|
|
40
|
+
return content, "", [], []
|
|
41
|
+
text_parts: list[str] = []
|
|
42
|
+
thinking_parts: list[str] = []
|
|
43
|
+
tool_uses: list[ToolUse] = []
|
|
44
|
+
tool_results: list[ToolResult] = []
|
|
45
|
+
if isinstance(content, list):
|
|
46
|
+
for block in content:
|
|
47
|
+
if not isinstance(block, dict):
|
|
48
|
+
if isinstance(block, str):
|
|
49
|
+
text_parts.append(block)
|
|
50
|
+
continue
|
|
51
|
+
btype = block.get("type")
|
|
52
|
+
if btype == "text":
|
|
53
|
+
text_parts.append(str(block.get("text", "")))
|
|
54
|
+
elif btype == "thinking":
|
|
55
|
+
thinking_parts.append(str(block.get("thinking", "")))
|
|
56
|
+
elif btype == "tool_use":
|
|
57
|
+
tool_uses.append(
|
|
58
|
+
ToolUse(
|
|
59
|
+
name=str(block.get("name", "")),
|
|
60
|
+
input_json=json.dumps(block.get("input", {}), sort_keys=True),
|
|
61
|
+
id=str(block.get("id", "")),
|
|
62
|
+
)
|
|
63
|
+
)
|
|
64
|
+
elif btype == "tool_result":
|
|
65
|
+
rc = block.get("content", "")
|
|
66
|
+
if isinstance(rc, list):
|
|
67
|
+
rc = " ".join(
|
|
68
|
+
str(b.get("text", "")) if isinstance(b, dict) else str(b) for b in rc
|
|
69
|
+
)
|
|
70
|
+
tool_results.append(
|
|
71
|
+
ToolResult(
|
|
72
|
+
tool_use_id=str(block.get("tool_use_id", "")),
|
|
73
|
+
content=str(rc),
|
|
74
|
+
is_error=bool(block.get("is_error", False)),
|
|
75
|
+
)
|
|
76
|
+
)
|
|
77
|
+
return "\n".join(p for p in text_parts if p), "\n".join(thinking_parts), tool_uses, tool_results
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def parse_file(path: Path) -> ParsedSession | None:
    """Parse one Claude Code transcript (``<sessionId>.jsonl``) into a ``ParsedSession``.

    The file is only read, never modified. Blank lines, lines that are not valid
    JSON, non-object records, records without a ``message`` object and records whose
    role is not user/assistant/system are skipped. ``summary`` records only supply
    the display name.

    Returns ``None`` when the file cannot be read or yields no messages.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform locale
        # encoding (e.g. cp1252 on Windows), which would garble non-ASCII text.
        raw = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None
    session_id = path.stem  # fallback until a record carries its own sessionId
    messages: list[ParsedMessage] = []
    cwd = ""
    git_branch = ""
    started_at: str | None = None
    ended_at: str | None = None
    display_name: str | None = None
    ordinal = 0

    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except Exception:
            # malformed line -> skip; this parser must never raise
            continue
        if not isinstance(obj, dict):
            continue
        rtype = obj.get("type")
        if rtype == "summary":
            # First non-empty summary wins. ``or ""`` keeps a null summary from
            # turning into the literal display name "None".
            display_name = display_name or str(obj.get("summary") or "") or None
            continue
        msg = obj.get("message")
        if not isinstance(msg, dict):
            continue
        role = str(msg.get("role") or rtype or "")
        if role not in ("user", "assistant", "system"):
            continue
        ts = obj.get("timestamp")
        if isinstance(ts, str):
            started_at = started_at or ts  # first timestamp seen
            ended_at = ts  # last timestamp seen
        cwd = cwd or str(obj.get("cwd", "") or "")
        git_branch = git_branch or str(obj.get("gitBranch", "") or "")
        sid = obj.get("sessionId")
        if isinstance(sid, str) and sid:
            session_id = sid
        text, thinking, tool_uses, tool_results = _flatten_content(msg.get("content"))
        usage = msg.get("usage") if isinstance(msg.get("usage"), dict) else {}
        pm = ParsedMessage(
            ordinal=ordinal,
            role=role,
            content=text,
            thinking_text=thinking,
            timestamp=ts if isinstance(ts, str) else None,
            model=str(msg.get("model", "") or ""),
            input_tokens=to_int(usage.get("input_tokens")),
            output_tokens=to_int(usage.get("output_tokens")),
            cache_creation_input_tokens=to_int(usage.get("cache_creation_input_tokens")),
            cache_read_input_tokens=to_int(usage.get("cache_read_input_tokens")),
            raw_usage=json.dumps(usage, sort_keys=True) if usage else "",
            source_uuid=str(obj.get("uuid", "") or ""),
            source_parent_uuid=str(obj.get("parentUuid", "") or ""),
            is_sidechain=bool(obj.get("isSidechain", False)),
            tool_uses=tool_uses,
            tool_results=tool_results,
            # message id when present, else the record uuid
            usage_dedup_key=str(msg.get("id") or obj.get("uuid") or ""),
        )
        messages.append(pm)
        ordinal += 1

    if not messages:
        return None
    project = path.parent.name  # the enclosing directory name
    first_user = next((m.content for m in messages if m.role == "user" and m.content), None)
    try:
        mtime = int(path.stat().st_mtime)
    except OSError:
        mtime = None
    return ParsedSession(
        id=session_id,
        agent="claude",
        messages=messages,
        project=project,
        cwd=cwd,
        git_branch=git_branch,
        display_name=display_name or (first_user[:80] if first_user else None),
        first_message=first_user,
        started_at=started_at,
        ended_at=ended_at,
        file_path=str(path),
        file_mtime=mtime,
    )
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""Codex (OpenAI) rollout parser - JSONL. EXPERIMENTAL: fixture-verified only.
|
|
2
|
+
|
|
3
|
+
STATUS: experimental. zeno is Claude-Code-first; Codex is NOT a first-class capture path
|
|
4
|
+
and is NOT installed on this box. This parser is validated against a synthesized fixture
|
|
5
|
+
(``tests/fixtures/session_intel/codex_rollout.jsonl``) built from the public rollout format
|
|
6
|
+
only - it is **fixture-verified, needs real-data confirmation**. The ingester gates Codex
|
|
7
|
+
behind an explicit ``--tools`` opt-in so a wrong guess never corrupts real Claude Code data.
|
|
8
|
+
See docs/TOOLS_STATUS.md and docs/adapters/codex.md.
|
|
9
|
+
|
|
10
|
+
Layout: ``~/.codex/sessions/<YYYY>/<MM>/<DD>/rollout-<ts>-<uuid>.jsonl``. Record types:
|
|
11
|
+
- ``session_meta`` -> id / timestamp / cwd (payload-nested or flat)
|
|
12
|
+
- ``response_item`` ``message`` -> a turn (role + content[].text)
|
|
13
|
+
- ``response_item`` ``function_call`` -> a tool call (name/arguments/call_id)
|
|
14
|
+
- ``response_item`` ``function_call_output`` -> a tool result (call_id/output, exit code)
|
|
15
|
+
- ``event_msg`` ``token_count`` / ``usage`` -> token usage, attributed to the last turn
|
|
16
|
+
|
|
17
|
+
Read-only; never raises (malformed lines skipped).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
from ..model import ParsedMessage, ParsedSession, ToolResult, ToolUse, to_int
|
|
27
|
+
|
|
28
|
+
# Fallback model id for assistant turns when the rollout's session_meta carries no model.
_DEFAULT_MODEL = "gpt-5.3-codex"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def default_root() -> Path:
    """Return the default Codex rollout root, ``~/.codex/sessions``."""
    home = os.path.expanduser("~")
    return Path(home).joinpath(".codex", "sessions")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def discover(root: Path) -> list[Path]:
    """List every ``rollout-*.jsonl`` file found recursively under *root*, sorted.

    A *root* that does not exist yields an empty list.
    """
    if root.exists():
        rollouts = root.glob("**/rollout-*.jsonl")
        return sorted(rollouts)
    return []
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _payload(obj: dict) -> dict:
|
|
42
|
+
p = obj.get("payload")
|
|
43
|
+
return p if isinstance(p, dict) else obj
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _text_from_content(content: object) -> str:
|
|
47
|
+
if isinstance(content, str):
|
|
48
|
+
return content
|
|
49
|
+
if isinstance(content, list):
|
|
50
|
+
parts = []
|
|
51
|
+
for b in content:
|
|
52
|
+
if isinstance(b, dict):
|
|
53
|
+
parts.append(str(b.get("text", "")))
|
|
54
|
+
elif isinstance(b, str):
|
|
55
|
+
parts.append(b)
|
|
56
|
+
return "\n".join(p for p in parts if p)
|
|
57
|
+
return ""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _block(src: dict) -> dict:
    """Normalize one Codex usage dict to the four token counters used by the parser.

    ``cached_input_tokens`` / ``reasoning_output_tokens`` are preferred when the key
    is present; otherwise ``cache_read_input_tokens`` / ``reasoning_tokens`` are read.
    Every value goes through ``to_int``.
    """
    cached = src.get("cached_input_tokens", src.get("cache_read_input_tokens"))
    reasoning = src.get("reasoning_output_tokens", src.get("reasoning_tokens"))
    return {
        "input_tokens": to_int(src.get("input_tokens")),
        "output_tokens": to_int(src.get("output_tokens")),
        "cache_read_input_tokens": to_int(cached),
        "reasoning_tokens": to_int(reasoning),
    }
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _usage_block(info: dict) -> tuple[dict, bool]:
    """Return ``(usage dict, is_cumulative)`` for a Codex token_count record.

    Sources are tried in order: ``last_token_usage`` (already a per-turn delta),
    ``total_token_usage`` (CUMULATIVE across the session, flagged ``True`` so the
    caller can subtract its running total), then ``usage``. If none of them is a
    dict, *info* itself is read as the usage block.
    """
    candidates = (
        ("last_token_usage", False),
        ("total_token_usage", True),
        ("usage", False),
    )
    for key, cumulative in candidates:
        found = info.get(key)
        if isinstance(found, dict):
            return _block(found), cumulative
    return _block(info), False
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def parse_file(path: Path) -> ParsedSession | None:
    """Parse one Codex rollout JSONL file into a ``ParsedSession``.

    The file is only read. Blank lines, lines that are not valid JSON and non-object
    records are skipped. ``session_meta`` supplies id / cwd / model; ``response_item``
    records become messages, tool calls and tool results; ``event_msg`` /
    ``token_count`` records carry token usage, which is written onto the most recent
    assistant message.

    Returns ``None`` when the file cannot be read or yields no messages.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform locale
        # encoding (e.g. cp1252 on Windows), which would garble non-ASCII text.
        raw = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None

    session_id = path.stem  # fallback until session_meta supplies an id
    cwd = ""
    model = ""
    started_at: str | None = None
    ended_at: str | None = None
    messages: list[ParsedMessage] = []
    ordinal = 0
    # running cumulative, to turn Codex's cumulative token_count totals into per-turn deltas
    prev_cum = {
        "input_tokens": 0,
        "output_tokens": 0,
        "cache_read_input_tokens": 0,
        "reasoning_tokens": 0,
    }

    def last_assistant() -> ParsedMessage | None:
        """Return the most recently appended assistant message, if any."""
        for m in reversed(messages):
            if m.role == "assistant":
                return m
        return None

    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except Exception:
            # malformed line -> skip; this parser must never raise
            continue
        if not isinstance(obj, dict):
            continue
        rtype = obj.get("type")
        ts = obj.get("timestamp")
        if isinstance(ts, str):
            started_at = started_at or ts  # first timestamp seen
            ended_at = ts  # last timestamp seen

        if rtype == "session_meta":
            p = _payload(obj)
            sid = p.get("id") or obj.get("id")
            if isinstance(sid, str) and sid:
                session_id = sid
            cwd = cwd or str(p.get("cwd", "") or "")
            model = model or str(p.get("model", "") or "")
            continue

        if rtype == "response_item":
            p = _payload(obj)
            ptype = p.get("type")
            if ptype == "message":
                role = str(p.get("role", "") or "")
                if role not in ("user", "assistant", "system"):
                    continue
                messages.append(
                    ParsedMessage(
                        ordinal=ordinal,
                        role=role,
                        content=_text_from_content(p.get("content")),
                        timestamp=ts if isinstance(ts, str) else None,
                        # only assistant turns carry a model id
                        model=(model or _DEFAULT_MODEL) if role == "assistant" else "",
                    )
                )
                ordinal += 1
            elif ptype == "function_call":
                tu = ToolUse(
                    name=str(p.get("name", "") or ""),
                    input_json=_canon_args(p.get("arguments")),
                    id=str(p.get("call_id", "") or ""),
                )
                target = last_assistant()
                if target is None:
                    # tool call before any assistant message: synthesize a holder turn
                    target = ParsedMessage(ordinal=ordinal, role="assistant", model=model)
                    messages.append(target)
                    ordinal += 1
                target.tool_uses.append(tu)
            elif ptype == "function_call_output":
                out = p.get("output")
                content, is_err = _decode_output(out)
                tr = ToolResult(
                    tool_use_id=str(p.get("call_id", "") or ""),
                    content=content,
                    is_error=is_err,
                )
                target = last_assistant()
                # a result with no assistant message to attach to is dropped
                if target is not None:
                    target.tool_results.append(tr)
            continue

        if rtype in ("event_msg", "token_count"):
            p = _payload(obj)
            info = p.get("info") if isinstance(p.get("info"), dict) else p
            if p.get("type") in ("token_count", None) or rtype == "token_count":
                usage, is_cumulative = _usage_block(info)
                if is_cumulative:
                    # cumulative -> per-turn delta, then advance the running total. A
                    # context-window reset / compaction makes the cumulative drop BELOW the
                    # prior total (non-monotonic); a plain ``max(0, cur-prev)`` would clamp
                    # that turn to 0 and silently drop its real tokens. On a reset the new
                    # lower cumulative IS that turn's usage, so take the current value itself.
                    delta = {
                        k: (
                            usage[k]
                            if usage[k] < prev_cum.get(k, 0)
                            else usage[k] - prev_cum.get(k, 0)
                        )
                        for k in usage
                    }
                    prev_cum = dict(usage)
                    usage = delta
                target = last_assistant()
                if target is not None and any(usage.values()):
                    # NOTE(review): a later token_count landing on the same assistant
                    # message overwrites its non-zero counters rather than adding to
                    # them - confirm that is intended.
                    target.input_tokens = usage["input_tokens"] or target.input_tokens
                    target.output_tokens = usage["output_tokens"] or target.output_tokens
                    target.cache_read_input_tokens = (
                        usage["cache_read_input_tokens"] or target.cache_read_input_tokens
                    )
                    target.reasoning_tokens = usage["reasoning_tokens"] or target.reasoning_tokens
                    target.raw_usage = json.dumps(usage, sort_keys=True)
                    target.model = target.model or model or _DEFAULT_MODEL
                    target.usage_dedup_key = f"{session_id}:tok:{target.ordinal}"

    if not messages:
        return None
    first_user = next((m.content for m in messages if m.role == "user" and m.content), None)
    try:
        mtime = int(path.stat().st_mtime)
    except OSError:
        mtime = None
    return ParsedSession(
        id=session_id,
        agent="codex",
        messages=messages,
        # project is the cwd's last path component, else the rollout's directory name
        project=Path(cwd).name if cwd else path.parent.name,
        cwd=cwd,
        display_name=(first_user[:80] if first_user else None),
        first_message=first_user,
        started_at=started_at,
        ended_at=ended_at,
        file_path=str(path),
        file_mtime=mtime,
    )
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _canon_args(arguments: object) -> str:
|
|
238
|
+
if isinstance(arguments, str):
|
|
239
|
+
try:
|
|
240
|
+
return json.dumps(json.loads(arguments), sort_keys=True)
|
|
241
|
+
except Exception:
|
|
242
|
+
return arguments
|
|
243
|
+
if isinstance(arguments, dict):
|
|
244
|
+
return json.dumps(arguments, sort_keys=True)
|
|
245
|
+
return ""
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _decode_output(out: object) -> tuple[str, bool]:
|
|
249
|
+
"""Codex function_call_output is often a JSON string with {output, metadata:{exit_code}}."""
|
|
250
|
+
if isinstance(out, str):
|
|
251
|
+
try:
|
|
252
|
+
obj = json.loads(out)
|
|
253
|
+
except Exception:
|
|
254
|
+
return out, False
|
|
255
|
+
elif isinstance(out, dict):
|
|
256
|
+
obj = out
|
|
257
|
+
else:
|
|
258
|
+
return "", False
|
|
259
|
+
if isinstance(obj, dict):
|
|
260
|
+
text = str(obj.get("output", obj.get("content", "")))
|
|
261
|
+
meta = obj.get("metadata")
|
|
262
|
+
exit_code = meta.get("exit_code") if isinstance(meta, dict) else obj.get("exit_code")
|
|
263
|
+
is_err = bool(exit_code) and str(exit_code) != "0"
|
|
264
|
+
return text, is_err
|
|
265
|
+
return str(obj), False
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Cursor chat parser - a SQLite ``state.vscdb`` (verified against real local data).
|
|
2
|
+
|
|
3
|
+
Unlike Claude Code / Codex (one JSONL file == one session), one Cursor ``state.vscdb``
|
|
4
|
+
holds MANY composers (each composer == one session), so this module exposes
|
|
5
|
+
``parse_sessions(path) -> list[ParsedSession]`` and the ingester iterates it.
|
|
6
|
+
|
|
7
|
+
Layout (verified on dctrl's Mac, 2026-06-17):
|
|
8
|
+
``~/Library/Application Support/Cursor/User/globalStorage/state.vscdb`` (+ per-workspace
|
|
9
|
+
``workspaceStorage/<hash>/state.vscdb``), a ``cursorDiskKV(key, value)`` table where:
|
|
10
|
+
- ``composerData:<composerId>`` -> JSON header: ``composerId``, ``name``, ``createdAt``
|
|
11
|
+
(epoch ms), ``fullConversationHeadersOnly`` = ordered ``[{bubbleId, type}]`` (type
|
|
12
|
+
1 = user, 2 = assistant).
|
|
13
|
+
- ``bubbleId:<composerId>:<bubbleId>`` -> JSON message: ``text``, ``type``,
|
|
14
|
+
``toolFormerData`` (``.name`` e.g. ``read_file``), ``tokenCount`` (often ``0/0``).
|
|
15
|
+
|
|
16
|
+
Quirks handled: only the most-recently-active composer has its bubbles materialized (the
|
|
17
|
+
rest are header-only -> tolerate composers with zero retrievable bubbles); Cursor may hold
|
|
18
|
+
the DB open -> **copy-then-read** the file (+ WAL sidecars) to a temp dir to dodge
|
|
19
|
+
``database is locked``. Token counts are best-effort (often 0), so Cursor cost is usually
|
|
20
|
+
unpriced - never alert on it.
|
|
21
|
+
|
|
22
|
+
Read-only on the source; never raises (a bad row is skipped). py3.9-safe.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import json
|
|
28
|
+
import os
|
|
29
|
+
import shutil
|
|
30
|
+
import sqlite3
|
|
31
|
+
import tempfile
|
|
32
|
+
from datetime import datetime, timezone
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
from ..model import ParsedMessage, ParsedSession, ToolResult, ToolUse, to_int
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def default_root() -> Path:
    """Return ``~/Library/Application Support/Cursor/User``, the default Cursor data root."""
    home = os.path.expanduser("~")
    return Path(home).joinpath("Library", "Application Support", "Cursor", "User")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def discover(root: Path) -> list[Path]:
    """Locate Cursor ``state.vscdb`` databases under *root*.

    The global database (``globalStorage/state.vscdb``) comes first when it exists,
    followed by every ``workspaceStorage/<hash>/state.vscdb`` in sorted order.
    A *root* that does not exist yields an empty list.
    """
    if not root.exists():
        return []
    global_db = root / "globalStorage/state.vscdb"
    found = [global_db] if global_db.exists() else []
    workspace_dbs = (root / "workspaceStorage").glob("*/state.vscdb")
    return found + sorted(workspace_dbs)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _epoch_ms_to_iso(ms: object) -> str | None:
|
|
55
|
+
try:
|
|
56
|
+
return datetime.fromtimestamp(float(ms) / 1000.0, tz=timezone.utc).isoformat()
|
|
57
|
+
except (ValueError, TypeError, OverflowError, OSError):
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _open_copy(path: Path):
|
|
62
|
+
"""Copy the vscdb (+ -wal/-shm) to a temp dir and open it, dodging Cursor's lock.
|
|
63
|
+
Returns (connection, tempdir) or (None, tempdir) on failure; caller cleans up."""
|
|
64
|
+
d = tempfile.mkdtemp(prefix="zeno-cursor-")
|
|
65
|
+
dst = os.path.join(d, "state.vscdb")
|
|
66
|
+
try:
|
|
67
|
+
for ext in ("", "-wal", "-shm"):
|
|
68
|
+
src = str(path) + ext
|
|
69
|
+
if os.path.exists(src):
|
|
70
|
+
shutil.copy(src, dst + ext)
|
|
71
|
+
con = sqlite3.connect(dst)
|
|
72
|
+
con.execute("PRAGMA query_only=1")
|
|
73
|
+
return con, d
|
|
74
|
+
except Exception:
|
|
75
|
+
return None, d
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def parse_sessions(path: Path) -> list[ParsedSession]:
    """Parse every composer stored in one Cursor ``state.vscdb`` into sessions.

    Works on a temporary copy of the database, which is removed before returning.
    Composers that do not parse are left out, and any error while reading stops the
    scan and returns whatever was collected so far.
    """
    con, tmpdir = _open_copy(path)
    if con is None:
        shutil.rmtree(tmpdir, ignore_errors=True)
        return []

    collected: list[ParsedSession] = []
    try:
        query = "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"
        rows = con.execute(query).fetchall()
        for _key, raw in rows:
            parsed = _parse_composer(con, raw, path)
            if parsed is None:
                continue
            collected.append(parsed)
    except Exception:
        # best-effort: this parser must never raise
        pass
    finally:
        con.close()
        shutil.rmtree(tmpdir, ignore_errors=True)
    return collected
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _parse_composer(con: sqlite3.Connection, raw: str, path: Path) -> ParsedSession | None:
|
|
101
|
+
try:
|
|
102
|
+
cd = json.loads(raw)
|
|
103
|
+
except Exception:
|
|
104
|
+
return None
|
|
105
|
+
if not isinstance(cd, dict):
|
|
106
|
+
return None
|
|
107
|
+
composer_id = str(cd.get("composerId") or "")
|
|
108
|
+
headers = cd.get("fullConversationHeadersOnly")
|
|
109
|
+
if not composer_id or not isinstance(headers, list) or not headers:
|
|
110
|
+
return None # header-only / empty composer -> tolerate (skip)
|
|
111
|
+
|
|
112
|
+
messages: list[ParsedMessage] = []
|
|
113
|
+
ordinal = 0
|
|
114
|
+
last_ms = None
|
|
115
|
+
for h in headers:
|
|
116
|
+
if not isinstance(h, dict):
|
|
117
|
+
continue
|
|
118
|
+
bid = h.get("bubbleId")
|
|
119
|
+
btype = h.get("type")
|
|
120
|
+
if not bid:
|
|
121
|
+
continue
|
|
122
|
+
row = con.execute(
|
|
123
|
+
"SELECT value FROM cursorDiskKV WHERE key = ?",
|
|
124
|
+
(f"bubbleId:{composer_id}:{bid}",),
|
|
125
|
+
).fetchone()
|
|
126
|
+
if not row:
|
|
127
|
+
continue # bubble not materialized (common for older composers)
|
|
128
|
+
try:
|
|
129
|
+
b = json.loads(row[0])
|
|
130
|
+
except Exception:
|
|
131
|
+
continue
|
|
132
|
+
if not isinstance(b, dict):
|
|
133
|
+
continue
|
|
134
|
+
role = "user" if btype == 1 else "assistant" if btype == 2 else "system"
|
|
135
|
+
tool_uses: list[ToolUse] = []
|
|
136
|
+
tool_results: list[ToolResult] = []
|
|
137
|
+
tfd = b.get("toolFormerData")
|
|
138
|
+
if isinstance(tfd, dict) and tfd.get("name"):
|
|
139
|
+
params = tfd.get("params")
|
|
140
|
+
tool_uses.append(
|
|
141
|
+
ToolUse(
|
|
142
|
+
name=str(tfd.get("name")),
|
|
143
|
+
input_json=json.dumps(params, sort_keys=True) if params else "",
|
|
144
|
+
id=str(b.get("bubbleId") or bid),
|
|
145
|
+
)
|
|
146
|
+
)
|
|
147
|
+
status = str(tfd.get("status", "")).lower()
|
|
148
|
+
tool_results.append(
|
|
149
|
+
ToolResult(
|
|
150
|
+
tool_use_id=str(b.get("bubbleId") or bid),
|
|
151
|
+
content=str(tfd.get("result", "")),
|
|
152
|
+
is_error=status in ("error", "failed", "cancelled"),
|
|
153
|
+
)
|
|
154
|
+
)
|
|
155
|
+
tc = b.get("tokenCount") if isinstance(b.get("tokenCount"), dict) else {}
|
|
156
|
+
created = b.get("createdAt")
|
|
157
|
+
if created is not None:
|
|
158
|
+
last_ms = created
|
|
159
|
+
messages.append(
|
|
160
|
+
ParsedMessage(
|
|
161
|
+
ordinal=ordinal,
|
|
162
|
+
role=role,
|
|
163
|
+
content=str(b.get("text") or ""),
|
|
164
|
+
thinking_text="",
|
|
165
|
+
timestamp=_epoch_ms_to_iso(created),
|
|
166
|
+
model="", # Cursor does not expose a per-bubble model id -> unpriced
|
|
167
|
+
input_tokens=to_int(tc.get("inputTokens")),
|
|
168
|
+
output_tokens=to_int(tc.get("outputTokens")),
|
|
169
|
+
raw_usage=json.dumps(tc, sort_keys=True) if tc else "",
|
|
170
|
+
source_uuid=str(bid),
|
|
171
|
+
tool_uses=tool_uses,
|
|
172
|
+
tool_results=tool_results,
|
|
173
|
+
usage_dedup_key=str(bid),
|
|
174
|
+
)
|
|
175
|
+
)
|
|
176
|
+
ordinal += 1
|
|
177
|
+
|
|
178
|
+
if not messages:
|
|
179
|
+
return None
|
|
180
|
+
first_user = next((m.content for m in messages if m.role == "user" and m.content), None)
|
|
181
|
+
return ParsedSession(
|
|
182
|
+
id=composer_id,
|
|
183
|
+
agent="cursor",
|
|
184
|
+
messages=messages,
|
|
185
|
+
project=str(cd.get("name") or "") or "cursor",
|
|
186
|
+
display_name=(str(cd.get("name")) if cd.get("name") else None),
|
|
187
|
+
first_message=first_user,
|
|
188
|
+
started_at=_epoch_ms_to_iso(cd.get("createdAt")),
|
|
189
|
+
ended_at=_epoch_ms_to_iso(last_ms if last_ms is not None else cd.get("lastUpdatedAt")),
|
|
190
|
+
file_path=str(path),
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def parse_file(path: Path) -> ParsedSession | None:
    """Single-session convenience: return the first composer parsed from *path*.

    Returns ``None`` when nothing could be parsed. The ingester uses
    ``parse_sessions`` instead, because one vscdb holds many composers.
    """
    sessions = parse_sessions(path)
    if not sessions:
        return None
    return sessions[0]
|