zeno-cli 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zeno_adapters/__init__.py +17 -0
- zeno_adapters/_common.py +38 -0
- zeno_adapters/anthropic.py +68 -0
- zeno_adapters/claude_code.py +101 -0
- zeno_adapters/crewai.py +92 -0
- zeno_adapters/langgraph.py +49 -0
- zeno_adapters/openai.py +108 -0
- zeno_cli/__init__.py +1 -0
- zeno_cli/_hooks/cc_bridge.py +1016 -0
- zeno_cli/doctor.py +535 -0
- zeno_cli/hook_install.py +269 -0
- zeno_cli/hud/__init__.py +1 -0
- zeno_cli/hud/hud_install.py +652 -0
- zeno_cli/hud/zeno_attention.py +288 -0
- zeno_cli/hud/zeno_cognition.py +457 -0
- zeno_cli/hud/zeno_hud.py +496 -0
- zeno_cli/interview_invites.py +342 -0
- zeno_cli/login.py +241 -0
- zeno_cli/main.py +2534 -0
- zeno_cli/onboard.py +206 -0
- zeno_cli/outreach.py +456 -0
- zeno_cli/version.py +67 -0
- zeno_cli-0.3.4.dist-info/METADATA +161 -0
- zeno_cli-0.3.4.dist-info/RECORD +69 -0
- zeno_cli-0.3.4.dist-info/WHEEL +4 -0
- zeno_cli-0.3.4.dist-info/entry_points.txt +4 -0
- zeno_core/__init__.py +67 -0
- zeno_core/analytics.py +193 -0
- zeno_core/rtlx_s.py +460 -0
- zeno_core/streak.py +178 -0
- zeno_core/tlx_s.py +192 -0
- zeno_sdk/__init__.py +6 -0
- zeno_sdk/_generated/__init__.py +6 -0
- zeno_sdk/_generated/client.py +819 -0
- zeno_sdk/_migrations/alembic/env.py +33 -0
- zeno_sdk/_migrations/alembic/script.py.mako +18 -0
- zeno_sdk/_migrations/alembic/versions/0001_initial.py +79 -0
- zeno_sdk/_migrations/alembic/versions/0002_cognition_samples.py +53 -0
- zeno_sdk/_migrations/alembic/versions/0003_cognition_drivers.py +41 -0
- zeno_sdk/_migrations/alembic/versions/0004_transcript_intelligence.py +248 -0
- zeno_sdk/_migrations/alembic.ini +35 -0
- zeno_sdk/_runtime.py +12 -0
- zeno_sdk/adapters/__init__.py +15 -0
- zeno_sdk/adapters/anthropic.py +5 -0
- zeno_sdk/adapters/claude_code.py +5 -0
- zeno_sdk/adapters/crewai.py +5 -0
- zeno_sdk/adapters/langgraph.py +5 -0
- zeno_sdk/adapters/openai.py +5 -0
- zeno_sdk/auth.py +25 -0
- zeno_sdk/client.py +87 -0
- zeno_sdk/config.py +61 -0
- zeno_sdk/daemon.py +72 -0
- zeno_sdk/privacy.py +46 -0
- zeno_sdk/session.py +179 -0
- zeno_sdk/storage.py +487 -0
- zeno_sdk/types/__init__.py +121 -0
- zeno_session_intel/__init__.py +19 -0
- zeno_session_intel/analytics.py +588 -0
- zeno_session_intel/compression.py +123 -0
- zeno_session_intel/ingest.py +376 -0
- zeno_session_intel/model.py +129 -0
- zeno_session_intel/parsers/__init__.py +31 -0
- zeno_session_intel/parsers/claude_code.py +169 -0
- zeno_session_intel/parsers/codex.py +265 -0
- zeno_session_intel/parsers/cursor.py +198 -0
- zeno_session_intel/prices.py +281 -0
- zeno_session_intel/schema.py +277 -0
- zeno_session_intel/signals.py +319 -0
- zeno_session_intel/taxonomy.py +71 -0
|
@@ -0,0 +1,1016 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Claude Code -> zeno SQLite bridge (v2: event-time cognition capture).
|
|
3
|
+
|
|
4
|
+
Wires Claude Code hook events into the local zeno store. v1 captured
|
|
5
|
+
sessions/agent_runs/supervision_events so RTLX-S behavioral anchors populate.
|
|
6
|
+
v2 ADDS the multi-dimensional cognition signal at EVENT TIME: it records the real
|
|
7
|
+
prompt at UserPromptSubmit and, at Stop, reads the turn's transcript (tool uses,
|
|
8
|
+
tokens, timing) and writes one rich cognition_samples row (five drivers + a
|
|
9
|
+
composite) via the shared zeno_cognition model. The HUD and dashboard read those
|
|
10
|
+
same rows, so the terminal bar and the timeline agree by construction.
|
|
11
|
+
|
|
12
|
+
This fixes the v1 "attention flat at 50" problem: effort is the REAL submitted
|
|
13
|
+
prompt (not re-derived from a coarse transcript tail), and there are now five axes
|
|
14
|
+
(effort / autonomy / verification / fatigue / flow), each scored against the
|
|
15
|
+
user's own rolling baseline.
|
|
16
|
+
|
|
17
|
+
Event mapping:
|
|
18
|
+
SessionStart -> INSERT sessions
|
|
19
|
+
UserPromptSubmit -> INSERT agent_runs (open) + record the turn's prompt state
|
|
20
|
+
Stop -> close the open agent_run + WRITE the cognition_samples row
|
|
21
|
+
Notification -> INSERT supervision_events type='intervene'
|
|
22
|
+
SessionEnd -> close open runs + set sessions.end_at
|
|
23
|
+
|
|
24
|
+
Hard constraints (unchanged):
|
|
25
|
+
- stdlib only for the capture path; the ONE optional import is zeno_cognition
|
|
26
|
+
(also stdlib, light - no SDK/pandas/sqlalchemy), guarded so a missing zeno
|
|
27
|
+
checkout degrades to v1 capture, never an error
|
|
28
|
+
- ALWAYS exit 0, never block Claude Code on a zeno write failure
|
|
29
|
+
- bounded transcript reads (tail only) to stay fast on every fire
|
|
30
|
+
- debug log via env: ZENO_CC_BRIDGE_DEBUG=1 -> ~/.zeno/cc-bridge.log
|
|
31
|
+
- cognition schema is owned by zeno_cognition.ensure_schema (single source of
|
|
32
|
+
truth); the drift test guards it against the alembic migration
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import json
|
|
38
|
+
import math
|
|
39
|
+
import os
|
|
40
|
+
import socket
|
|
41
|
+
import sqlite3
|
|
42
|
+
import sys
|
|
43
|
+
import urllib.error
|
|
44
|
+
import urllib.request
|
|
45
|
+
import uuid
|
|
46
|
+
from datetime import datetime, timezone
|
|
47
|
+
from pathlib import Path
|
|
48
|
+
|
|
49
|
+
# datetime.UTC only exists on Python 3.11+, and some machines run this hook under
# a 3.9 system python3. Alias timezone.utc instead so the hook imports everywhere.
# (Keep the noqa: ruff's UP017 autofix would rewrite this to datetime.UTC and
# break 3.9.)
UTC = timezone.utc  # noqa: UP017

# Labels stamped on locally captured rows.
DEFAULT_PROJECT_SLUG: str = "default-project"
DEFAULT_HARNESS: str = "claude-code"
DEFAULT_MODEL: str = "claude-opus-4-8"

# Upper bound on how much of the transcript tail is read per hook fire.
TRANSCRIPT_TAIL_BYTES: int = 512 * 1024
RECENT_SCORES: int = 6
RECENT_DURATIONS: int = 8

# ---------------------------------------------------------------------------
# real-time cross-device sync (feat/realtime-sync) - PUSH side.
#
# Once the local capture (commit + close) has succeeded, the hook makes a
# best-effort POST of the just-written event to the shared API so the dashboard
# and both HUDs update live. Every failure here is swallowed: a dead target, a
# DNS failure or a slow network must not change the hook's exit code (always 0)
# or block Claude Code. Stdlib only (urllib).
#
# PRIVACY (PRIVACY.md): the payload carries ONLY derived numeric scores, token
# counts, bounded label fields (model/harness/event_name) and opaque ids. It
# NEVER carries the prompt text or any transcript content. The server's schema
# is extra="forbid", so a stray field would 422; we additionally never add one.
# _build_ingest_payload is the single chokepoint and is allowlist-only.
# ---------------------------------------------------------------------------
DEFAULT_API_BASE_URL: str = "https://zeno-api-364453955482.us-west1.run.app"
SYNC_PATH: str = "/v1/capture/ingest"
SYNC_TIMEOUT_S: float = 0.4  # explicit socket timeout; overridable for tests
OUTBOX_MAX_LINES: int = 500  # bounded append-only spool (drop-oldest)
OUTBOX_DRAIN_BUDGET_S: float = 0.5  # time-box a best-effort drain after a live push
OUTBOX_DRAIN_MAX: int = 5  # at most a few spooled lines per successful push

# Hook event -> bounded event_name label sent to the API. Membership in this map
# is also what decides whether an event pushes at all (Notification stays local).
_PUSH_EVENT_NAMES: dict[str, str] = {
    "SessionStart": "session_start",
    "UserPromptSubmit": "user_prompt_submit",
    "Stop": "stop",
    "SessionEnd": "session_end",
}

# Cross-event scratch: a Stop that wrote a cognition row stashes the derived-only
# sample here so main() can push it AFTER conn.commit()/close(). Reset per process.
_PENDING_PUSH: dict[str, dict] = {}
|
|
96
|
+
|
|
97
|
+
# Optional shared cognition model. It is guarded so that a missing zeno checkout
# (or any other import error) leaves _COG as None and the hook degrades to v1
# capture instead of failing.
_COG = None
try:  # pragma: no cover - import shim
    try:
        # First choice: the copy bundled with an installed zeno-cli wheel.
        from zeno_cli.hud import zeno_cognition as _COG  # type: ignore
    except Exception:
        # Second choice: a bare repo checkout (dev / dotfiles installs without the
        # wheel). The directory is put on sys.path and the module is imported as
        # a top-level module from there.
        _cog_dir = os.environ.get("ZENO_COG_DIR", "~/github/zeno/apps/cli/src/zeno_cli/hud")
        _cog_dir = os.path.expanduser(_cog_dir)
        if os.path.isdir(_cog_dir) and _cog_dir not in sys.path:
            sys.path.insert(0, _cog_dir)
        import zeno_cognition as _COG  # type: ignore  # noqa: E402
except Exception:
    _COG = None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _now() -> datetime:
    """Return the current instant as a timezone-aware datetime in UTC."""
    return datetime.now(UTC)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _now_iso() -> str:
    """Return the current UTC instant formatted with ``datetime.isoformat``."""
    current = _now()
    return current.isoformat()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _zeno_home() -> Path:
    """Return the zeno state directory: ``$ZENO_HOME`` if set, else ``~/.zeno``."""
    fallback = str(Path.home() / ".zeno")
    root = os.environ.get("ZENO_HOME", fallback)
    return Path(root).expanduser()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _zeno_db_path() -> Path:
    """Return the SQLite file path: ``$ZENO_DB_PATH`` if set, else ``<zeno home>/zeno.db``."""
    default_db = str(_zeno_home() / "zeno.db")
    configured = os.environ.get("ZENO_DB_PATH", default_db)
    return Path(configured).expanduser()
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _project_id(slug: str) -> str:
    """Map a project slug to a deterministic id (uuid5 in the URL namespace)."""
    project_uuid = uuid.uuid5(uuid.NAMESPACE_URL, slug)
    return str(project_uuid)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
# device + account identity for the dedup triple (device_id, session_id, sample_id)
|
|
141
|
+
# ---------------------------------------------------------------------------
|
|
142
|
+
def _device_id() -> str:
    """Return a stable per-machine id (first leg of the dedup triple).

    The zeno session id is uuid5(cc_session_id), so the SAME cc session run on two
    machines collapses to one uuid - device_id is what keeps the two operators'
    rows distinct.

    Resolution order:
      1. the non-empty, stripped contents of ``<zeno home>/device-id``;
      2. a fresh uuid4, persisted via a temp file + rename (chmod 0600 is
         attempted on the temp file, best-effort);
      3. if persisting fails, the hostname, or the unpersisted uuid4 when the
         hostname is empty or unavailable.

    An unreadable, undecodable or unwritable id file never raises.
    """
    path = _zeno_home() / "device-id"
    try:
        existing = path.read_text(encoding="utf-8").strip()
        if existing:
            return existing
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError: a corrupt (non-UTF-8)
        # id file must be treated like a missing one, not crash the caller.
        pass
    new_id = str(uuid.uuid4())
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(".tmp")
        tmp.write_text(new_id, encoding="utf-8")
        try:
            os.chmod(tmp, 0o600)
        except OSError:
            # Permissions are best-effort; still publish the id.
            pass
        tmp.replace(path)
        return new_id
    except OSError:
        # last resort: a hostname is machine-stable enough to keep two machines'
        # colliding session uuid apart even if we can't persist the device file.
        try:
            host = socket.gethostname().strip()
        except OSError:
            host = ""
        return host or new_id
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _account_id() -> str | None:
    """Return the optional account label from ``ZENO_ACCOUNT_ID``.

    The value is stripped of surrounding whitespace; an unset, empty or
    whitespace-only variable yields None. Per the original note, the server
    stamps the authoritative identity itself (server_user_id, from the tailnet
    login), so this label is purely an operator-supplied tag.
    """
    raw = os.environ.get("ZENO_ACCOUNT_ID")
    label = raw.strip() if raw else ""
    if label:
        return label
    return None
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# ---------------------------------------------------------------------------
|
|
187
|
+
# real-time push: build payload (derived numerics only) -> POST -> spool on fail
|
|
188
|
+
# ---------------------------------------------------------------------------
|
|
189
|
+
def _sync_disabled() -> bool:
    """Return True when ``ZENO_SYNC_DISABLE`` is set to any non-empty value."""
    flag = os.environ.get("ZENO_SYNC_DISABLE")
    return flag is not None and flag != ""
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _api_base_url() -> str:
    """Return the sync API base URL without trailing slashes.

    ``ZENO_API_BASE_URL`` wins when set and non-empty; otherwise the built-in
    DEFAULT_API_BASE_URL is used.
    """
    base = os.environ.get("ZENO_API_BASE_URL")
    if not base:
        base = DEFAULT_API_BASE_URL
    return base.rstrip("/")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _sync_timeout_s() -> float:
    """Return the socket timeout (seconds) used for sync POSTs.

    ``ZENO_SYNC_TIMEOUT_S`` overrides the built-in SYNC_TIMEOUT_S when it parses
    to a finite, strictly positive number. Anything else - unset, empty,
    non-numeric, ``nan``, ``inf``, zero or negative - falls back to the default,
    because none of those is a usable blocking-socket timeout and each would make
    every push fail.
    """
    raw = os.environ.get("ZENO_SYNC_TIMEOUT_S")
    if not raw:
        return SYNC_TIMEOUT_S
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return SYNC_TIMEOUT_S
    if not math.isfinite(value) or value <= 0:
        return SYNC_TIMEOUT_S
    return value
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _outbox_path() -> Path:
    """Return the path of the NDJSON spool for events that failed to push."""
    home = _zeno_home()
    return home / "outbox.ndjson"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _build_ingest_payload(event: str, payload: dict) -> dict | None:
    """Assemble the ingest body for one hook event - the single privacy chokepoint.

    Only allowlisted fields are emitted: opaque ids, bounded labels
    (event_name / model / harness), a timestamp and, for a Stop that stashed a
    cognition sample in _PENDING_PUSH, that derived sample. Nothing in here reads
    prompt text or transcript content.

    Returns None when the event is not in _PUSH_EVENT_NAMES or the payload has no
    session id.
    """
    event_name = _PUSH_EVENT_NAMES.get(event)
    if event_name is None:
        return None
    cc_session = payload.get("session_id")
    if not cc_session:
        return None

    stash = _PENDING_PUSH.get("sample") or {}
    # Only a Stop that actually wrote a cognition row carries the per-turn sample;
    # every other case (incl. a Stop with nothing stashed) is a bare lifecycle event.
    with_sample = event == "Stop" and bool(stash)

    model = payload.get("model")
    if with_sample:
        session_uuid = stash.get("zeno_session")
        if not model:
            model = stash.get("model")
    else:
        session_uuid = _cc_session_to_zeno_session(cc_session)

    body: dict = {
        "device_id": _device_id(),
        "account_id": _account_id(),
        "event_name": event_name,
        "session_id": session_uuid,
        "cc_session_id": str(cc_session),
        "harness": DEFAULT_HARNESS,
        "model": model,
    }
    body["ts"] = stash.get("ts") if with_sample else _now_iso()
    if with_sample:
        body["sample"] = stash.get("sample")
    return body
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _idempotency_key(body: dict) -> str:
    """Build the ``device:session:entity`` idempotency key for an ingest body.

    The entity is the sample's ``sample_id`` when the body carries one, otherwise
    ``<event_name>:<ts>``. Per the original note this mirrors the key the server
    derives itself when no header is sent, so a replay is a no-op.
    """
    sample = body.get("sample") or {}
    sample_id = sample.get("sample_id")
    if sample_id:
        entity = sample_id
    else:
        entity = f"{body['event_name']}:{body['ts']}"
    return f"{body['device_id']}:{body['session_id']}:{entity}"
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _post_ingest(body: dict, timeout_s: float) -> bool:
    """POST one ingest body to the sync API.

    Args:
        body: the ingest body (as produced by _build_ingest_payload or read back
            from the outbox).
        timeout_s: explicit socket timeout, in seconds, for the request.

    Returns:
        True on a 2xx response, and on HTTP 409 (idempotent replay - already
        stored, so there is nothing left to deliver). False on every other
        failure.

    Never raises: building the request (URL, JSON encoding, idempotency key) now
    happens inside the same guard as the network call, so a malformed base URL or
    body is reported as a failed delivery instead of escaping as an exception.
    """
    try:
        url = _api_base_url() + SYNC_PATH
        data = json.dumps(body).encode("utf-8")
        req = urllib.request.Request(
            url,
            data=data,
            method="POST",
            headers={
                "Content-Type": "application/json",
                "Idempotency-Key": _idempotency_key(body),
            },
        )
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:  # noqa: S310 (trusted tailnet)
            return 200 <= getattr(resp, "status", 0) < 300
    except urllib.error.HTTPError as exc:
        # The server answered (4xx/5xx). 409 = idempotent replay conflict (already
        # stored) - treat as delivered so we don't spool a no-op forever. Other
        # statuses are genuine failures worth spooling.
        return exc.code == 409
    except Exception:
        return False
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _spool(body: dict) -> None:
    """Append one event to the outbox as a compact JSON line, then enforce the cap.

    Best-effort: an OSError while writing is logged via _debug and swallowed.
    """
    try:
        outbox = _outbox_path()
        outbox.parent.mkdir(parents=True, exist_ok=True)
        record = json.dumps(body, separators=(",", ":"))
        with outbox.open("a", encoding="utf-8") as handle:
            handle.write(record + "\n")
        # Drop the oldest lines if the append pushed the spool over its cap.
        _trim_outbox(outbox)
    except OSError as exc:
        _debug(f"spool failed: {exc}")
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _trim_outbox(path: Path) -> None:
    """Keep only the newest OUTBOX_MAX_LINES lines of the outbox at ``path``.

    A file at or under the cap is left untouched. Otherwise the surviving lines
    are written to a sibling ``.tmp`` file which then replaces the original.
    OSError is logged via _debug and swallowed.
    """
    try:
        with path.open("r", encoding="utf-8") as src:
            all_lines = src.readlines()
        if len(all_lines) > OUTBOX_MAX_LINES:
            scratch = path.with_suffix(".tmp")
            with scratch.open("w", encoding="utf-8") as dst:
                dst.writelines(all_lines[-OUTBOX_MAX_LINES:])
            scratch.replace(path)
    except OSError as exc:
        _debug(f"trim_outbox failed: {exc}")
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _drain_outbox(timeout_s: float) -> None:
    """After a live push, best-effort + time-boxed drain of a few spooled lines.

    Lines are replayed oldest-first. Draining stops at the time budget
    (OUTBOX_DRAIN_BUDGET_S), at the per-call cap (OUTBOX_DRAIN_MAX), or at the
    first delivery failure (so an offline window doesn't re-spin). The outbox is
    then rewritten with whatever was not resolved.

    Lines that can never be delivered are dropped rather than left at the head of
    the spool, where they would block every later drain: unparseable JSON, JSON
    that is not an ingest body (not an object, or missing the id/key fields), and
    bodies on which _post_ingest raises.

    Args:
        timeout_s: socket timeout passed through to each _post_ingest call.
    """
    path = _outbox_path()
    try:
        if not path.exists():
            return
        with path.open("r", encoding="utf-8") as f:
            lines = [ln for ln in f.read().splitlines() if ln.strip()]
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; a corrupt spool must not raise.
        _debug(f"drain read failed: {exc}")
        return
    if not lines:
        return
    required_keys = ("device_id", "session_id", "event_name", "ts")
    deadline = _monotonic() + OUTBOX_DRAIN_BUDGET_S
    consumed = 0  # count of leading lines fully resolved (delivered or dropped)
    for line in lines:
        if consumed >= OUTBOX_DRAIN_MAX or _monotonic() >= deadline:
            break
        try:
            body = json.loads(line)
        except Exception:
            consumed += 1  # unparseable - drop it, keep draining
            continue
        if not isinstance(body, dict) or any(key not in body for key in required_keys):
            consumed += 1  # valid JSON but not an ingest body - drop it
            continue
        try:
            delivered = _post_ingest(body, timeout_s)
        except Exception as exc:
            # A raise here means the body itself is unusable (network problems are
            # reported as False), so retrying it can never succeed.
            _debug(f"drain dropped malformed line: {exc}")
            consumed += 1
            continue
        if not delivered:
            break  # stop on first failure; leave this line + the rest spooled
        consumed += 1
    if consumed == 0:
        return
    remainder = lines[consumed:]
    try:
        if remainder:
            tmp = path.with_suffix(".tmp")
            with tmp.open("w", encoding="utf-8") as f:
                f.write("\n".join(remainder) + "\n")
            tmp.replace(path)
        else:
            path.unlink(missing_ok=True)
    except OSError as exc:
        _debug(f"drain rewrite failed: {exc}")
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _monotonic() -> float:
    """Return ``time.monotonic()``; wrapped so the clock has a single call site."""
    from time import monotonic

    return monotonic()
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _maybe_push(event: str, payload: dict) -> None:
    """Post-commit push entry point for one hook event.

    Does nothing when sync is disabled or the event yields no ingest body.
    Otherwise POSTs the derived-only body: on success a few spooled events are
    drained, on failure the body is spooled. Any exception is logged via _debug
    and swallowed - local capture is already committed, so no network outcome may
    affect the hook.
    """
    if _sync_disabled():
        return
    try:
        body = _build_ingest_payload(event, payload)
        if body is not None:
            timeout_s = _sync_timeout_s()
            delivered = _post_ingest(body, timeout_s)
            if delivered:
                _drain_outbox(timeout_s)
            else:
                _spool(body)
    except Exception as exc:  # network/build path must never break the hook
        _debug(f"maybe_push failed: {exc}")
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _cc_session_to_zeno_session(cc_session_id: str) -> str:
    """Derive the zeno session UUID for a Claude Code session id.

    Deterministic: uuid5 in the URL namespace over ``cc:<cc_session_id>``.
    """
    namespaced = f"cc:{cc_session_id}"
    return str(uuid.uuid5(uuid.NAMESPACE_URL, namespaced))
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _debug(message: str) -> None:
    """Append a timestamped line to ``<zeno home>/cc-bridge.log``.

    Active only when ``ZENO_CC_BRIDGE_DEBUG`` is set to a non-empty value. An
    OSError while logging is swallowed.
    """
    if os.environ.get("ZENO_CC_BRIDGE_DEBUG"):
        log_file = _zeno_home() / "cc-bridge.log"
        try:
            log_file.parent.mkdir(parents=True, exist_ok=True)
            with log_file.open("a", encoding="utf-8") as handle:
                handle.write(f"{_now_iso()} {message}\n")
        except OSError:
            pass
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def _parse_ts(s):
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    ``Z`` is rewritten to ``+00:00`` before parsing. Returns None for a non-string,
    an empty string, or anything ``datetime.fromisoformat`` rejects.

    A naive result is assumed to be UTC and made aware. _now() is always aware, so
    a naive transcript timestamp would otherwise raise TypeError when compared or
    subtracted and silently drop the whole turn's cognition sample.
    """
    if not (isinstance(s, str) and s):
        return None
    try:
        parsed = datetime.fromisoformat(s.replace("Z", "+00:00"))
    except Exception:
        return None
    if parsed.tzinfo is not None:
        return parsed
    return parsed.replace(tzinfo=UTC)
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
# ---------------------------------------------------------------------------
|
|
416
|
+
# capture schema (v1 tables; cognition_samples is owned by zeno_cognition)
|
|
417
|
+
# ---------------------------------------------------------------------------
|
|
418
|
+
def _ensure_schema(conn: sqlite3.Connection) -> None:
    """Create the v1 capture tables and their indexes if they do not exist yet.

    Covers sessions, agent_runs, supervision_events, load_probes and
    babysitting_tax_points. The cognition_samples schema is delegated to the
    shared model (``_COG.ensure_schema``) when that module was importable; a
    failure there is logged via _debug and never propagates.
    """
    capture_ddl = """
        CREATE TABLE IF NOT EXISTS sessions (
            id TEXT PRIMARY KEY, start_at TEXT NOT NULL, end_at TEXT,
            agent_count_max INTEGER NOT NULL, harness TEXT NOT NULL,
            project_id TEXT NOT NULL, synced_at TEXT
        );
        CREATE TABLE IF NOT EXISTS agent_runs (
            id TEXT PRIMARY KEY, session_id TEXT NOT NULL, harness TEXT NOT NULL,
            model TEXT NOT NULL, started_at TEXT NOT NULL, ended_at TEXT,
            outcome TEXT NOT NULL, synced_at TEXT
        );
        CREATE INDEX IF NOT EXISTS ix_agent_runs_session_id ON agent_runs (session_id);
        CREATE TABLE IF NOT EXISTS supervision_events (
            id TEXT PRIMARY KEY, session_id TEXT NOT NULL, agent_run_id TEXT,
            type TEXT NOT NULL, timestamp TEXT NOT NULL, latency_ms_to_decide INTEGER,
            metadata_json TEXT NOT NULL DEFAULT '{}', synced_at TEXT
        );
        CREATE INDEX IF NOT EXISTS ix_supervision_events_session_id
            ON supervision_events (session_id);
        CREATE INDEX IF NOT EXISTS ix_supervision_events_agent_run_id
            ON supervision_events (agent_run_id);
        CREATE TABLE IF NOT EXISTS load_probes (
            id TEXT PRIMARY KEY, session_id TEXT NOT NULL, prompted_at TEXT NOT NULL,
            responded_at TEXT, skipped INTEGER NOT NULL, subscales_json TEXT, synced_at TEXT
        );
        CREATE INDEX IF NOT EXISTS ix_load_probes_session_id ON load_probes (session_id);
        CREATE TABLE IF NOT EXISTS babysitting_tax_points (
            id TEXT PRIMARY KEY, session_id TEXT NOT NULL, n_agents_active INTEGER NOT NULL,
            composite_load REAL NOT NULL, output_quality REAL, synced_at TEXT
        );
        CREATE INDEX IF NOT EXISTS ix_babysitting_tax_points_session_id
            ON babysitting_tax_points (session_id);
    """
    conn.executescript(capture_ddl)
    # cognition_samples schema is owned by the shared model (single source of truth).
    if _COG is None:
        return
    try:
        _COG.ensure_schema(conn)
    except Exception as exc:  # never let cognition schema break capture
        _debug(f"cognition ensure_schema failed: {exc}")
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _open_db() -> sqlite3.Connection:
    """Open (creating if needed) the local zeno SQLite store, ready for capture.

    Creates the parent directory, connects with a 2 second busy timeout, switches
    to WAL journaling with ``synchronous = NORMAL`` and ensures the capture schema.

    Returns:
        An open connection; the caller owns it and must close it.

    Raises:
        Whatever the connect / PRAGMA / schema step raises. If setup fails after
        the connection was opened, the connection is closed before the exception
        propagates so a failed open does not leak the handle.
    """
    db_path = _zeno_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(db_path, timeout=2.0)
    try:
        conn.execute("PRAGMA journal_mode = WAL")
        conn.execute("PRAGMA synchronous = NORMAL")
        _ensure_schema(conn)
    except Exception:
        conn.close()
        raise
    return conn
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# ---------------------------------------------------------------------------
|
|
471
|
+
# per-session turn-state (cross-event scratch for the cognition signal)
|
|
472
|
+
# ---------------------------------------------------------------------------
|
|
473
|
+
def _turn_state_path(zeno_session: str) -> Path:
    """Return the per-session scratch file ``<zeno home>/turn-state/<session>.json``."""
    state_dir = _zeno_home() / "turn-state"
    return state_dir / f"{zeno_session}.json"
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _load_turn_state(zeno_session: str) -> dict:
    """Load the per-session turn-state scratch dict.

    Returns an empty dict when the file is missing, unreadable, not valid JSON, or
    holds anything other than a JSON object - so callers can always treat the
    result as a dict. Never raises.
    """
    try:
        p = _turn_state_path(zeno_session)
        if p.exists():
            state = json.loads(p.read_text(encoding="utf-8"))
            # A stray list/string/number would break every caller's .get().
            if isinstance(state, dict):
                return state
    except Exception:
        # Deliberate best-effort: corrupt or unreadable state means "no state".
        pass
    return {}
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _save_turn_state(zeno_session: str, state: dict) -> None:
    """Persist the per-session turn-state scratch dict.

    The JSON is written to a sibling ``.tmp`` file which then replaces the real
    file. Best-effort: any failure is swallowed so capture is never blocked, but
    it is reported through _debug so a lost turn-state can be diagnosed when
    debug logging is on.
    """
    try:
        p = _turn_state_path(zeno_session)
        p.parent.mkdir(parents=True, exist_ok=True)
        tmp = p.with_suffix(".tmp")
        tmp.write_text(json.dumps(state), encoding="utf-8")
        tmp.replace(p)
    except Exception as exc:
        _debug(f"save_turn_state failed: {exc}")
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _baseline_path() -> str:
    """Return the cognition baseline file path, ``<zeno home>/cognition-baseline.json``, as a string."""
    baseline_file = _zeno_home() / "cognition-baseline.json"
    return str(baseline_file)
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def _cv(values) -> float:
    """Return the coefficient of variation of ``values``, clamped to 0..1.

    Only non-negative ints/floats are considered. Fewer than three usable values,
    or a mean at or below 1e-6, yields 0.0. The population standard deviation
    (divide by n) is used.
    """
    samples = []
    for v in values:
        if isinstance(v, (int, float)) and v >= 0:
            samples.append(float(v))
    count = len(samples)
    if count < 3:
        return 0.0
    mean = sum(samples) / count
    if mean <= 1e-6:
        return 0.0
    variance = sum((x - mean) ** 2 for x in samples) / count
    ratio = math.sqrt(variance) / mean
    return max(0.0, min(1.0, ratio))
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
# ---------------------------------------------------------------------------
|
|
516
|
+
# transcript stats for the current turn (tool uses, tokens) - bounded tail read
|
|
517
|
+
# ---------------------------------------------------------------------------
|
|
518
|
+
def _is_tool_result_only(obj) -> bool:
    """Tell whether a ``user`` transcript record is a tool-result echo.

    Per the original note, Claude Code writes the result of every assistant
    ``tool_use`` as a ``user``-type line whose ``message.content`` is a list of
    ``type == "tool_result"`` blocks. Those are mid-turn echoes, not human-prompt
    boundaries, so the boundary fallback in the transcript scan must not reset its
    per-turn counters on them.

    Returns True only when ``message.content`` is a non-empty list and every item
    is a dict with ``type == "tool_result"``. A string content, an empty list, or
    a list holding any other block type is treated as a human message (False).
    """
    message = obj.get("message") or {}
    blocks = message.get("content")
    if not (isinstance(blocks, list) and blocks):
        return False
    for item in blocks:
        if not isinstance(item, dict) or item.get("type") != "tool_result":
            return False
    return True
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _last_turn_stats(transcript_path, since_iso) -> dict:
    """Summarise the current turn from the tail of a JSONL transcript.

    At most TRANSCRIPT_TAIL_BYTES are read from the end of the file (a leading
    partial line is discarded). Only ``assistant`` records are counted.

    Args:
        transcript_path: path to the transcript; falsy or nonexistent returns the
            zeroed result.
        since_iso: ISO timestamp of the turn start. Assistant records stamped
            earlier are skipped. When it is missing or unparseable, the scan
            falls back to counting only what follows the last human ``user``
            record in the tail.

    Returns:
        A dict with ``tool_uses`` and ``edits`` (ints), and ``input``, ``output``,
        ``cache_read``, ``cache_creation``, ``total`` (token counts, or None when
        no usage was seen). Token fields come from the LAST usage dict seen in the
        turn, not a sum over messages; ``total`` is the sum of the numeric ones.

    Records that are valid JSON but not objects, records whose ``message`` is not
    an object, and tool blocks with a non-string ``name`` are tolerated instead of
    raising, so one odd line cannot discard the whole turn's stats.
    """
    out = {
        "tool_uses": 0,
        "edits": 0,
        "input": None,
        "output": None,
        "cache_read": None,
        "cache_creation": None,
        "total": None,
    }
    if not transcript_path or not os.path.exists(transcript_path):
        return out
    try:
        size = os.path.getsize(transcript_path)
        with open(transcript_path, "rb") as f:
            if size > TRANSCRIPT_TAIL_BYTES:
                f.seek(size - TRANSCRIPT_TAIL_BYTES)
                f.readline()  # discard partial line
            data = f.read().decode("utf-8", "replace")
    except Exception:
        return out
    since = _parse_ts(since_iso)
    # Bounded guard: when `since` is None (turn-state lost, or a Stop fired without a
    # recorded UserPromptSubmit), the `since and ts < since` filter below is a no-op,
    # so WITHOUT this we would attribute the WHOLE tail - many turns - to one sample
    # and badly inflate tool_uses/tokens. Heuristic fallback: count only the final
    # turn, i.e. reset the per-turn counters at each user-message boundary so we keep
    # just the assistant messages after the last human turn in the tail.
    use_boundary = since is None
    edit_tools = ("edit", "write", "multiedit", "notebookedit")
    last_usage = None
    for line in data.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            o = json.loads(line)
        except Exception:
            continue
        if not isinstance(o, dict):
            continue  # valid JSON, but not a transcript record
        otype = o.get("type")
        raw_msg = o.get("message")
        if use_boundary and otype == "user":
            # A tool_result user-line is the echo of the assistant's own tool call,
            # NOT a turn boundary. A non-dict message cannot be such an echo, so it
            # is treated as a human turn without calling the helper on it.
            is_echo = isinstance(raw_msg, dict) and _is_tool_result_only(o)
            if not is_echo:
                # new human turn: drop everything accumulated for the previous turn.
                out["tool_uses"] = 0
                out["edits"] = 0
                last_usage = None
                continue
        if otype != "assistant":
            continue
        ts = _parse_ts(o.get("timestamp"))
        if since and ts and ts < since:
            continue  # only this turn's assistant messages
        msg = raw_msg if isinstance(raw_msg, dict) else {}
        content = msg.get("content")
        if isinstance(content, list):
            for b in content:
                if isinstance(b, dict) and b.get("type") == "tool_use":
                    out["tool_uses"] += 1
                    name = b.get("name")
                    if isinstance(name, str) and name.lower() in edit_tools:
                        out["edits"] += 1
        u = msg.get("usage")
        if isinstance(u, dict) and u:
            last_usage = u
    if last_usage:
        out["input"] = last_usage.get("input_tokens")
        out["output"] = last_usage.get("output_tokens")
        out["cache_read"] = last_usage.get("cache_read_input_tokens")
        cc = last_usage.get("cache_creation")
        if isinstance(cc, dict):
            out["cache_creation"] = (cc.get("ephemeral_5m_input_tokens") or 0) + (
                cc.get("ephemeral_1h_input_tokens") or 0
            )
        else:
            out["cache_creation"] = last_usage.get("cache_creation_input_tokens")
        parts = [
            p
            for p in (out["input"], out["output"], out["cache_read"], out["cache_creation"])
            if isinstance(p, (int, float))
        ]
        out["total"] = sum(parts) if parts else None
    return out
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
# ---------------------------------------------------------------------------
|
|
626
|
+
# event handlers
|
|
627
|
+
# ---------------------------------------------------------------------------
|
|
628
|
+
def handle_session_start(conn: sqlite3.Connection, payload: dict) -> None:
    """Register the session named in a SessionStart payload.

    Payloads without a ``session_id`` are ignored. The row is written with
    ``INSERT OR IGNORE``, with ``end_at`` and ``synced_at`` left NULL and
    ``agent_count_max`` initialised to 1.

    Args:
        conn: Open connection to the local capture database.
        payload: Decoded hook payload.
    """
    raw_session = payload.get("session_id")
    if not raw_session:
        return
    session_params = (
        _cc_session_to_zeno_session(raw_session),
        _now_iso(),
        DEFAULT_HARNESS,
        _project_id(DEFAULT_PROJECT_SLUG),
    )
    conn.execute(
        """
        INSERT OR IGNORE INTO sessions
            (id, start_at, end_at, agent_count_max, harness, project_id, synced_at)
        VALUES (?, ?, NULL, 1, ?, ?, NULL)
        """,
        session_params,
    )
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def handle_user_prompt_submit(conn: sqlite3.Connection, payload: dict) -> None:
    """Open a new agent run for a submitted prompt.

    Ensures the session row exists, inserts an ``agent_runs`` row with outcome
    ``'unknown'`` and no end time, refreshes the session's ``agent_count_max``
    and records the pending prompt features for the turn.

    Args:
        conn: Open connection to the local capture database.
        payload: Decoded hook payload; ignored when it has no ``session_id``.
    """
    raw_session = payload.get("session_id")
    if not raw_session:
        return
    session_id = _cc_session_to_zeno_session(raw_session)

    # Same statement as the SessionStart handler: a prompt may be the first
    # event seen for a session, and OR IGNORE makes a repeat insert harmless.
    session_params = (
        session_id,
        _now_iso(),
        DEFAULT_HARNESS,
        _project_id(DEFAULT_PROJECT_SLUG),
    )
    conn.execute(
        """
        INSERT OR IGNORE INTO sessions
            (id, start_at, end_at, agent_count_max, harness, project_id, synced_at)
        VALUES (?, ?, NULL, 1, ?, ?, NULL)
        """,
        session_params,
    )

    # Model precedence: payload value, then CLAUDE_MODEL, then the default.
    chosen_model = payload.get("model")
    if not chosen_model:
        chosen_model = os.environ.get("CLAUDE_MODEL", DEFAULT_MODEL)

    run_params = (
        str(uuid.uuid4()),
        session_id,
        DEFAULT_HARNESS,
        chosen_model,
        _now_iso(),
    )
    conn.execute(
        """
        INSERT INTO agent_runs
            (id, session_id, harness, model, started_at, ended_at, outcome, synced_at)
        VALUES (?, ?, ?, ?, ?, NULL, 'unknown', NULL)
        """,
        run_params,
    )

    _update_agent_count_max(conn, session_id)
    _record_pending_prompt(session_id, payload)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _record_pending_prompt(zeno_session: str, payload: dict) -> None:
    """Record the real submitted prompt for this turn (the effort signal) + the
    review gap since the agent last stopped (the verification think-time). Stored
    in turn-state and consumed at Stop. Best-effort; never raises.

    Only derived features (length, flags, counters, timestamps) are stored in
    the ``pending`` entry; the prompt text itself is not written.

    Args:
        zeno_session: Zeno session id that keys the turn-state.
        payload: Decoded hook payload; ``payload["prompt"]`` is the submitted text.
    """
    if _COG is None:
        return
    try:
        text = (payload.get("prompt") or "").strip()
        low = text.lower()
        state = _load_turn_state(zeno_session)
        now = _now()
        last_stop = _parse_ts(state.get("last_stop_ts"))
        review_gap = (now - last_stop).total_seconds() if last_stop else 0.0
        # Exact-match membership already bounds the length, so no separate
        # length cap is applied: a cap of 6 characters would make "continue"
        # and "ship it" impossible to match.
        lead = low in (
            "y",
            "yes",
            "go",
            "ok",
            "continue",
            "do it",
            "ship it",
            "next",
        )
        correction_markers = (
            "fix",
            "wrong",
            "still",
            "broken",
            "redo",
            "revert",
            "undo",
            "no,",
            "not work",
        )
        error_markers = ("error", "broken", "failing", "traceback", "exception")
        # Substring match: any marker anywhere in the prompt counts.
        is_correction = any(m in low for m in correction_markers)
        # `or 0` tolerates a stored null so one bad value does not drop the
        # whole record via the except below.
        streak = int(state.get("correction_streak") or 0)
        # Consecutive corrections accumulate; any non-correction resets to 0.
        streak = streak + 1 if is_correction else 0
        state["correction_streak"] = streak
        state["pending"] = {
            "prompt_ts": now.isoformat(),
            "review_gap_s": round(max(0.0, review_gap), 1),
            "chars": len(text),
            "has_code": "```" in text,
            "multiline": text.count("\n") >= 2,
            "lead_word": lead,
            "reprompts": streak,
            "errors": any(m in low for m in error_markers),
        }
        _save_turn_state(zeno_session, state)
    except Exception as exc:
        _debug(f"record_pending failed: {exc}")
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def _update_agent_count_max(conn: sqlite3.Connection, zeno_session: str) -> None:
    """Raise the session's ``agent_count_max`` to the current open-run count.

    Counts the session's ``agent_runs`` rows that have no ``ended_at`` and
    stores ``MAX(agent_count_max, count)`` on the session row.

    Args:
        conn: Open connection to the local capture database.
        zeno_session: Session id whose counter is refreshed.
    """
    cursor = conn.execute(
        "SELECT COUNT(*) FROM agent_runs WHERE session_id = ? AND ended_at IS NULL",
        (zeno_session,),
    )
    (running,) = cursor.fetchone()
    conn.execute(
        "UPDATE sessions SET agent_count_max = MAX(agent_count_max, ?) WHERE id = ?",
        (running, zeno_session),
    )
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def handle_stop(conn: sqlite3.Connection, payload: dict) -> None:
    """Close the most recent open run for the session, then write cognition.

    The newest ``agent_runs`` row without an ``ended_at`` (if any) gets an end
    timestamp and an inferred outcome. The cognition sample is attempted
    whether or not an open run was found.

    Args:
        conn: Open connection to the local capture database.
        payload: Decoded hook payload; ignored when it has no ``session_id``.
    """
    raw_session = payload.get("session_id")
    if not raw_session:
        return
    session_id = _cc_session_to_zeno_session(raw_session)

    latest_open = conn.execute(
        """
        SELECT id, started_at FROM agent_runs
        WHERE session_id = ? AND ended_at IS NULL
        ORDER BY started_at DESC LIMIT 1
        """,
        (session_id,),
    ).fetchone()

    if latest_open is not None:
        run_id, run_started = latest_open
        ended_at = _now_iso()
        outcome = _infer_outcome(conn, session_id, run_started)
        conn.execute(
            "UPDATE agent_runs SET ended_at = ?, outcome = ? WHERE id = ?",
            (ended_at, outcome, run_id),
        )

    _write_cognition(conn, payload, session_id)
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def _write_cognition(conn: sqlite3.Connection, payload: dict, zeno_session: str) -> None:
    """Compose the five drivers for the just-finished turn and write one
    cognition_samples row. Best-effort: any failure leaves capture intact.

    Steps, in order: load turn-state, derive timing and transcript stats,
    compute the raw driver values, compose them against the stored baselines,
    update and save the baselines, insert the sample row and stash it in
    ``_PENDING_PUSH`` when compose reports ``ok``, then persist the turn-state.

    Args:
        conn: Open connection to the local capture database.
        payload: Decoded Stop hook payload (``transcript_path``, ``effort`` and
            ``model`` are read here).
        zeno_session: Zeno session id for the sample and the turn-state.
    """
    if _COG is None:
        return
    try:
        state = _load_turn_state(zeno_session)
        pending = state.get("pending") or {}
        now = _now()

        # Seconds from the recorded prompt to now; 0.0 when no prompt_ts exists.
        prompt_ts = _parse_ts(pending.get("prompt_ts"))
        autonomous_s = (now - prompt_ts).total_seconds() if prompt_ts else 0.0
        review_gap_s = float(pending.get("review_gap_s", 0.0) or 0.0)

        # Minutes since the session's start_at; 0.0 when the row or its
        # timestamp is unavailable.
        start_row = conn.execute(
            "SELECT start_at FROM sessions WHERE id = ?", (zeno_session,)
        ).fetchone()
        start_ts = _parse_ts(start_row[0]) if start_row else None
        active_min = ((now - start_ts).total_seconds() / 60.0) if start_ts else 0.0

        # Bound the transcript scan: prefer this turn's prompt_ts, but if it was lost
        # (turn-state missing / Stop without a recorded UserPromptSubmit) fall back to
        # the session start_at so we never pass since=None and over-count the tail.
        since_iso = pending.get("prompt_ts") or (start_row[0] if start_row else None)
        tstats = _last_turn_stats(payload.get("transcript_path"), since_iso)

        # Rolling window of the most recent RECENT_DURATIONS turn durations.
        durations = state.get("durations", [])
        durations.append(round(max(0.0, autonomous_s), 1))
        durations = durations[-RECENT_DURATIONS:]
        # NOTE(review): _cv is presumably a coefficient of variation - confirm.
        inter_var = _cv(durations)
        cadence_reg = max(0.0, 1.0 - inter_var)

        reprompts = int(pending.get("reprompts", 0))
        # Churn saturates at 1.0 once the reprompt streak reaches 3.
        churn = min(1.0, reprompts / 3.0)
        errors = bool(pending.get("errors"))
        # effort level is only read when the payload carries an "effort" dict.
        effort_level = None
        eff = payload.get("effort")
        if isinstance(eff, dict):
            effort_level = eff.get("level")

        raw = {
            "effort": _COG.effort_raw(
                pending.get("chars", 0),
                pending.get("has_code", False),
                pending.get("multiline", False),
                effort_level,
                pending.get("lead_word", False),
            ),
            "autonomy": _COG.autonomy_raw(
                tstats["tool_uses"], pending.get("chars", 0), autonomous_s
            ),
            # accepts/rejects left 0: agent edit count is NOT a human accept/reject
            # decision - feeding it would raise "review" exactly when autonomous
            # editing is heaviest (backwards). gap + reprompts + churn carry
            # verification until a real accept/reject signal exists.
            "verification": _COG.verification_raw(review_gap_s, reprompts, churn),
            # Hour is taken in the machine's local timezone.
            "fatigue": _COG.fatigue_raw(active_min, inter_var, now.astimezone().hour),
            # NOTE(review): the fixed 0.5 second argument is not explained here.
            "flow": _COG.flow_raw(cadence_reg, 0.5),
        }

        bl = _COG.Baselines.load(_baseline_path())
        # Drop any non-numeric entries from the stored score history.
        recent = [s for s in state.get("recent_scores", []) if isinstance(s, (int, float))]
        # long_session: the session is at least 120 minutes old.
        context = {"long_session": active_min >= 120, "churn": churn, "errors": errors}
        comp = _COG.compose(raw, bl, recent, context)

        # always update baselines (even if compose declined) so they keep learning
        for d, v in raw.items():
            bl.update(d, v)
        bl.save(_baseline_path())

        if comp.get("ok"):
            total = tstats["total"]
            ctx_pct = None
            # Context estimate uses the input-side token counts only.
            ins = [tstats["input"], tstats["cache_read"], tstats["cache_creation"]]
            if any(isinstance(x, (int, float)) for x in ins):
                used = sum(x for x in ins if isinstance(x, (int, float)))
                # used / 2000 is used / 200_000 as a percentage, capped at 100.
                ctx_pct = round(min(100.0, used / 2000.0), 1)  # est. vs ~200k window
            drv = comp["drivers"]
            model = payload.get("model") or DEFAULT_MODEL
            # The cognition row id IS the sample_id - the third leg of the dedup
            # triple (device_id, session_id, sample_id). Pushing the same id keeps
            # the local row and the synced row identified by construction.
            sample_id = str(uuid.uuid4())
            conn.execute(
                """
                INSERT INTO cognition_samples (
                    id, session_id, ts, context_pct,
                    input_tokens, output_tokens, cache_read_tokens,
                    cache_creation_tokens, total_tokens,
                    attention_score, attention_effort, attention_deliberation,
                    attention_trend, model, harness,
                    attention_autonomy, attention_verification,
                    attention_fatigue, attention_flow
                ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """,
                # Parameter order must match the column list above (19 values).
                (
                    sample_id,
                    zeno_session,
                    now.isoformat(),
                    ctx_pct,
                    tstats["input"],
                    tstats["output"],
                    tstats["cache_read"],
                    tstats["cache_creation"],
                    total,
                    comp["score"],
                    drv.get("effort"),
                    None,  # attention_deliberation: v1 legacy column, unused in v2
                    comp.get("trend"),
                    model,
                    DEFAULT_HARNESS,
                    drv.get("autonomy"),
                    drv.get("verification"),
                    drv.get("fatigue"),
                    drv.get("flow"),
                ),
            )
            # Rolling window of the most recent RECENT_SCORES composed scores.
            recent.append(comp["score"])
            recent = recent[-RECENT_SCORES:]

            # Stash the DERIVED-ONLY sample for the post-commit push. Numerics +
            # bounded labels + ids ONLY - never any prompt/transcript text. main()
            # consumes _PENDING_PUSH after conn.commit()/close() so capture stays
            # decoupled from the network.
            _PENDING_PUSH["sample"] = {
                "zeno_session": zeno_session,
                "ts": now.isoformat(),
                "model": model,
                "sample": {
                    "sample_id": sample_id,
                    "attention_score": comp["score"],
                    "attention_effort": drv.get("effort"),
                    "attention_autonomy": drv.get("autonomy"),
                    "attention_verification": drv.get("verification"),
                    "attention_fatigue": drv.get("fatigue"),
                    "attention_flow": drv.get("flow"),
                    "attention_trend": comp.get("trend"),
                    "context_pct": ctx_pct,
                    "input_tokens": tstats["input"],
                    "output_tokens": tstats["output"],
                    "cache_read_tokens": tstats["cache_read"],
                    "cache_creation_tokens": tstats["cache_creation"],
                    "total_tokens": total,
                },
            }

        # Persist the windows, stamp this Stop (the next prompt measures its
        # review gap from last_stop_ts) and clear the consumed pending entry.
        state["recent_scores"] = recent
        state["durations"] = durations
        state["last_stop_ts"] = now.isoformat()
        state["pending"] = {}
        _save_turn_state(zeno_session, state)
    except Exception as exc:
        _debug(f"write_cognition failed: {exc}")
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
def _infer_outcome(conn: sqlite3.Connection, zeno_session: str, started_at: str) -> str:
    """Classify a run as ``"completed"`` or ``"unknown"``.

    The result is ``"unknown"`` when the session has at least one
    ``intervene`` supervision event whose timestamp is at or after
    ``started_at``, and ``"completed"`` otherwise.

    Args:
        conn: Open connection to the local capture database.
        zeno_session: Session id the run belongs to.
        started_at: Run start timestamp, compared against event timestamps in SQL.

    Returns:
        ``"unknown"`` or ``"completed"``.
    """
    hit = conn.execute(
        """
        SELECT 1 FROM supervision_events
        WHERE session_id = ? AND type = 'intervene' AND timestamp >= ? LIMIT 1
        """,
        (zeno_session, started_at),
    ).fetchone()
    if hit:
        return "unknown"
    return "completed"
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
def handle_notification(conn: sqlite3.Connection, payload: dict) -> None:
    """Record a Notification hook as an ``intervene`` supervision event.

    The event metadata always carries ``source = "cc:notification"``. A
    non-empty string ``message`` from the payload is included, truncated to
    its first 200 characters.

    Args:
        conn: Open connection to the local capture database.
        payload: Decoded hook payload; ignored when it has no ``session_id``.
    """
    raw_session = payload.get("session_id")
    if not raw_session:
        return
    session_id = _cc_session_to_zeno_session(raw_session)

    details = {"source": "cc:notification"}
    note = payload.get("message")
    if isinstance(note, str) and note:
        details["message"] = note[:200]

    event_params = (
        str(uuid.uuid4()),
        session_id,
        _now_iso(),
        json.dumps(details),
    )
    conn.execute(
        """
        INSERT INTO supervision_events
            (id, session_id, agent_run_id, type, timestamp,
             latency_ms_to_decide, metadata_json, synced_at)
        VALUES (?, ?, NULL, 'intervene', ?, NULL, ?, NULL)
        """,
        event_params,
    )
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
def handle_session_end(conn: sqlite3.Connection, payload: dict) -> None:
    """Close every open run and the session itself, then drop turn-state.

    All ``agent_runs`` rows of the session without an ``ended_at`` are ended
    with a shared timestamp and an inferred outcome. The session's ``end_at``
    is set only if it is still NULL. Removing the per-session turn-state file
    is best-effort: a failure is logged and never propagates.

    Args:
        conn: Open connection to the local capture database.
        payload: Decoded hook payload; ignored when it has no ``session_id``.
    """
    cc_session = payload.get("session_id")
    if not cc_session:
        return
    zeno_session = _cc_session_to_zeno_session(cc_session)
    # One timestamp for every row touched by this event.
    now = _now_iso()
    open_runs = conn.execute(
        "SELECT id, started_at FROM agent_runs WHERE session_id = ? AND ended_at IS NULL",
        (zeno_session,),
    ).fetchall()
    for run_id, started_at in open_runs:
        conn.execute(
            "UPDATE agent_runs SET ended_at = ?, outcome = ? WHERE id = ?",
            (now, _infer_outcome(conn, zeno_session, started_at), run_id),
        )
    conn.execute(
        "UPDATE sessions SET end_at = ? WHERE id = ? AND end_at IS NULL",
        (now, zeno_session),
    )
    # tidy the per-session turn-state scratch
    try:
        _turn_state_path(zeno_session).unlink(missing_ok=True)
    except Exception as exc:
        # Cleanup must not break capture, but leave a trace instead of
        # swallowing the failure silently.
        _debug(f"turn-state cleanup failed: {exc}")
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
# Maps the payload's "hook_event_name" to the handler that processes it.
# Every handler takes (conn, payload); event names missing from this table
# are logged and skipped by main().
_DISPATCH = {
    "SessionStart": handle_session_start,
    "UserPromptSubmit": handle_user_prompt_submit,
    "Stop": handle_stop,
    "Notification": handle_notification,
    "SessionEnd": handle_session_end,
}
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def main() -> int:
    """Hook entry point: read one JSON event from stdin and dispatch it.

    Empty input is treated as an empty payload. Malformed JSON, JSON that is
    not an object, and events without a registered handler are logged via
    ``_debug`` and skipped. Handler errors are logged and the connection is
    always closed. The post-commit push runs only when the local commit
    succeeded.

    Returns:
        0 on every path handled here, so a capture problem is not reported to
        the calling harness as a hook failure.
    """
    raw = sys.stdin.read()
    try:
        payload = json.loads(raw) if raw.strip() else {}
    except json.JSONDecodeError:
        _debug(f"bad json: {raw[:200]!r}")
        return 0
    if not isinstance(payload, dict):
        # Valid JSON need not be an object (list, string, number, null); those
        # values have no .get() and would crash the hook with AttributeError.
        _debug(f"payload is not a JSON object: {raw[:200]!r}")
        return 0
    event = payload.get("hook_event_name")
    # A non-string event (e.g. a list) is unhashable and would raise TypeError
    # in the dict lookup; treat it like any other unknown event.
    handler = _DISPATCH.get(event) if isinstance(event, str) else None
    if handler is None:
        _debug(f"no handler for event: {event!r}")
        return 0
    _PENDING_PUSH.clear()  # fresh per invocation; never inherit a prior stash
    try:
        conn = _open_db()
    except sqlite3.Error as exc:
        _debug(f"open_db failed: {exc}")
        return 0
    committed = False
    try:
        handler(conn, payload)
        conn.commit()
        committed = True
        _debug(f"handled {event} for session {payload.get('session_id')!r}")
    except sqlite3.Error as exc:
        _debug(f"handler error on {event}: {exc}")
    except Exception as exc:  # cognition path must never break capture
        _debug(f"unexpected error on {event}: {exc}")
    finally:
        conn.close()
    # LOCAL CAPTURE IS SACRED: the push runs STRICTLY AFTER the local commit/close
    # above, only when that commit succeeded, and is fully swallowed inside
    # _maybe_push. The hook ALWAYS returns 0 regardless of any network outcome.
    if committed:
        _maybe_push(event, payload)
    return 0
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
# Script entry: exit with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|