zeno-cli 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. zeno_adapters/__init__.py +17 -0
  2. zeno_adapters/_common.py +38 -0
  3. zeno_adapters/anthropic.py +68 -0
  4. zeno_adapters/claude_code.py +101 -0
  5. zeno_adapters/crewai.py +92 -0
  6. zeno_adapters/langgraph.py +49 -0
  7. zeno_adapters/openai.py +108 -0
  8. zeno_cli/__init__.py +1 -0
  9. zeno_cli/_hooks/cc_bridge.py +1016 -0
  10. zeno_cli/doctor.py +535 -0
  11. zeno_cli/hook_install.py +269 -0
  12. zeno_cli/hud/__init__.py +1 -0
  13. zeno_cli/hud/hud_install.py +652 -0
  14. zeno_cli/hud/zeno_attention.py +288 -0
  15. zeno_cli/hud/zeno_cognition.py +457 -0
  16. zeno_cli/hud/zeno_hud.py +496 -0
  17. zeno_cli/interview_invites.py +342 -0
  18. zeno_cli/login.py +241 -0
  19. zeno_cli/main.py +2534 -0
  20. zeno_cli/onboard.py +206 -0
  21. zeno_cli/outreach.py +456 -0
  22. zeno_cli/version.py +67 -0
  23. zeno_cli-0.3.4.dist-info/METADATA +161 -0
  24. zeno_cli-0.3.4.dist-info/RECORD +69 -0
  25. zeno_cli-0.3.4.dist-info/WHEEL +4 -0
  26. zeno_cli-0.3.4.dist-info/entry_points.txt +4 -0
  27. zeno_core/__init__.py +67 -0
  28. zeno_core/analytics.py +193 -0
  29. zeno_core/rtlx_s.py +460 -0
  30. zeno_core/streak.py +178 -0
  31. zeno_core/tlx_s.py +192 -0
  32. zeno_sdk/__init__.py +6 -0
  33. zeno_sdk/_generated/__init__.py +6 -0
  34. zeno_sdk/_generated/client.py +819 -0
  35. zeno_sdk/_migrations/alembic/env.py +33 -0
  36. zeno_sdk/_migrations/alembic/script.py.mako +18 -0
  37. zeno_sdk/_migrations/alembic/versions/0001_initial.py +79 -0
  38. zeno_sdk/_migrations/alembic/versions/0002_cognition_samples.py +53 -0
  39. zeno_sdk/_migrations/alembic/versions/0003_cognition_drivers.py +41 -0
  40. zeno_sdk/_migrations/alembic/versions/0004_transcript_intelligence.py +248 -0
  41. zeno_sdk/_migrations/alembic.ini +35 -0
  42. zeno_sdk/_runtime.py +12 -0
  43. zeno_sdk/adapters/__init__.py +15 -0
  44. zeno_sdk/adapters/anthropic.py +5 -0
  45. zeno_sdk/adapters/claude_code.py +5 -0
  46. zeno_sdk/adapters/crewai.py +5 -0
  47. zeno_sdk/adapters/langgraph.py +5 -0
  48. zeno_sdk/adapters/openai.py +5 -0
  49. zeno_sdk/auth.py +25 -0
  50. zeno_sdk/client.py +87 -0
  51. zeno_sdk/config.py +61 -0
  52. zeno_sdk/daemon.py +72 -0
  53. zeno_sdk/privacy.py +46 -0
  54. zeno_sdk/session.py +179 -0
  55. zeno_sdk/storage.py +487 -0
  56. zeno_sdk/types/__init__.py +121 -0
  57. zeno_session_intel/__init__.py +19 -0
  58. zeno_session_intel/analytics.py +588 -0
  59. zeno_session_intel/compression.py +123 -0
  60. zeno_session_intel/ingest.py +376 -0
  61. zeno_session_intel/model.py +129 -0
  62. zeno_session_intel/parsers/__init__.py +31 -0
  63. zeno_session_intel/parsers/claude_code.py +169 -0
  64. zeno_session_intel/parsers/codex.py +265 -0
  65. zeno_session_intel/parsers/cursor.py +198 -0
  66. zeno_session_intel/prices.py +281 -0
  67. zeno_session_intel/schema.py +277 -0
  68. zeno_session_intel/signals.py +319 -0
  69. zeno_session_intel/taxonomy.py +71 -0
@@ -0,0 +1,281 @@
1
+ """Local, cache-aware model pricing: a bundled rate table + cost math + a name matcher.
2
+
3
+ Local-first by doctrine: the rate table ships in-repo, no network fetch at runtime. The
4
+ cost formula is ported from agentsview ``cmd/agentsview/usage.go:738-771`` (MIT, see
5
+ THIRD_PARTY_LICENSES.md). Anthropic rates are the authoritative current rates (verified
6
+ against the bundled ``claude-api`` skill on 2026-06-17); OpenAI/Codex rates are from
7
+ agentsview ``internal/pricing/fallback.go``. Cache rates follow Anthropic's standard
8
+ multipliers: cache-creation = 1.25x input, cache-read = 0.10x input.
9
+
10
+ Rates are USD per million tokens (per-MTok). ``cost_for`` returns USD for one usage row.
11
+ ``resolve_model`` ports agentsview's 4-stage matcher (``internal/pricing/normalize.go``):
12
+ resolve a logged model id to a table key ONCE at ingest, store the resolved key, so the
13
+ analytics SQL stays a plain equijoin against ``model_pricing``.
14
+
15
+ Stdlib-only (Python 3.12, never raises into hot paths).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ import sqlite3
22
+
23
# zeno's own reconciliation date for the bundled rates (mirrors agentsview's
# FallbackVersion discipline). Bump it whenever a rate below changes so a re-seed
# re-upserts.
PRICING_VERSION = "2026-06-17"

# Context-window sizes (1M via _M, 200K inline below) feed the peak-context-pct lens
# (zeno addition).
_M = 1_000_000

# Names of the four per-MTok rate columns.
_RATE_FIELDS = (
    "input_per_mtok",
    "output_per_mtok",
    "cache_creation_per_mtok",
    "cache_read_per_mtok",
)

# Key order of every FALLBACK_PRICING row dict.
_ROW_KEYS = ("model_pattern", *_RATE_FIELDS, "context_window")

# One dict per model_pattern. Cache rates that are not known are 0 (graceful: they
# contribute 0 to the cost, never a silent partial). context_window is None where it
# is not authoritative (OpenAI/Codex).
FALLBACK_PRICING: tuple[dict[str, object], ...] = tuple(
    dict(zip(_ROW_KEYS, row))
    for row in (
        # (model_pattern, input, output, cache_creation, cache_read, context_window)
        # --- Anthropic (authoritative current rates; cache = 1.25x / 0.10x of input) ---
        ("claude-opus-4-8", 5.0, 25.0, 6.25, 0.50, _M),
        ("claude-opus-4-7", 5.0, 25.0, 6.25, 0.50, _M),
        ("claude-opus-4-6", 5.0, 25.0, 6.25, 0.50, _M),
        ("claude-opus-4-5", 5.0, 25.0, 6.25, 0.50, _M),
        ("claude-sonnet-4-6", 3.0, 15.0, 3.75, 0.30, _M),
        ("claude-sonnet-4-5", 3.0, 15.0, 3.75, 0.30, _M),
        ("claude-haiku-4-5", 1.0, 5.0, 1.25, 0.10, 200_000),
        ("claude-fable-5", 10.0, 50.0, 12.50, 1.0, _M),
        # --- OpenAI / Codex (from agentsview fallback.go) ---
        ("gpt-5.5", 5.0, 30.0, 0.0, 0.50, None),
        ("gpt-5.4", 2.50, 15.0, 0.0, 0.0, None),
        ("gpt-5.4-mini", 0.75, 4.50, 0.0, 0.0, None),
        ("gpt-5.4-nano", 0.20, 1.25, 0.0, 0.0, None),
        ("gpt-5.3-codex", 1.75, 14.0, 0.0, 0.0, None),
        ("gpt-5.2-codex", 1.75, 14.0, 0.0, 0.0, None),
        ("gpt-5.1-codex-max", 1.25, 10.0, 0.0, 0.0, None),
    )
)
163
+
164
+
165
def seed_pricing(con: sqlite3.Connection) -> int:
    """Seed ``model_pricing`` from the bundled table, leaving existing rows untouched.

    Each bundled row is inserted with ``ON CONFLICT(model_pattern) DO NOTHING``, so a
    row that is already present (for example one an operator refreshed from LiteLLM)
    is kept as-is.

    Args:
        con: open SQLite connection; the ``model_pricing`` table must already exist.

    Returns:
        The number of rows actually inserted (skipped conflicts count as 0).
    """
    columns = (
        "model_pattern",
        "input_per_mtok",
        "output_per_mtok",
        "cache_creation_per_mtok",
        "cache_read_per_mtok",
        "context_window",
    )
    statement = (
        "INSERT INTO model_pricing "
        "(model_pattern, input_per_mtok, output_per_mtok, "
        " cache_creation_per_mtok, cache_read_per_mtok, context_window) "
        "VALUES (?,?,?,?,?,?) ON CONFLICT(model_pattern) DO NOTHING"
    )
    inserted = 0
    for entry in FALLBACK_PRICING:
        params = tuple(entry[column] for column in columns)
        # rowcount is 0 when the conflict clause skipped the row.
        inserted += con.execute(statement, params).rowcount or 0
    return inserted
189
+
190
+
191
def cost_for(tokens: dict[str, float], rate: dict[str, float] | None) -> float | None:
    """Price one usage row in USD against a per-MTok rate row.

    cost = (input*in + output*out + cache_creation*cc + cache_read*cr) / 1e6

    Args:
        tokens: usage counts keyed by ``input_tokens``, ``output_tokens``,
            ``cache_creation_input_tokens`` and ``cache_read_input_tokens``.
            Absent or falsy fields count as 0.
        rate: rate row keyed by the ``*_per_mtok`` names, or None/empty when the
            model is unpriced.

    Returns:
        The cost in USD, or None when there is no rate or any value cannot be read as
        a number (never a silent 0 total for an unpriced row).
    """
    if not rate:
        return None

    def amount(source, key):
        # Absent, None and other falsy values all read as 0.0.
        return float(source.get(key, 0) or 0)

    try:
        fresh_input = amount(tokens, "input_tokens") * amount(rate, "input_per_mtok")
        generated = amount(tokens, "output_tokens") * amount(rate, "output_per_mtok")
        cache_writes = amount(tokens, "cache_creation_input_tokens") * amount(
            rate, "cache_creation_per_mtok"
        )
        cache_reads = amount(tokens, "cache_read_input_tokens") * amount(
            rate, "cache_read_per_mtok"
        )
    except Exception:
        # Best-effort by design: a malformed row is reported as unpriced.
        return None
    # Plain left-to-right addition (not sum()) keeps the float result bit-identical.
    return (fresh_input + generated + cache_writes + cache_reads) / 1_000_000.0
211
+
212
+
213
def cache_savings(tokens: dict[str, float], rate: dict[str, float] | None) -> float | None:
    """USD saved by caching compared with paying the full input rate (ccusage-class).

    Cache reads and cache writes are each valued at (input rate - cache rate) per
    token. The result can be negative on write-heavy sessions, because cache creation
    can cost more than the input it replaced. It is deliberately NOT clamped: a
    negative value is itself the finding.

    Args:
        tokens: usage counts; only ``cache_read_input_tokens`` and
            ``cache_creation_input_tokens`` are read. Absent or falsy fields count as 0.
        rate: rate row keyed by the ``*_per_mtok`` names, or None/empty when unpriced.

    Returns:
        The saving in USD, or None when there is no rate or a value cannot be read as
        a number.
    """
    if not rate:
        return None

    def amount(source, key):
        # Absent, None and other falsy values all read as 0.0.
        return float(source.get(key, 0) or 0)

    try:
        full_price = amount(rate, "input_per_mtok")
        read_saving = amount(tokens, "cache_read_input_tokens") * (
            full_price - amount(rate, "cache_read_per_mtok")
        )
        write_saving = amount(tokens, "cache_creation_input_tokens") * (
            full_price - amount(rate, "cache_creation_per_mtok")
        )
    except Exception:
        # Best-effort by design: a malformed row is reported as unpriced.
        return None
    return (read_saving + write_saving) / 1_000_000.0
230
+
231
+
232
+ # ---------------------------------------------------------------------------
233
+ # model-name resolution (agentsview normalize.go 4-stage matcher)
234
+ # ---------------------------------------------------------------------------
235
+ _PROVIDER_PREFIX = re.compile(r"^[a-z0-9_]+/")
236
+ _TRAILING_DATE = re.compile(r"-\d{8}$")
237
+ _TRAILING_GROUP = re.compile(r"[(\[][^)\]]*[)\]]$")
238
+ _NON_ALNUM = re.compile(r"[^a-z0-9]")
239
+
240
+
241
+ def _canonical(name: str) -> str:
242
+ """Stage-4 canonical form: lowercase, strip one provider prefix, strip one trailing
243
+ (...)/[...] group, strip one trailing -YYYYMMDD, drop all non-alphanumerics."""
244
+ s = name.strip().lower()
245
+ s = _PROVIDER_PREFIX.sub("", s, count=1)
246
+ s = _TRAILING_GROUP.sub("", s).strip()
247
+ s = _TRAILING_DATE.sub("", s)
248
+ return _NON_ALNUM.sub("", s)
249
+
250
+
251
def resolve_model(name: str, patterns: tuple[str, ...] | list[str]) -> str | None:
    """Resolve a logged model id to a known pricing pattern, or None if unresolved.

    Four stages are tried in order and the first hit wins:

    1. exact match;
    2. match after turning dots into dashes on both sides;
    3. case-insensitive match;
    4. match on the canonical form (see ``_canonical``), accepted only when exactly
       one pattern matches.

    There is no arbitrary substring matching (so ``gpt-5.5`` never silently matches
    ``gpt-5.5-codex``), and an ambiguous canonical tie stays unresolved. Resolve once
    at ingest and store the result so SQL stays a plain equijoin.

    Args:
        name: the model id as logged. Empty or non-string values resolve to None.
        patterns: candidate pricing keys. Non-string entries are ignored and
            duplicates are collapsed, so a repeated key is not mistaken for a tie.

    Returns:
        The matching entry from ``patterns`` (for stage 1 this equals ``name``), or
        None when nothing matches or the canonical match is ambiguous.
    """
    # Logged ids come from external transcripts: treat anything that is not a
    # non-empty string as unresolved instead of raising on .replace()/.lower().
    if not name or not isinstance(name, str):
        return None
    # De-duplicate in first-seen order and drop non-string entries.
    pats = list(dict.fromkeys(p for p in (patterns or ()) if isinstance(p, str)))
    if not pats:
        return None
    # 1. exact
    if name in pats:
        return name
    # 2. dots -> dashes (applied to both sides; a dash-only pattern is unchanged)
    dashed = name.replace(".", "-")
    for p in pats:
        if p.replace(".", "-") == dashed:
            return p
    # 3. case-insensitive
    low = name.lower()
    for p in pats:
        if p.lower() == low:
            return p
    # 4. canonical, with ambiguity guard
    target = _canonical(name)
    if not target:
        return None
    matches = [p for p in pats if _canonical(p) == target]
    # 0 matches, or an ambiguous tie between distinct patterns -> unresolved
    return matches[0] if len(matches) == 1 else None
@@ -0,0 +1,277 @@
1
+ """Canonical SQLite schema for the session-intelligence tables (single source of truth).
2
+
3
+ These tables are an ADDITIVE, derived cache of coding-agent transcripts. They sit
4
+ ALONGSIDE zeno's primary capture (``sessions``/``agent_runs``/``cognition_samples``)
5
+ and never replace it: agentsview-derived tables score *agent* behavior (token/cost,
6
+ tool health, outcomes), while ``cognition_samples`` scores *human* cognition. The two
7
+ are orthogonal and complementary.
8
+
9
+ Schema + analytics design is adapted from agentsview (MIT, (c) 2026 Kenn Software LLC).
10
+ The FTS5 virtual table + triggers are ported verbatim (``internal/db/db.go:261-277``);
11
+ the table/column shapes map from ``internal/db/schema.sql``. See THIRD_PARTY_LICENSES.md.
12
+
13
+ Naming: agentsview's ``sessions``/``messages`` collide with zeno's existing tables, so
14
+ they are namespaced ``transcript_*``. ``token_usage_events`` is the cache-aware billing
15
+ ledger (deduped); ``model_pricing`` is the per-MTok rate table.
16
+
17
+ This module is stdlib-only (Python 3.12, never raises into the caller) so the ingester
18
+ (a stdlib ``sqlite3`` writer) and the dashboard export scripts can both create + read the
19
+ tables without importing the heavy SDK. The alembic migration
20
+ ``packages/sdk-python/alembic/versions/0004_transcript_intelligence.py`` mirrors this
21
+ schema for the SDK-managed DB; the schema-drift test
22
+ (``tests/test_session_intel_schema.py``) guards the two from diverging.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import sqlite3
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # column definitions: (name, sqlite_decl). Order is the literal CREATE order.
31
+ # ---------------------------------------------------------------------------
32
+
33
# Shorthands for column declarations that repeat across the tables below.
_INT_ZERO = "INTEGER NOT NULL DEFAULT 0"
_TEXT_EMPTY = "TEXT NOT NULL DEFAULT ''"
_TEXT_NOW = "TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now'))"
_SESSION_FK = "TEXT NOT NULL REFERENCES transcript_sessions(id) ON DELETE CASCADE"

TRANSCRIPT_SESSIONS_COLUMNS: tuple[tuple[str, str], ...] = (
    # CC/Cursor/Codex session id
    ("id", "TEXT PRIMARY KEY"),
    # soft bridge -> sessions.id (lifecycles differ; no hard FK)
    ("zeno_session_id", "TEXT"),
    # claude|codex|cursor
    ("agent", "TEXT NOT NULL DEFAULT 'claude'"),
    ("project", _TEXT_EMPTY),
    ("machine", "TEXT NOT NULL DEFAULT 'local'"),
    ("first_message", "TEXT"),
    ("display_name", "TEXT"),
    ("started_at", "TEXT"),
    ("ended_at", "TEXT"),
    ("message_count", _INT_ZERO),
    ("user_message_count", _INT_ZERO),
    # token rollups (cache-aware) -- schema.sql:23-26
    ("total_output_tokens", _INT_ZERO),
    ("peak_context_tokens", _INT_ZERO),
    # behavioral health signals -- schema.sql:27-41 / signals/score.go inputs
    ("is_automated", _INT_ZERO),
    ("tool_failure_signal_count", _INT_ZERO),
    ("tool_retry_count", _INT_ZERO),
    ("edit_churn_count", _INT_ZERO),
    ("consecutive_failure_max", _INT_ZERO),
    ("outcome", "TEXT NOT NULL DEFAULT 'unknown'"),
    ("outcome_confidence", "TEXT NOT NULL DEFAULT 'low'"),
    ("ended_with_role", _TEXT_EMPTY),
    ("final_failure_streak", _INT_ZERO),
    ("compaction_count", _INT_ZERO),
    ("mid_task_compaction_count", _INT_ZERO),
    ("context_pressure_max", "REAL"),
    ("health_score", "INTEGER"),
    ("health_grade", "TEXT"),
    # capability flags / env
    ("has_tool_calls", _INT_ZERO),
    ("has_context_data", _INT_ZERO),
    ("cwd", _TEXT_EMPTY),
    ("git_branch", _TEXT_EMPTY),
    # ingest identity (idempotency -- zeno additions)
    ("file_path", "TEXT"),
    ("file_mtime", "INTEGER"),
    ("content_hash", "TEXT"),
    ("created_at", _TEXT_NOW),
)

TRANSCRIPT_MESSAGES_COLUMNS: tuple[tuple[str, str], ...] = (
    # rowid alias == FTS5 content_rowid (do NOT make composite)
    ("id", "INTEGER PRIMARY KEY"),
    ("session_id", _SESSION_FK),
    ("ordinal", "INTEGER NOT NULL"),
    ("role", "TEXT NOT NULL"),
    ("content", "TEXT NOT NULL"),
    ("thinking_text", _TEXT_EMPTY),
    ("timestamp", "TEXT"),
    ("has_thinking", _INT_ZERO),
    ("has_tool_use", _INT_ZERO),
    ("content_length", _INT_ZERO),
    ("model", _TEXT_EMPTY),
    # raw usage JSON (provenance)
    ("token_usage", _TEXT_EMPTY),
    ("context_tokens", _INT_ZERO),
    ("output_tokens", _INT_ZERO),
    ("source_uuid", _TEXT_EMPTY),
    # threading DAG
    ("source_parent_uuid", _TEXT_EMPTY),
    # subagent branch
    ("is_sidechain", _INT_ZERO),
    ("is_compact_boundary", _INT_ZERO),
)

TOKEN_USAGE_EVENTS_COLUMNS: tuple[tuple[str, str], ...] = (
    ("id", "INTEGER PRIMARY KEY"),
    ("session_id", _SESSION_FK),
    # NULL = session-level event
    ("message_ordinal", "INTEGER"),
    # claude|codex|cursor
    ("source", "TEXT NOT NULL"),
    # resolved model_pattern (so cost is a plain equijoin)
    ("model", "TEXT NOT NULL"),
    ("input_tokens", _INT_ZERO),
    ("output_tokens", _INT_ZERO),
    ("cache_creation_input_tokens", _INT_ZERO),
    ("cache_read_input_tokens", _INT_ZERO),
    ("reasoning_tokens", _INT_ZERO),
    # NULL = unpriced
    ("cost_usd", "REAL"),
    # ''|computed|reported (zeno enum)
    ("cost_status", _TEXT_EMPTY),
    # computed|reported
    ("cost_source", _TEXT_EMPTY),
    ("occurred_at", "TEXT"),
    # stable per-event id (idempotency)
    ("dedup_key", _TEXT_EMPTY),
)

TRANSCRIPT_TOOL_CALLS_COLUMNS: tuple[tuple[str, str], ...] = (
    ("id", "INTEGER PRIMARY KEY"),
    ("session_id", _SESSION_FK),
    ("message_ordinal", _INT_ZERO),
    ("ordinal_in_message", _INT_ZERO),
    ("tool_name", _TEXT_EMPTY),
    # one of taxonomy.CATEGORIES
    ("category", "TEXT NOT NULL DEFAULT 'Other'"),
    # matched tool_result failure
    ("is_error", _INT_ZERO),
    # claude|codex|cursor
    ("source", _TEXT_EMPTY),
)

MODEL_PRICING_COLUMNS: tuple[tuple[str, str], ...] = (
    ("model_pattern", "TEXT PRIMARY KEY"),
    ("input_per_mtok", "REAL NOT NULL DEFAULT 0"),
    ("output_per_mtok", "REAL NOT NULL DEFAULT 0"),
    ("cache_creation_per_mtok", "REAL NOT NULL DEFAULT 0"),
    ("cache_read_per_mtok", "REAL NOT NULL DEFAULT 0"),
    # zeno addition: powers the peak-context-pct lens
    ("context_window", "INTEGER"),
    ("updated_at", _TEXT_NOW),
)

# table_name -> column tuple (drives the drift test + the generic create path).
# Insertion order is the creation order: transcript_sessions comes first, ahead of
# the tables whose session_id references it.
TABLES: dict[str, tuple[tuple[str, str], ...]] = {
    "transcript_sessions": TRANSCRIPT_SESSIONS_COLUMNS,
    "transcript_messages": TRANSCRIPT_MESSAGES_COLUMNS,
    "transcript_tool_calls": TRANSCRIPT_TOOL_CALLS_COLUMNS,
    "token_usage_events": TOKEN_USAGE_EVENTS_COLUMNS,
    "model_pricing": MODEL_PRICING_COLUMNS,
}
143
+
144
def _index_entry(name: str, target: str, *, unique: bool = False) -> tuple[str, str]:
    """Build one (name, sql) pair; ``target`` is everything after ``ON``."""
    kind = "UNIQUE INDEX" if unique else "INDEX"
    return (name, f"CREATE {kind} IF NOT EXISTS {name} ON {target}")


# non-FTS indexes (name, sql). Mirrored in the alembic migration.
INDEXES: tuple[tuple[str, str], ...] = (
    _index_entry("idx_transcript_sessions_ended", "transcript_sessions (ended_at DESC, id)"),
    _index_entry("idx_transcript_sessions_agent", "transcript_sessions (agent)"),
    _index_entry("idx_transcript_sessions_project", "transcript_sessions (project)"),
    _index_entry("idx_transcript_sessions_zeno", "transcript_sessions (zeno_session_id)"),
    # One message per (session, ordinal).
    _index_entry(
        "idx_transcript_messages_session_ordinal",
        "transcript_messages (session_id, ordinal)",
        unique=True,
    ),
    _index_entry("idx_transcript_messages_session_role", "transcript_messages (session_id, role)"),
    _index_entry("idx_transcript_tool_calls_session", "transcript_tool_calls (session_id)"),
    _index_entry("idx_transcript_tool_calls_category", "transcript_tool_calls (category)"),
    # Partial unique index: uniqueness applies only to rows with a non-empty dedup_key.
    _index_entry(
        "idx_token_usage_events_dedup",
        "token_usage_events (session_id, source, dedup_key) WHERE dedup_key != ''",
        unique=True,
    ),
    _index_entry("idx_token_usage_events_session", "token_usage_events (session_id)"),
    _index_entry("idx_token_usage_events_occurred", "token_usage_events (occurred_at)"),
)
202
+
203
# Full-text search over transcript_messages.content, ported verbatim from agentsview
# db.go:261-277 with messages renamed to transcript_messages.
#
# * `porter unicode61` is the search-quality contract.
# * content= / content_rowid= make this an external-content index: the FTS table
#   holds only the index and reads row text from transcript_messages by id.
# * The three triggers keep the index in sync on INSERT / DELETE / UPDATE; removal
#   goes through FTS5's special 'delete' command with the old row's content.
#
# Optional: skipped (logged) on SQLite builds without fts5.
FTS_TABLE = "transcript_messages_fts"
FTS_DDL = """
CREATE VIRTUAL TABLE IF NOT EXISTS transcript_messages_fts USING fts5(
content,
content='transcript_messages',
content_rowid='id',
tokenize='porter unicode61'
);
CREATE TRIGGER IF NOT EXISTS transcript_messages_ai
AFTER INSERT ON transcript_messages BEGIN
INSERT INTO transcript_messages_fts(rowid, content) VALUES (new.id, new.content);
END;
CREATE TRIGGER IF NOT EXISTS transcript_messages_ad
AFTER DELETE ON transcript_messages BEGIN
INSERT INTO transcript_messages_fts(transcript_messages_fts, rowid, content)
VALUES ('delete', old.id, old.content);
END;
CREATE TRIGGER IF NOT EXISTS transcript_messages_au
AFTER UPDATE ON transcript_messages BEGIN
INSERT INTO transcript_messages_fts(transcript_messages_fts, rowid, content)
VALUES ('delete', old.id, old.content);
INSERT INTO transcript_messages_fts(rowid, content) VALUES (new.id, new.content);
END;
"""
231
+
232
+
233
+ def _create_table_sql(name: str, columns: tuple[tuple[str, str], ...]) -> str:
234
+ cols = ",\n ".join(f"{n} {decl}" for n, decl in columns)
235
+ return f"CREATE TABLE IF NOT EXISTS {name} (\n {cols}\n)"
236
+
237
+
238
def fts5_available(con: sqlite3.Connection) -> bool:
    """Report whether this connection can create fts5 virtual tables. Never raises.

    The check is a live probe: it creates a throwaway fts5 table in the ``temp``
    schema and drops it again. Any exception, including one from an unusable
    connection, is reported as False.

    Args:
        con: the SQLite connection to probe.

    Returns:
        True when the probe table could be created and dropped, False otherwise.
    """
    try:
        # Clear a probe left behind by an earlier call whose DROP failed; otherwise
        # the CREATE below would fail with "table already exists" and fts5 would be
        # misreported as missing for the rest of this connection's life.
        con.execute("DROP TABLE IF EXISTS temp._zeno_fts5_probe")
        con.execute("CREATE VIRTUAL TABLE temp._zeno_fts5_probe USING fts5(x)")
        con.execute("DROP TABLE temp._zeno_fts5_probe")
        return True
    except Exception:
        return False
246
+
247
+
248
def ensure_schema(con: sqlite3.Connection) -> bool:
    """Create the session-intelligence tables, indexes and FTS5 objects if missing.

    Idempotent: every statement uses ``IF NOT EXISTS``, so it works on a fresh DB and
    changes nothing when everything already exists.

    Args:
        con: open SQLite connection to create the schema on.

    Returns:
        True if the FTS5 table and triggers were set up, False if the SQLite build
        lacks the fts5 module (search degrades but ingest still works).

    NOTE(review): nothing here catches exceptions, so a sqlite3 error from any DDL
    statement propagates to the caller. Confirm callers do not rely on a "never
    raises" guarantee for this function.
    """
    # Turn on foreign-key enforcement for this connection (the column declarations
    # use ON DELETE CASCADE).
    con.execute("PRAGMA foreign_keys = ON")
    for table, columns in TABLES.items():
        con.execute(_create_table_sql(table, columns))
    for _index_name, ddl in INDEXES:
        con.execute(ddl)
    if not fts5_available(con):
        return False
    # executescript runs the multi-statement DDL in one call; per the sqlite3 docs it
    # first commits any pending transaction under legacy transaction control.
    con.executescript(FTS_DDL)
    return True
264
+
265
+
266
def rebuild_fts(con: sqlite3.Connection) -> None:
    """Rebuild the external-content FTS index from ``transcript_messages``.

    Intended for use after a bulk reload where the sync triggers were bypassed. It
    issues FTS5's ``'rebuild'`` command against ``transcript_messages_fts``.

    Best-effort: it does nothing when fts5 is unavailable, and any error raised by the
    rebuild statement is deliberately swallowed.
    """
    if not fts5_available(con):
        return
    try:
        con.execute(
            "INSERT INTO transcript_messages_fts(transcript_messages_fts) "
            "VALUES('rebuild')"
        )
    except Exception:
        pass