superbased-observer 1.7.25__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- observer/__init__.py +1 -0
- observer/__main__.py +46 -0
- superbased_observer-1.7.25.dist-info/METADATA +1123 -0
- superbased_observer-1.7.25.dist-info/RECORD +7 -0
- superbased_observer-1.7.25.dist-info/WHEEL +5 -0
- superbased_observer-1.7.25.dist-info/entry_points.txt +2 -0
- superbased_observer-1.7.25.dist-info/licenses/LICENSE +20 -0
observer/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.7.25"
|
observer/__main__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Console-script entry point: locate the bundled observer binary and
|
|
2
|
+
exec it, forwarding argv (and on POSIX, signals via process replacement).
|
|
3
|
+
|
|
4
|
+
Mirrors the @superbased/observer Node shim. Each platform-tagged wheel
|
|
5
|
+
ships its own binary under observer/_bin/.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import NoReturn
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _binary_path() -> Path:
    """Return the expected location of the bundled observer executable.

    The executable is looked up in the ``_bin`` directory that sits next
    to this module.  Its file name is ``observer.exe`` on Windows and
    ``observer`` on every other platform.  The path is returned whether
    or not a file actually exists there.
    """
    package_dir = Path(__file__).resolve().parent
    if sys.platform == "win32":
        executable = "observer.exe"
    else:
        executable = "observer"
    return package_dir / "_bin" / executable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _report_missing(path: Path) -> NoReturn:
    """Tell the user on stderr that the bundled binary is absent, then exit.

    Args:
        path: Location where the executable was expected.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    message = "".join(
        (
            "superbased-observer: bundled binary not found at ",
            f"{path}\n",
            "This usually means the installed wheel is for a different\n",
            "platform than the one Python is running on. Re-install with:\n",
            " pip install --force-reinstall superbased-observer\n",
            "or report at https://github.com/marmutapp/superbased-observer/issues\n",
        )
    )
    # Emit the whole hint in a single write so it is not interleaved.
    sys.stderr.write(message)
    raise SystemExit(1)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def main() -> NoReturn:
    """Run the bundled observer binary with this process's arguments.

    When the binary is not present, a hint is printed on stderr and the
    process exits with status 1.  On Windows the binary runs as a child
    process and its exit status becomes this process's exit status; on
    every other platform the current process is replaced via
    ``os.execv``.

    Raises:
        SystemExit: If the binary is missing, or (Windows) once the child
            process has finished.
        OSError: If the operating system refuses to start the binary.
    """
    binary = _binary_path()
    if not binary.is_file():
        _report_missing(binary)
    argv = [str(binary), *sys.argv[1:]]

    if sys.platform == "win32":
        import subprocess

        child = subprocess.Popen(argv)
        while True:
            try:
                raise SystemExit(child.wait())
            except KeyboardInterrupt:
                # ``subprocess.call`` kills the child when the wrapper is
                # interrupted.  A console Ctrl+C is normally delivered to
                # the child as well, so keep waiting and let the binary
                # shut down on its own terms, then report its status.
                continue

    # exec replaces the process image without flushing Python's buffered
    # streams, so push out anything still pending first.
    for stream in (sys.stdout, sys.stderr):
        if stream is not None:
            stream.flush()
    os.execv(str(binary), argv)


# Entry point for ``python -m observer``; importing the module runs nothing.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,1123 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: superbased-observer
|
|
3
|
+
Version: 1.7.25
|
|
4
|
+
Summary: SuperBased Observer — capture, normalize, compress, and analyze AI coding tool activity across Claude Code, Codex, Cursor, Cline/Roo, and Copilot.
|
|
5
|
+
Project-URL: Homepage, https://github.com/marmutapp/superbased-observer
|
|
6
|
+
Project-URL: Documentation, https://github.com/marmutapp/superbased-observer#readme
|
|
7
|
+
Project-URL: Issues, https://github.com/marmutapp/superbased-observer/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/marmutapp/superbased-observer/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: Santosh Kathira <contact@marmut.app>
|
|
10
|
+
License-Expression: Apache-2.0
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: ai,anthropic,claude-code,codex,cost,cursor,mcp,observability,openai,tokens
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: System Administrators
|
|
16
|
+
Classifier: Operating System :: MacOS
|
|
17
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
18
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
27
|
+
Classifier: Topic :: Software Development
|
|
28
|
+
Classifier: Topic :: System :: Monitoring
|
|
29
|
+
Requires-Python: >=3.8
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# superbased-observer
|
|
33
|
+
|
|
34
|
+
[PyPI package](https://pypi.org/project/superbased-observer/)
|
|
35
|
+
[Supported Python versions](https://pypi.org/project/superbased-observer/)
|
|
36
|
+
[License: Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
|
37
|
+
[Source on GitHub](https://github.com/marmutapp/superbased-observer)
|
|
38
|
+
|
|
39
|
+
> This is the **PyPI distribution**. `pip install superbased-observer`
|
|
40
|
+
> bundles the same prebuilt binary that
|
|
41
|
+
> [`@superbased/observer`](https://www.npmjs.com/package/@superbased/observer)
|
|
42
|
+
> ships on npm — version numbers are kept in lock-step. Pick whichever
|
|
43
|
+
> package manager fits your environment; the resulting `observer`
|
|
44
|
+
> command on your `$PATH` is identical.
|
|
45
|
+
|
|
46
|
+
**Capture, normalize, compress, and analyze every AI coding tool call you
|
|
47
|
+
run** — across Claude Code, Codex, Cursor, Cline / Roo Code, GitHub
|
|
48
|
+
Copilot (VS Code), GitHub Copilot CLI, OpenCode, OpenClaw, Pi, Google
|
|
49
|
+
Antigravity, Gemini CLI, and Cowork — in one local single-binary tool.
|
|
50
|
+
No telemetry, no cloud, no data leaves your machine.
|
|
51
|
+
|
|
52
|
+
<p align="center">
|
|
53
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/infographics/one-local-path.png" alt="One local path for AI coding activity" width="780">
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
# Table of contents
|
|
57
|
+
|
|
58
|
+
- [Install](#install)
|
|
59
|
+
- [Five-minute quickstart](#five-minute-quickstart)
|
|
60
|
+
- [Per-AI-client setup](#per-ai-client-setup)
|
|
61
|
+
- [Architecture in detail](#architecture-in-detail)
|
|
62
|
+
- [Dashboard tour](#dashboard-tour)
|
|
63
|
+
- [MCP tools reference](#mcp-tools-reference)
|
|
64
|
+
- [Compression mechanisms](#compression-mechanisms)
|
|
65
|
+
- [Cost and token math](#cost-and-token-math)
|
|
66
|
+
- [Terminology and glossary](#terminology-and-glossary)
|
|
67
|
+
- [CLI reference](#cli-reference)
|
|
68
|
+
- [Configuration](#configuration)
|
|
69
|
+
- [Troubleshooting](#troubleshooting)
|
|
70
|
+
- [Security and privacy](#security-and-privacy)
|
|
71
|
+
- [Source, contributing, license](#source-contributing-license)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
## Install
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install superbased-observer
|
|
78
|
+
observer --version
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Or with [`uv`](https://docs.astral.sh/uv/) (recommended for tools — installs in an isolated env automatically):
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
uv tool install superbased-observer
|
|
85
|
+
observer --version
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Or with [`pipx`](https://pipx.pypa.io/) (same idea as `uv tool`):
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pipx install superbased-observer
|
|
92
|
+
observer --version
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Plain `pip install --user superbased-observer` works too, but
|
|
96
|
+
`uv tool` / `pipx` keep the install isolated from your project's
|
|
97
|
+
Python environment, which is generally what you want for a CLI tool.
|
|
98
|
+
|
|
99
|
+
Pre-built wheels ship for:
|
|
100
|
+
|
|
101
|
+
| Platform | Architecture | Wheel tag |
|
|
102
|
+
|-----------------------|--------------|-----------|
|
|
103
|
+
| Linux | x64 | `manylinux2014_x86_64` (glibc ≥ 2.17) |
|
|
104
|
+
| Linux | arm64 | `manylinux2014_aarch64` |
|
|
105
|
+
| macOS (Intel) | x64 | `macosx_10_15_x86_64` |
|
|
106
|
+
| macOS (Apple Silicon) | arm64 | `macosx_11_0_arm64` |
|
|
107
|
+
| Windows | x64 | `win_amd64` |
|
|
108
|
+
|
|
109
|
+
Each wheel bundles its platform's prebuilt binary directly — no
|
|
110
|
+
postinstall download, no compile step, no Go toolchain required.
|
|
111
|
+
pip picks the right wheel for your machine automatically.
|
|
112
|
+
|
|
113
|
+
If your platform isn't listed, build from source — instructions in
|
|
114
|
+
the [main repo](https://github.com/marmutapp/superbased-observer).
|
|
115
|
+
|
|
116
|
+
**Already use `@superbased/observer` from npm?** Don't install both
|
|
117
|
+
globally — whichever directory comes first on `$PATH` wins, which
|
|
118
|
+
gets confusing if their versions drift mid-upgrade. Pick one.
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
## Five-minute quickstart
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# 1) Install. `observer init` is OPTIONAL — only run it if you want
|
|
125
|
+
# the MCP server registered with your AI clients (gives them
|
|
126
|
+
# on-demand tools like check_file_freshness / get_cost_summary
|
|
127
|
+
# at the cost of ~1,800 tokens of schema per turn).
|
|
128
|
+
pip install superbased-observer # or: uv tool install superbased-observer
|
|
129
|
+
observer init # OPTIONAL — interactive: pick clients;
|
|
130
|
+
# writes MCP + codex proxy-route into AI client configs.
|
|
131
|
+
# Skip this step for an MCP-free install.
|
|
132
|
+
|
|
133
|
+
# 2) Start the long-running services (proxy + watcher + dashboard).
|
|
134
|
+
# Auto-registers HOOKS for every detected AI tool on first launch.
|
|
135
|
+
observer start &
|
|
136
|
+
|
|
137
|
+
# 3) Engage the proxy by pointing your AI client at the local URL.
|
|
138
|
+
# See "Per-AI-client setup" for the matching env var.
|
|
139
|
+
export ANTHROPIC_BASE_URL=http://127.0.0.1:8820 # Claude Code
|
|
140
|
+
export OPENAI_BASE_URL=http://127.0.0.1:8820/v1 # Codex / OpenAI
|
|
141
|
+
|
|
142
|
+
# 4) Open the dashboard.
|
|
143
|
+
open http://127.0.0.1:8081/ # macOS
|
|
144
|
+
xdg-open http://127.0.0.1:8081/ # Linux
|
|
145
|
+
start http://127.0.0.1:8081/ # Windows
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
**What `start` does vs what `init` adds:**
|
|
149
|
+
|
|
150
|
+
| Step | Hooks | Proxy listening | Watcher | Dashboard | MCP in AI clients | Codex proxy route |
|
|
151
|
+
|---|---|---|---|---|---|---|
|
|
152
|
+
| `observer start` alone | auto-registers ✓ | ✓ | ✓ | ✓ | — | — |
|
|
153
|
+
| `observer init` + `observer start` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
154
|
+
| `observer init --skip-mcp` + `start` | ✓ | ✓ | ✓ | ✓ | — | ✓ |
|
|
155
|
+
|
|
156
|
+
MCP and codex routing are explicit-only because both write per-client
|
|
157
|
+
config files. Hooks self-heal on every `start`.
|
|
158
|
+
|
|
159
|
+
After ten minutes of normal AI-coding usage, the dashboard will be
|
|
160
|
+
populated with cost over time, per-tool activity, compression
|
|
161
|
+
savings, and stale-reread waste signals.
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
## Per-AI-client setup
|
|
165
|
+
|
|
166
|
+
Different clients send to different upstreams. The local proxy on
|
|
167
|
+
`127.0.0.1:8820` handles all of them — routes by URL path. Set the
|
|
168
|
+
env var that matches your client(s); both can coexist on one machine.
|
|
169
|
+
|
|
170
|
+
| AI client | Env var | Capture mode |
|
|
171
|
+
|---------------------------------|-------------------------------------------------------|--------------|
|
|
172
|
+
| **Claude Code** | `ANTHROPIC_BASE_URL=http://127.0.0.1:8820` | proxy + JSONL |
|
|
173
|
+
| **Cursor** (Anthropic mode) | `ANTHROPIC_BASE_URL=http://127.0.0.1:8820` | proxy + JSONL |
|
|
174
|
+
| **Codex** | `OPENAI_BASE_URL=http://127.0.0.1:8820/v1` (note `/v1`) | proxy + JSONL with API-key auth; ChatGPT-plan login currently behaves as JSONL only |
|
|
175
|
+
| **Cursor** (OpenAI mode) | `OPENAI_BASE_URL=http://127.0.0.1:8820/v1` | proxy + JSONL |
|
|
176
|
+
| **Cline / Roo Code** | `ANTHROPIC_BASE_URL=...` or `OPENAI_BASE_URL=...` per provider | proxy + JSONL |
|
|
177
|
+
| **GitHub Copilot** | (no proxy yet) | JSONL only |
|
|
178
|
+
| **OpenCode** ([opencode.ai](https://opencode.ai/)) | (no proxy yet) | SQLite — actual install path is `~/.local/share/opencode/opencode.db` (XDG). Captures **token counts + model + cost** per assistant message from OpenCode's InfoData (`tokens.input/output/reasoning/cache.{read,write}` + `cost`); **subtask** parts → `spawn_subagent` actions; **todo** table → `todo_update` actions; tool-name coverage extended to webfetch/websearch/task/todowrite/todoread/multiedit. Tagged `Source=jsonl, Reliability=approximate`. |
|
|
179
|
+
| **OpenClaw** ([openclaw.ai](https://openclaw.ai/)) | (no proxy yet) | JSONL + sqlite — `~/.openclaw/tasks/runs.sqlite` + `~/.openclaw/agents/<agent>/sessions/sessions.json` |
|
|
180
|
+
| **Pi** ([pi.dev](https://pi.dev/)) | (no proxy yet) | JSONL — `~/.pi/agent/sessions/--<path>--/*.jsonl` (per upstream `docs/session-format.md` v3). Captures user / assistant / toolResult / `bashExecution` message roles; `usage.cost.total` → per-message USD; terminal `stopReason` (stop/length/error/aborted) → `task_complete` with `success=false` for failures (mid-turn `toolUse` is correctly skipped); `thinking` blocks surface as preceding reasoning. Tagged `Source=jsonl, Reliability=approximate`. |
|
|
181
|
+
| **Google Antigravity** | (no proxy yet) | Encrypted protobuf — `~/.gemini/antigravity/conversations/*.pb` (Linux-native) and the matching Windows-side path on WSL2. Observer ships a per-OS Chromium-pattern `oscrypt` key fetcher (macOS Keychain / libsecret / DPAPI / WSL2-via-PowerShell helper) and a multi-cipher try-loop for local decryption. Sessions whose ciphers don't validate locally fall back to the language_server's `GetCascadeTrajectory` gRPC endpoint via a built-in helper (`antigravity-bridge.exe` on WSL2 / native gRPC elsewhere) — extracts model + per-turn token counts + Tier 0–6 ToolEvents (file views, artifact edits/writes, user prompts, assistant text, run_command terminal snapshots, structured plan steps, final summaries). State index + per-conversation title/workspace URI read from `state.vscdb` + `state.vscdb.backup`. Tagged `Source=jsonl, Reliability=approximate`. |
|
|
182
|
+
| **Gemini CLI** | (no proxy yet) | JSONL or single-object JSON — `~/.gemini/tmp/<hash>/chats/session-*.{json,jsonl}`. Dual-format dispatch: legacy single-object JSON (size-based cursor, cline-style) and proposed JSONL event records (byte-offset cursor, issue [#15292](https://github.com/google-gemini/gemini-cli/issues/15292)). Action mapping covers `read_file` / `write_file` / `edit_file` / `run_command` / `search_files` / `web_fetch` and arbitrary MCP tool calls. Project root falls back through tool-call `cwd` → `~/.gemini/history/<hash>/.git/config` worktree pointer → synthetic `[gemini-cli:<hash>]` key (promoted via ON CONFLICT DO UPDATE on `sessions.project_id` once a future scan supplies a real cwd). Tagged `Source=jsonl, Reliability=approximate`. |
|
|
183
|
+
|
|
184
|
+
**JSONL-only** clients are captured passively by the watcher whenever
|
|
185
|
+
`observer start` is running. Hooks self-heal on every `start`, so a
|
|
186
|
+
fresh install captures the JSONL side without any `init` step. You won't see real-time cost numbers
|
|
187
|
+
for them on the Compression tab (those need the proxy), but every tool
|
|
188
|
+
call shows up on Sessions / Actions / Discovery / Tools / Patterns and
|
|
189
|
+
the JSONL-derived token counts feed the Cost tab. Reliability tagging
|
|
190
|
+
is per-adapter: Claude Code emits `unreliable` (the JSONL stream uses
|
|
191
|
+
streaming-time placeholder counts per spec §24); Codex / Cline / Pi /
|
|
192
|
+
OpenCode / OpenClaw / Antigravity / Gemini CLI emit `approximate`
|
|
193
|
+
(provider-reported usage that hasn't been reconciled against an
|
|
194
|
+
upstream invoice).
|
|
195
|
+
|
|
196
|
+
For **Codex specifically**, Observer currently has two practical support modes:
|
|
197
|
+
|
|
198
|
+
- `Proxy + JSONL`: Codex is routed through `OPENAI_BASE_URL=http://127.0.0.1:8820/v1` and Observer can link proxy turns to the session, so live compression metrics are available.
|
|
199
|
+
- `JSONL only`: Observer can still recover sessions, actions, and approximate token counts from `~/.codex/sessions`, but live proxy compression is currently not available when Codex is logged in with a ChatGPT plan on the local machine.
|
|
200
|
+
|
|
201
|
+
### Persistent setups
|
|
202
|
+
|
|
203
|
+
**Claude Code** (`~/.claude/settings.json`):
|
|
204
|
+
|
|
205
|
+
```json
|
|
206
|
+
{
|
|
207
|
+
"env": { "ANTHROPIC_BASE_URL": "http://127.0.0.1:8820" },
|
|
208
|
+
"hooks": { /* `observer init` writes these */ }
|
|
209
|
+
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
**Codex** (`~/.codex/config.toml`):
|
|
213
|
+
|
|
214
|
+
```toml
|
|
215
|
+
[env]
|
|
216
|
+
OPENAI_BASE_URL = "http://127.0.0.1:8820/v1"
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
**Shell rc** (`~/.bashrc` / `~/.zshrc`) — affects every program:
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
export ANTHROPIC_BASE_URL=http://127.0.0.1:8820
|
|
223
|
+
export OPENAI_BASE_URL=http://127.0.0.1:8820/v1
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
## Architecture in detail
|
|
228
|
+
|
|
229
|
+
<p align="center">
|
|
230
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/infographics/intelligence-across-tools.png" alt="Shared local intelligence layer across tools" width="780">
|
|
231
|
+
</p>
|
|
232
|
+
|
|
233
|
+
Five components running side by side:
|
|
234
|
+
|
|
235
|
+
### 1. JSONL adapters (passive ingest)
|
|
236
|
+
|
|
237
|
+
Watch `~/.claude/projects/`, `~/.codex/sessions/`,
|
|
238
|
+
`~/.gemini/tmp/.../chats/`, `~/.gemini/antigravity/conversations/`
|
|
239
|
+
(and the matching Antigravity index `state.vscdb`), etc. for new
|
|
240
|
+
session log lines. Normalize per-client tool names to a shared
|
|
241
|
+
taxonomy (`read_file`, `run_command`, `spawn_subagent`, …) and write
|
|
242
|
+
them into the actions table. Active whenever `observer start` is
|
|
243
|
+
running; hooks self-heal on each start so no separate `init` step is
|
|
244
|
+
required for capture.
|
|
245
|
+
|
|
246
|
+
For Antigravity (which stores conversations as encrypted protobufs),
|
|
247
|
+
observer ships a per-OS `oscrypt` key fetcher (Chromium Safe Storage
|
|
248
|
+
pattern: macOS Keychain / Linux libsecret + peanuts fallback / Windows
|
|
249
|
+
DPAPI / WSL2-via-PowerShell helper) plus a `language_server`-aware
|
|
250
|
+
gRPC fallback that calls `GetCascadeTrajectory` through the bundled
|
|
251
|
+
`antigravity-bridge.exe` when local decryption can't validate the
|
|
252
|
+
ciphertext. Tier 0–6 ToolEvents (file views, artifact edits/writes,
|
|
253
|
+
user prompts, assistant text, run_command terminal snapshots,
|
|
254
|
+
structured plan steps, final summaries) are extracted from the
|
|
255
|
+
trajectory's wire format without committing to specific .proto field
|
|
256
|
+
numbers.
|
|
257
|
+
|
|
258
|
+
What this gets you: every tool call you've ever run, queryable.
|
|
259
|
+
|
|
260
|
+
### 2. API reverse proxy (active capture)
|
|
261
|
+
|
|
262
|
+
A localhost HTTP server (`127.0.0.1:8820`) you point your AI client
|
|
263
|
+
at via `ANTHROPIC_BASE_URL` / `OPENAI_BASE_URL`. Intercepts every
|
|
264
|
+
request before it hits Anthropic / OpenAI and:
|
|
265
|
+
|
|
266
|
+
- Records exact token usage from the upstream `usage` envelope (the
|
|
267
|
+
most accurate cost source — proxy beats JSONL parsing here).
|
|
268
|
+
- Runs the **conversation compression pipeline** to trim large
|
|
269
|
+
`tool_result` blocks and drop low-importance messages before
|
|
270
|
+
forwarding upstream.
|
|
271
|
+
- Captures the `cost_usd` the upstream reports (when present).
|
|
272
|
+
|
|
273
|
+
What this gets you: ground-truth cost numbers and conversation
|
|
274
|
+
compression savings you can measure.
|
|
275
|
+
|
|
276
|
+
### 3. SQLite store
|
|
277
|
+
|
|
278
|
+
A single file at `~/.observer/observer.db`. Tables include:
|
|
279
|
+
|
|
280
|
+
- `projects`, `sessions`, `actions` — the taxonomy
|
|
281
|
+
- `api_turns` — one row per proxy-intercepted upstream request
|
|
282
|
+
- `token_usage` — JSONL-derived token-row events (deduped via spec §A1)
|
|
283
|
+
- `file_state` — content hashes for freshness classification
|
|
284
|
+
- `compression_events` — per-event compression detail (post-migration 010)
|
|
285
|
+
- `project_patterns` — derived patterns from `observer patterns`
|
|
286
|
+
- `failure_context`, `action_excerpts` — diagnostic data
|
|
287
|
+
|
|
288
|
+
Pure-Go via `modernc.org/sqlite`, no CGO. WAL mode by default.
|
|
289
|
+
|
|
290
|
+
### 4. Local dashboard (`:8081`)
|
|
291
|
+
|
|
292
|
+
Ten tabs covering: Overview, Cost, Analysis, Sessions, Actions, Tools,
|
|
293
|
+
Compression, Discovery, Patterns, Settings. See [Dashboard tour](#dashboard-tour).
|
|
294
|
+
|
|
295
|
+
Static HTML + Chart.js. No analytics, no external requests.
|
|
296
|
+
|
|
297
|
+
### 5. MCP server (stdio) — opt-in via `observer init`
|
|
298
|
+
|
|
299
|
+
13 read-only tools the AI client itself can call mid-conversation —
|
|
300
|
+
`check_file_freshness`, `get_last_test_result`, `search_past_outputs`,
|
|
301
|
+
etc. (plus `retrieve_stashed` when the proxy stash is configured).
|
|
302
|
+
Powers cross-client tool sharing: if Claude Code ran `go test`,
|
|
303
|
+
Cursor's MCP query for the latest test result will return Claude
|
|
304
|
+
Code's run. See [MCP tools reference](#mcp-tools-reference).
|
|
305
|
+
|
|
306
|
+
**Lifecycle**: the MCP server is a stdio subprocess spawned by your
|
|
307
|
+
AI tool — not by the observer daemon. It's registered into each AI
|
|
308
|
+
client's MCP config only when you run `observer init`. `observer
|
|
309
|
+
start` alone does NOT register the MCP server. Adds roughly 1,800
|
|
310
|
+
tokens of tool-schema overhead per AI-client turn; opt out with
|
|
311
|
+
`observer init --skip-mcp` (skips MCP registration only; hooks and the Codex proxy route are still written) or by simply not
|
|
312
|
+
running `init`.
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
## Dashboard tour
|
|
316
|
+
|
|
317
|
+
Open `http://127.0.0.1:8081/` after `observer start`. Ten tabs.
|
|
318
|
+
|
|
319
|
+
### Overview tab
|
|
320
|
+
|
|
321
|
+
<p align="center">
|
|
322
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/01-overview.png" alt="Overview tab" width="900">
|
|
323
|
+
</p>
|
|
324
|
+
|
|
325
|
+
High-level snapshot of the selected window:
|
|
326
|
+
|
|
327
|
+
- **KPI tiles**: Sessions count, API turns (proxy-captured), Token
|
|
328
|
+
rows (JSONL-recovered), Failures (24h)
|
|
329
|
+
- **Cost over time** chart — daily token volume, split into the four
|
|
330
|
+
billable buckets (net input / cache read / cache write / output)
|
|
331
|
+
- **Actions over time** chart — total actions vs failures
|
|
332
|
+
- **Top models (by tokens)** chart — top-8 models stacked by net
|
|
333
|
+
input / cache read / output
|
|
334
|
+
- **Top tools (actions over time)** — per-AI-client stacked-area
|
|
335
|
+
showing when each client is active
|
|
336
|
+
|
|
337
|
+
### Cost tab
|
|
338
|
+
|
|
339
|
+
<p align="center">
|
|
340
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/04-cost.png" alt="Cost tab" width="900">
|
|
341
|
+
</p>
|
|
342
|
+
|
|
343
|
+
Per-model breakdown over the selected window. Tokens split into the
|
|
344
|
+
four billing buckets, with computed dollar cost and a reliability
|
|
345
|
+
flag. Cost is always computed locally as `tokens × pricing_table[model]`
|
|
346
|
+
— neither Anthropic nor OpenAI returns cost in their API responses, so
|
|
347
|
+
the proxy can't capture upstream-billed cost. Reliability values:
|
|
348
|
+
`accurate` (proxy-captured tokens, exact rate), `approximate` (JSONL-
|
|
349
|
+
sourced tokens, rate may be a family-prefix fallback), `unreliable`
|
|
350
|
+
(Claude Code JSONL streaming placeholders, ~10% off output), `unknown`
|
|
351
|
+
(no pricing entry for the model). Two adapters — OpenCode and Pi —
|
|
352
|
+
write their own per-turn cost into `estimated_cost_usd`; the engine
|
|
353
|
+
uses those as-is when present. See
|
|
354
|
+
`docs/pricing-reference.md` for the rate sheet.
|
|
355
|
+
|
|
356
|
+
Hover any column header for tooltip; click for the full definition
|
|
357
|
+
in the help drawer.
|
|
358
|
+
|
|
359
|
+
### Analysis tab
|
|
360
|
+
|
|
361
|
+
<p align="center">
|
|
362
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/05-analysis.png" alt="Analysis tab" width="900">
|
|
363
|
+
</p>
|
|
364
|
+
|
|
365
|
+
Spending insights for the selected window. Twelve headline KPI
|
|
366
|
+
tiles comparing this period to prior: spend Δ%, MTD vs budget with
|
|
367
|
+
projection bar, $/M output rate, cache savings + cache efficacy %,
|
|
368
|
+
high-context turn count, $/turn, burn rate ($/active hour), top
|
|
369
|
+
model concentration %, Discovery waste $, sessions total. Below
|
|
370
|
+
the tiles: a daily-spend stacked bar with Model / Project / Tool
|
|
371
|
+
dimension toggle, hour-of-day heatmap, top-12 expensive sessions
|
|
372
|
+
with explanatory badges (`opus`, `lc_tier`, `many_turns`,
|
|
373
|
+
`large_prompt`), period-over-period movers (top increases / decreases
|
|
374
|
+
/ new entrants), and routing-efficiency suggestions (trivial Opus
|
|
375
|
+
sessions that could have used Sonnet).
|
|
376
|
+
|
|
377
|
+
### Sessions tab
|
|
378
|
+
|
|
379
|
+
<p align="center">
|
|
380
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/02-sessions.png" alt="Sessions tab" width="900">
|
|
381
|
+
</p>
|
|
382
|
+
|
|
383
|
+
One row per AI-coding session. Each session has a stable ID, a tool
|
|
384
|
+
(claude-code / cursor / codex / cline / copilot / opencode / openclaw /
|
|
385
|
+
pi / antigravity / gemini-cli), a working-directory project, action
|
|
386
|
+
count, sub-agent action count (when the session spawned sub-agents via
|
|
387
|
+
the `Agent` tool), per-session **Tokens** and **Cost** columns, and —
|
|
388
|
+
if `observer score` has run — quality / errors / redundancy ratios. The `~` suffix on Cost flags rows whose pricing
|
|
389
|
+
was tier-fallback rather than billing-grade ("accurate" reliability).
|
|
390
|
+
|
|
391
|
+
Click a row to open the session-detail panel:
|
|
392
|
+
|
|
393
|
+
<p align="center">
|
|
394
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/10-session-detail.png" alt="Session detail slide-over" width="900">
|
|
395
|
+
</p>
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
- **Top tiles** — Tool, Started, Actions count (ok/fail), Cost.
|
|
399
|
+
- **Action breakdown** — chart of action_type counts.
|
|
400
|
+
- **Tokens** — the four billing buckets (Net Input / Cache Read /
|
|
401
|
+
Cache Write / Output) for the whole session, per-turn-deduped
|
|
402
|
+
(proxy preferred, JSONL fills gaps).
|
|
403
|
+
- **Per-model breakdown** — when a session uses multiple models
|
|
404
|
+
(Claude Code's main + sub-agent dispatches always do), a row per
|
|
405
|
+
model with its tokens and cost.
|
|
406
|
+
- **Messages** — per-message timeline keyed on the upstream
|
|
407
|
+
Anthropic `msg_xxx`. Each row shows the message id, role, model,
|
|
408
|
+
the message's own token bucket, cost, and a `N ▾` pill that
|
|
409
|
+
expands inline to show the contained tool calls. Toggle radio at
|
|
410
|
+
the top: **Tool messages only** (default — assistant turns with
|
|
411
|
+
≥1 tool call + user prompts) vs **All messages** (also pure-text
|
|
412
|
+
assistant replies). Truncated IDs (session_id, message_id)
|
|
413
|
+
show a dotted underline on hover and copy the full value to
|
|
414
|
+
clipboard on click; truncated text fields (target, error message)
|
|
415
|
+
click to expand in-place. Server-side paginated at 50/100/200
|
|
416
|
+
messages per page (selectable in the panel footer) — keeps the
|
|
417
|
+
browser responsive on multi-thousand-message sessions. Requires
|
|
418
|
+
`observer backfill --message-id` on first upgrade for historical
|
|
419
|
+
sessions to surface their parent message ids.
|
|
420
|
+
|
|
421
|
+
### Actions tab
|
|
422
|
+
|
|
423
|
+
<p align="center">
|
|
424
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/03-actions.png" alt="Actions tab" width="900">
|
|
425
|
+
</p>
|
|
426
|
+
|
|
427
|
+
The flat firehose: every recorded tool call, normalized across
|
|
428
|
+
adapters. Filter by action type (`read_file`, `write_file`,
|
|
429
|
+
`run_command`, `spawn_subagent`, `todo_update`, `mcp_call`, …).
|
|
430
|
+
Pagination caps at 50 rows per page; total count is shown next to
|
|
431
|
+
the heading.
|
|
432
|
+
|
|
433
|
+
### Tools tab
|
|
434
|
+
|
|
435
|
+
<p align="center">
|
|
436
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/06-tools.png" alt="Tools tab" width="900">
|
|
437
|
+
</p>
|
|
438
|
+
|
|
439
|
+
Per-AI-client (the *client* — claude-code / cursor / codex / etc., not
|
|
440
|
+
the per-tool name) aggregates plus three views:
|
|
441
|
+
|
|
442
|
+
- **KPI tiles**: Total actions, Distinct tools, Overall success rate,
|
|
443
|
+
Busiest tool
|
|
444
|
+
- **Activity over time** stacked-area showing per-tool action volume
|
|
445
|
+
per day
|
|
446
|
+
- **Action-type mix per tool** horizontal stacked bar — what each
|
|
447
|
+
tool actually does (read_file vs edit_file vs run_command vs
|
|
448
|
+
search_text vs spawn_subagent)
|
|
449
|
+
- The full per-tool aggregate table with first/last seen
|
|
450
|
+
|
|
451
|
+
### Compression tab
|
|
452
|
+
|
|
453
|
+
<p align="center">
|
|
454
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/07-compression.png" alt="Compression tab" width="900">
|
|
455
|
+
</p>
|
|
456
|
+
|
|
457
|
+
How many tokens and dollars the conversation-compression pipeline
|
|
458
|
+
saved by trimming requests before forwarding upstream:
|
|
459
|
+
|
|
460
|
+
- **KPI tiles**: Tokens saved (est.), Dollars saved (est.), Bytes
|
|
461
|
+
saved, Turns compressed
|
|
462
|
+
- **Savings per day** chart — daily tokens-saved (left axis) and
|
|
463
|
+
bytes-saved (right axis)
|
|
464
|
+
- **Savings by mechanism** stacked bar — segments per mechanism
|
|
465
|
+
(json / code / logs / text / diff / html / drop). **Toggle the
|
|
466
|
+
y-axis between tokens and bytes** with the chart-header switch.
|
|
467
|
+
- **Per-model breakdown table** — tokens saved ~, $ saved ~, bytes
|
|
468
|
+
saved, saved %, turns, tool-results compressed, dropped, markers
|
|
469
|
+
- **Recent compression events** — paginated per-event detail with
|
|
470
|
+
mechanism, original / compressed / saved bytes, message slot, importance
|
|
471
|
+
score (for drops), and a **Source** column showing whether the
|
|
472
|
+
event came from a main-thread or sub-agent runtime call
|
|
473
|
+
|
|
474
|
+
### Discovery tab
|
|
475
|
+
|
|
476
|
+
<p align="center">
|
|
477
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/08-discovery.png" alt="Discovery tab" width="900">
|
|
478
|
+
</p>
|
|
479
|
+
|
|
480
|
+
Wasted-effort signals:
|
|
481
|
+
|
|
482
|
+
- **Stale rereads** — files re-read after they changed inside the
|
|
483
|
+
same session. KPI tiles show count, ~Tokens wasted, ~$ wasted (at
|
|
484
|
+
your blended input rate), affected files. The **CROSS-THREAD**
|
|
485
|
+
column flags re-reads that crossed the parent ↔ sub-agent
|
|
486
|
+
boundary — these are the "pass content via Agent's prompt
|
|
487
|
+
parameter" candidates.
|
|
488
|
+
- **Repeated commands** — commands run multiple times with no
|
|
489
|
+
relevant inputs changed in between (e.g. you ran `go test` three
|
|
490
|
+
times without editing anything between runs).
|
|
491
|
+
- **Cross-tool overlap** — files touched by ≥2 AI clients in the
|
|
492
|
+
window (e.g. claude-code AND cursor both edited `auth.ts`). This
|
|
493
|
+
is the visible side of cross-platform tool-call sharing via the
|
|
494
|
+
MCP server.
|
|
495
|
+
|
|
496
|
+
### Patterns tab
|
|
497
|
+
|
|
498
|
+
Repeatable behaviours the observer noticed across your sessions —
|
|
499
|
+
"after running `go test`, you almost always run `go vet`", "when
|
|
500
|
+
working on `auth.ts`, you also touch `login.tsx`", etc. Each pattern
|
|
501
|
+
has a confidence score (decay-weighted: more observations + recent
|
|
502
|
+
observations push it higher) and an observation count.
|
|
503
|
+
|
|
504
|
+
`observer patterns` derives them; `observer suggest` writes the
|
|
505
|
+
high-confidence ones into `CLAUDE.md` / `AGENTS.md` / `.cursorrules`
|
|
506
|
+
so new sessions inherit your habits.
|
|
507
|
+
|
|
508
|
+
### Settings tab
|
|
509
|
+
|
|
510
|
+
<p align="center">
|
|
511
|
+
<img src="https://github.com/marmutapp/superbased-observer/raw/main/docs/assets/screenshots/09-settings.png" alt="Settings tab" width="900">
|
|
512
|
+
</p>
|
|
513
|
+
|
|
514
|
+
Fully editable visual editor for everything in `config.toml`.
|
|
515
|
+
Pricing overrides hot-reload (no daemon restart — `cost.Engine`
|
|
516
|
+
swaps the pricing table atomically via `atomic.Pointer.Store`).
|
|
517
|
+
The Backfill panel surfaces every `observer backfill` mode as
|
|
518
|
+
click-to-run buttons that spawn the CLI as a child process and
|
|
519
|
+
stream output back live. Watcher / Freshness / Retention / Hooks /
|
|
520
|
+
Proxy / Compression / Intelligence sections are schema-driven forms
|
|
521
|
+
with inline help; a "Restart daemon" banner appears whenever a
|
|
522
|
+
section is saved whose settings are only read at daemon startup.
|
|
523
|
+
|
|
524
|
+
### Help drawer
|
|
525
|
+
|
|
526
|
+
Press `?` anywhere on the dashboard or click the **? Help** button in
|
|
527
|
+
the topbar. Every column header, KPI tile, chart label, and filter
|
|
528
|
+
control on every tab is annotated — hover any element to see a
|
|
529
|
+
one-liner tooltip; click to open the drawer at the matching glossary
|
|
530
|
+
entry.
|
|
531
|
+
|
|
532
|
+
The drawer has full descriptions, formulas, data sources, examples,
|
|
533
|
+
"why it matters", "what to do", and cross-links. Search at the top.
|
|
534
|
+
Deep-linkable via URL fragment — `#help/metric.stale_count` opens
|
|
535
|
+
the drawer at that entry.
|
|
536
|
+
|
|
537
|
+
Each compression mechanism (json / code / logs / text / diff / html
|
|
538
|
+
/ drop) has a "Full methodology · see more" expandable section
|
|
539
|
+
explaining the actual algorithm.
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
## MCP tools reference
|
|
543
|
+
|
|
544
|
+
Once you've run `observer init` (it's opt-in — `observer start`
|
|
545
|
+
alone does NOT register the MCP server), every connected AI client
|
|
546
|
+
gets these 13 tools registered as an MCP server — plus
|
|
547
|
+
`retrieve_stashed` when the proxy stash is configured. They're
|
|
548
|
+
**read-only** queries against the unified database, so any agent
|
|
549
|
+
can read any other agent's recorded work — true cross-platform
|
|
550
|
+
tool-call sharing.
|
|
551
|
+
|
|
552
|
+
| Tool | Purpose |
|
|
553
|
+
|-----------------------------------|---------|
|
|
554
|
+
| `check_file_freshness` | Has this file been read in the current session? Has it changed since? |
|
|
555
|
+
| `get_file_history` | Full read/edit history of a file across all sessions and clients |
|
|
556
|
+
| `get_session_summary` | Roll-up stats for a session: action count, success rate, cost, token buckets |
|
|
557
|
+
| `search_past_outputs` | Full-text search across recorded tool outputs (FTS5-indexed) |
|
|
558
|
+
| `get_last_test_result` | Most recent `go test` / `npm test` / `pytest` etc. output |
|
|
559
|
+
| `get_failure_context` | Recent failures: which command, which file, which session |
|
|
560
|
+
| `get_action_details` | One specific action's full record (target, args, output excerpt) |
|
|
561
|
+
| `check_command_freshness` | Has this command been run in the current session? With what result? |
|
|
562
|
+
| `get_session_recovery_context` | Recent activity for resuming a paused session |
|
|
563
|
+
| `get_project_patterns` | High-confidence patterns derived from this project's history |
|
|
564
|
+
| `get_cost_summary` | Daily / per-model / per-session cost rollups |
|
|
565
|
+
| `get_redundancy_report` | Stale rereads, repeated commands, cross-tool overlap for the project |
|
|
566
|
+
| `list_actions_around` | ±N actions adjacent to a pivot `action_id` — browse a session's local timeline cheaply |
|
|
567
|
+
| `retrieve_stashed` _(conditional)_| Pulls original bytes of a tool_result the proxy compressed away. Only registered when `[compression.conversation.stash]` is configured. |
|
|
568
|
+
|
|
569
|
+
**Cost trade-off**: with the MCP registered, the AI client sends the
|
|
570
|
+
full tool-schema payload (~7.2 KB / ~1,800 tokens) in its system
|
|
571
|
+
context on every turn — whether or not the model actually invokes
|
|
572
|
+
any tool. To eliminate this overhead, run `observer init --skip-mcp`
|
|
573
|
+
(registers hooks only) or skip `init` entirely.
|
|
574
|
+
|
|
575
|
+
**Cross-tool sharing**: when `observer init` registers the MCP server
|
|
576
|
+
with Claude Code AND Cursor (and Codex…), all of them call the same
|
|
577
|
+
tools against the same database. Cursor's `get_last_test_result`
|
|
578
|
+
returns Claude Code's last test run; Codex's `check_file_freshness`
|
|
579
|
+
reflects edits made by Cursor.
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
## Compression mechanisms
|
|
583
|
+
|
|
584
|
+
The conversation-compression pipeline runs inside the proxy on every
|
|
585
|
+
upstream request. It tries to fit the request body within
|
|
586
|
+
`target_ratio × original_bytes` (default 0.85) without breaking the
|
|
587
|
+
conversation's referential integrity.
|
|
588
|
+
|
|
589
|
+
Two passes, in order:
|
|
590
|
+
|
|
591
|
+
### Pass 1 — per-content-type compression
|
|
592
|
+
|
|
593
|
+
Each `tool_result` block is sniffed for content type, then routed to
|
|
594
|
+
a content-aware compressor. Six compressors:
|
|
595
|
+
|
|
596
|
+
| Mechanism | What it does | When it fires |
|
|
597
|
+
|-----------|--------------|---------------|
|
|
598
|
+
| **json** | Replaces every scalar value with a type sentinel (`"<string>"`, `"<number>"`, …) preserving structure (keys, arrays, nesting). Arrays of length > 1 collapse to one element with `_len: N`. | API responses, structured logs, telemetry exports. JSON tool_results are usually the biggest savings target. |
|
|
599
|
+
| **code** | Heuristic skeleton: keeps top-of-file imports + signature lines (function / method / class / struct / interface / type), drops bodies. | Source code files. **On by default** as of v1.7.23 for the `claude-code` recipe (V7-24 empirical winner). |
|
|
600
|
+
| **logs** | Two-pass: collapses adjacent identical lines to `<line> [×N]`, then head+tail-truncates to 200 lines if still long. | Log-shaped output — `go test ./...`, `npm run build`, polling/retry loops. Lossless on distinct lines; only the truncate pass is lossy. |
|
|
601
|
+
| **text** | Catch-all for content not classified as code/json/logs/diff/html. Head+tail truncation: keeps 40 + 40 lines on inputs over 80 lines. | Markdown bodies, README excerpts, narrative descriptions. |
|
|
602
|
+
| **diff** | Strips unified-diff context beyond ±1 line of each change. Keeps every header, every `+`/`-` line, drops the rest with elision markers. | `git diff`, patch tool outputs. Lossless on changes; lossy only on the cheap-to-rebuild context. |
|
|
603
|
+
| **html** | Three regex passes: strips `<script>`, `<style>`, and HTML comments. Tag attributes + visible text + structural elements survive. | `web_fetch` results pulling whole HTML pages — usually 80%+ scripts/styles. |
|
|
604
|
+
|
|
605
|
+
### Pass 2 — drop with marker
|
|
606
|
+
|
|
607
|
+
If Pass 1's compressed body is still over budget, the budget enforcer
|
|
608
|
+
ranks remaining messages by **importance score** (a deterministic
|
|
609
|
+
weighted sum) and drops the lowest-scored non-preserved ones until
|
|
610
|
+
the budget is met. Each dropped message is replaced by a single
|
|
611
|
+
marker block (a placeholder text) so the conversation flow stays
|
|
612
|
+
intact for the model.
|
|
613
|
+
|
|
614
|
+
**Importance score** = `0.4 × recency + 0.3 × reference + 0.15 × density + 0.15 × role`
|
|
615
|
+
|
|
616
|
+
- **Recency** = `(i+1) / n` — newest message scores 1.0, oldest scores 1/n
|
|
617
|
+
- **Reference** = `1.0` if any of the message's `tool_use_ids` is
|
|
618
|
+
cited by a later `tool_result`, OR any of its `referenced_ids` points
|
|
619
|
+
to a live tool_use; `0.0` otherwise. Tool-pair-live messages always
|
|
620
|
+
get full weight regardless of position.
|
|
621
|
+
- **Density** = fraction of non-whitespace runes (whitespace-padded
|
|
622
|
+
outputs get dropped first)
|
|
623
|
+
- **Role** = `system 1.0`, `user 0.9`, `assistant 0.7`, `tool 0.5`
|
|
624
|
+
(tool outputs are the most-compressible by policy)
|
|
625
|
+
|
|
626
|
+
**Preserved messages** (never droppable):
|
|
627
|
+
- The last `PreserveLastN` messages (`preserve_last_n`, default 5)
|
|
628
|
+
- Any `system` role message
|
|
629
|
+
- **Tool-pair-live messages**: any message whose `tool_use_id` is
|
|
630
|
+
referenced by a later tool_result (parent side), AND any
|
|
631
|
+
`tool_result` message whose `referenced_id` points to a live
|
|
632
|
+
tool_use (consumer side)
|
|
633
|
+
|
|
634
|
+
Tool-pair preservation is symmetric — dropping either side leaves an
|
|
635
|
+
orphan that Anthropic rejects with 400.
|
|
636
|
+
|
|
637
|
+
### Per-event detail
|
|
638
|
+
|
|
639
|
+
Every drop and every per-type compression is recorded as a row in
|
|
640
|
+
the `compression_events` table (post migration 010). The Compression
|
|
641
|
+
tab's "Recent compression events" view surfaces these with
|
|
642
|
+
mechanism, original / compressed / saved bytes, message slot, and
|
|
643
|
+
importance score (for drops).
|
|
644
|
+
|
|
645
|
+
### Tuning
|
|
646
|
+
|
|
647
|
+
`observer config` settings (in `~/.observer/config.toml`). These are the only
|
|
648
|
+
keys the conversation compressor reads:
|
|
649
|
+
|
|
650
|
+
```toml
|
|
651
|
+
[compression.conversation]
|
|
652
|
+
enabled = true
|
|
653
|
+
mode = "cache_aware" # "token" | "cache" | "cache_aware" — default cache_aware; see matrix below
|
|
654
|
+
target_ratio = 0.85
|
|
655
|
+
preserve_last_n = 5 # never drop the most recent N messages
|
|
656
|
+
compress_types = ["json", "logs", "code"] # default; add "text", "diff", "html" to opt in
|
|
657
|
+
```
|
|
658
|
+
|
|
659
|
+
High importance scores on dropped events (≥0.5) suggest the threshold
|
|
660
|
+
is too aggressive — raise `target_ratio` (e.g. 0.9 or 0.95).
|
|
661
|
+
|
|
662
|
+
### Choosing a mode: Anthropic vs Codex
|
|
663
|
+
|
|
664
|
+
Per-type `tool_result` compression runs in every mode; `mode` only changes how
|
|
665
|
+
messages are dropped and whether an Anthropic `cache_control` marker is injected.
|
|
666
|
+
|
|
667
|
+
| `mode` | What it does | Claude Code (Anthropic) | Codex / OpenAI |
|
|
668
|
+
|---|---|---|---|
|
|
669
|
+
| `token` | Per-type compress, then drop lowest-scored messages to hit `target_ratio`. | ✅ Works. | ✅ Clearest choice for Codex/OpenAI. |
|
|
670
|
+
| `cache` | Restrict drops to the tail half + inject a `cache_control` marker at the prefix boundary. | ✅ Anthropic-specific. | ⚠️ No effect beyond `token`. |
|
|
671
|
+
| `cache_aware` *(default)* | Skip drops, narrow compression to `tool_result` blocks, no marker; keep history byte-stable across turns so Anthropic's prefix cache keeps hitting. | ✅ **Recommended for Anthropic Pro/Max** — and the shipped default. | ⚠️ No effect beyond `token`. |
|
|
672
|
+
|
|
673
|
+
The shipped default is `cache_aware` (`token` is just the internal fallback when
|
|
674
|
+
`mode` is empty). The cache modes exist for **Anthropic's content-hash prefix
|
|
675
|
+
cache** (`cache_control` is an Anthropic Messages API concept). OpenAI/Codex
|
|
676
|
+
prompt caching is **automatic and server-side** — nothing to mark or tune, so
|
|
677
|
+
the proxy's OpenAI path is mode-agnostic (the default `cache_aware` behaves like
|
|
678
|
+
`token` there). So: keep `cache_aware` for Claude Code; `mode = "token"` reads
|
|
679
|
+
honestly for a Codex/OpenAI-only setup.
|
|
680
|
+
|
|
681
|
+
Beyond the keys above, three opt-in sub-features have their own tables —
|
|
682
|
+
`[compression.conversation.stash]` (Compressed-Content Retrieval),
|
|
683
|
+
`[compression.conversation.rolling]` (rolling summarisation, with a per-provider
|
|
684
|
+
summary model: `summary_model` for Anthropic, `openai_summary_model` for
|
|
685
|
+
OpenAI/Codex), and `[compression.conversation.compaction]`. The full knob
|
|
686
|
+
reference lives in `docs/compression-modes.md`.
|
|
687
|
+
|
|
688
|
+
### Measured savings (v1.7.23)
|
|
689
|
+
|
|
690
|
+
We A/B every shipped recipe against an OFF baseline on a real refactor
|
|
691
|
+
workload (`lumen` TypeScript codebase, 408-line Zustand store →
|
|
692
|
+
4 domain sub-stores) on the v1.7.22 binary tip. The numbers below are
|
|
693
|
+
the **most recent statistically-meaningful measurements**.
|
|
694
|
+
|
|
695
|
+
**Pick a recipe based on which model you're running:**
|
|
696
|
+
|
|
697
|
+
| Recipe | Use when your model is… | Workload | n | Δ vs OFF (mean cost) | What's compressed |
|
|
698
|
+
|---|---|---|---|---|---|
|
|
699
|
+
| **`claude-code`** *(default)* | Any Anthropic Claude model — `claude-sonnet-4-6`, `claude-opus-4-7`, `claude-haiku-4-5`, … | Refactor, Claude Sonnet 4.6 via Claude Code 2.1.158 | n=8 B vs n=4 OFF | **−6.9%** (CV 7.6%; comparable to OFF's 7.5%) | json + logs + code bodies; cache-aware; stash disabled |
|
|
700
|
+
| **`codex-variant`** | OpenAI's `-codex` reasoning fork — `gpt-5.3-codex`, `gpt-5.4-codex`, `gpt-5-codex-agent`, anything matching `*-codex*` | Refactor, gpt-5.3-codex | n=10 B vs n=10 OFF | **−10%** ($0.270 vs $0.300) | Tools-defs trim; cache-aware; no per-type compression |
|
|
701
|
+
| **`codex-safe`** | Plain OpenAI GPT under the codex CLI — `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.5`, `gpt-4o`, any non-`-codex` | Refactor, gpt-5.4 + `apply_patch` | n=3 B vs n=4 OFF | not statistically distinguishable on this workload | logs only; cache-aware |
|
|
702
|
+
|
|
703
|
+
The word **"variant"** in `codex-variant` refers to the *model variant* (the `-codex` reasoning fork of GPT), NOT a variant of the codex CLI. Both codex recipes are for the codex CLI; they differ only in which model family they assume. `codex-safe` is so named because plain GPT models tolerate logs trimming safely — it's not "safer than codex-variant."
|
|
704
|
+
|
|
705
|
+
**Honest caveats:**
|
|
706
|
+
|
|
707
|
+
- **Workload-dependent.** The `codex-safe` row on gpt-5.4 was
|
|
708
|
+
inconclusive because the test workload used `apply_patch` (classified
|
|
709
|
+
as `code`, not `logs`) so `compress_types=["logs"]` never fired —
|
|
710
|
+
the proxy was a functional no-op and the cost variance was session
|
|
711
|
+
noise. A Bash-heavy workload would tell a different story.
|
|
712
|
+
- **`claude-code` requires `ENABLE_TOOL_SEARCH=true`** in your shell.
|
|
713
|
+
Without it, Claude Code's SDK disables ToolSearch under
|
|
714
|
+
`ANTHROPIC_BASE_URL` and eager-inlines all MCP schemas (~+21K
|
|
715
|
+
tokens per turn). The proxy then becomes a net loss instead of
|
|
716
|
+
the −6.9% above. Setup steps cover this; verify with
|
|
717
|
+
`printenv ENABLE_TOOL_SEARCH`.
|
|
718
|
+
- **`stash` stays disabled by default for Anthropic** (V7-25 finding:
|
|
719
|
+
+25% cost on n=1 due to prefix-cache miss; stash markers break
|
|
720
|
+
Anthropic's content-hash cache). Operators can opt in for a measured
|
|
721
|
+
workload but should A/B their own.
|
|
722
|
+
- **Historic claims of higher savings are retracted.** The v1.4.38
|
|
723
|
+
release notes cited −14.8%; the project itself walked that back
|
|
724
|
+
after a deeper repro showed it was within noise. The numbers above
|
|
725
|
+
are the post-retraction floor.
|
|
726
|
+
|
|
727
|
+
**Reproduce it yourself:**
|
|
728
|
+
|
|
729
|
+
```bash
|
|
730
|
+
# Full methodology, raw arm data, per-arm cost rows, and a reproducer
|
|
731
|
+
# script live in this repo:
|
|
732
|
+
docs/v1.7.23-compression-savings-empirical-2026-06-01.md
|
|
733
|
+
```
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
## Cost and token math
|
|
737
|
+
|
|
738
|
+
Anthropic's usage envelope reports four token buckets per request,
|
|
739
|
+
each at a different rate:
|
|
740
|
+
|
|
741
|
+
| Bucket | What it is | Bills at |
|
|
742
|
+
|--------------------|------------------------------------------------------------------------------|-----------------------------|
|
|
743
|
+
| `net_input` | Fresh prompt tokens not served from cache | model's standard input rate |
|
|
744
|
+
| `cache_read` | Prompt tokens served from Anthropic's ephemeral cache | ~10% of input rate |
|
|
745
|
+
| `cache_creation` | Tokens written to ephemeral cache | 1.25× input (5m tier) or 2× input (1h tier) |
|
|
746
|
+
| `output` | What the model generated | typically 5× input rate |
|
|
747
|
+
|
|
748
|
+
`prompt_context = net_input + cache_read + cache_creation`
|
|
749
|
+
`total_tokens = prompt_context + output`
|
|
750
|
+
|
|
751
|
+
The cost engine (`internal/intelligence/cost`) computes USD via:
|
|
752
|
+
|
|
753
|
+
```
|
|
754
|
+
cost_usd = (net_input × p.input + cache_read × p.cache_read +
|
|
755
|
+
cache_creation_5m × p.cache_creation +
|
|
756
|
+
cache_creation_1h × p.cache_creation_1h +
|
|
757
|
+
output × p.output) ÷ 1,000,000
|
|
758
|
+
```
|
|
759
|
+
|
|
760
|
+
When the upstream API returns `cost_usd` in the response envelope
|
|
761
|
+
(proxy-sourced rows), that value is preferred over the computed one —
|
|
762
|
+
ground truth, reliability=high.
|
|
763
|
+
|
|
764
|
+
### Blended input rate
|
|
765
|
+
|
|
766
|
+
The Discovery tab's "~$ wasted" tile uses a **blended input rate**
|
|
767
|
+
computed from your last-30d api_turns mix: each model's input rate
|
|
768
|
+
weighted by the prompt-token volume it consumed. Example: if you
|
|
769
|
+
spent 70% of prompt tokens on opus-4-7 ($15/1M) and 30% on
|
|
770
|
+
haiku-4-5 ($1/1M), the blended rate is `0.7 × 15 + 0.3 × 1 = $10.80/1M`.
|
|
771
|
+
|
|
772
|
+
Falls back to $3/1M (claude-sonnet-4 input) on fresh installs with
|
|
773
|
+
no proxy data.
|
|
774
|
+
|
|
775
|
+
### JSONL dedup
|
|
776
|
+
|
|
777
|
+
When the proxy isn't engaged, observer falls back to parsing the AI
|
|
778
|
+
client's on-disk session log. Clients echo the same cumulative usage
|
|
779
|
+
on every content block of a multi-block response, so naive parsing
|
|
780
|
+
counts one API call 2-4×. Two layers of dedup catch this:
|
|
781
|
+
|
|
782
|
+
1. **Adapter-level**: dedupes on Anthropic `message.id` at write time
|
|
783
|
+
2. **Cost-engine-level**: dedupes on `(source_file, model, timestamp-bucketed-to-minute, tokens)` at read time
|
|
784
|
+
|
|
785
|
+
Migration 007 ran a one-time pass collapsing pre-fix duplicates.
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
## Terminology and glossary
|
|
789
|
+
|
|
790
|
+
Quick reference; the in-platform help drawer (press `?` on the
|
|
791
|
+
dashboard) has the full versions with cross-links.
|
|
792
|
+
|
|
793
|
+
- **Action** — one normalized tool call recorded by an adapter. Action
|
|
794
|
+
types are taxonomic and cross-client: `read_file`, `write_file`,
|
|
795
|
+
`edit_file`, `run_command`, `search_text`, `search_files`,
|
|
796
|
+
`web_search`, `web_fetch`, `mcp_call`, `spawn_subagent`,
|
|
797
|
+
`todo_update`, `ask_user`, `task_complete`, `user_prompt`,
|
|
798
|
+
`api_error`, `turn_aborted` (interrupted before completion —
|
|
799
|
+
distinct from task_complete/success=false; v1.4.22+),
|
|
800
|
+
`context_compacted` (upstream-emitted compaction marker, not
|
|
801
|
+
searchable like file edits; v1.4.22+), `system_prompt`
|
|
802
|
+
(system/developer/user-envelope content; v1.4.23+), `unknown`.
|
|
803
|
+
- **API turn** — one HTTP request captured by the local proxy. Records
|
|
804
|
+
one row in `api_turns` per request, with the upstream usage envelope
|
|
805
|
+
intact.
|
|
806
|
+
- **Cache 5m vs 1h tier** — Anthropic's prompt cache has two TTLs.
|
|
807
|
+
Default is 5 minutes; `cache_control: {type: ephemeral, ttl: "1h"}`
|
|
808
|
+
extends to 1 hour, with writes billed at 2× the input rate (vs 1.25× for 5m). Reads bill the same rate
|
|
809
|
+
regardless of tier.
|
|
810
|
+
- **Compression event** — one individual compression decision (one
|
|
811
|
+
per-type compress, or one drop) recorded post migration 010.
|
|
812
|
+
- **Conversation compression** — pre-forward trimming of API request
|
|
813
|
+
bodies. Pass 1 = per-content-type compression, Pass 2 = drop with
|
|
814
|
+
marker. See [Compression mechanisms](#compression-mechanisms).
|
|
815
|
+
- **Cross-platform tool calling** — every AI client connected via
|
|
816
|
+
`observer init` can call the 13 MCP tools against the unified
|
|
817
|
+
database. So Cursor's `get_last_test_result` can return a `go test`
|
|
818
|
+
Claude Code ran an hour earlier.
|
|
819
|
+
- **Cross-thread reread** — the parent thread re-reads a file the
|
|
820
|
+
sub-agent already saw (or vice versa) within the same session. Fix:
|
|
821
|
+
pass content via the Agent tool's `prompt` parameter rather than
|
|
822
|
+
letting the child re-read.
|
|
823
|
+
- **Freshness state** — per-read tag from the freshness engine: `fresh`
|
|
824
|
+
(first read in this session, OR re-read with same content),
|
|
825
|
+
`stale` (re-read after change in same session), `missing` (file no
|
|
826
|
+
longer exists), `modified-elsewhere` (file changed by something
|
|
827
|
+
other than an observable AI action).
|
|
828
|
+
- **Mechanism** — one of `json`, `code`, `logs`, `text`, `diff`,
|
|
829
|
+
`html` (per-content-type compressor) or `drop` (low-importance
|
|
830
|
+
message replaced by a marker).
|
|
831
|
+
- **Pattern** — a derived behaviour: `command_pair` (X often
|
|
832
|
+
followed by Y), `cross_tool_file` (file touched by multiple
|
|
833
|
+
clients), `knowledge_snippet` (consistent topic-specific habit),
|
|
834
|
+
`failure_correlation` (X often precedes a failure of Y),
|
|
835
|
+
`session_summary`. Each has a decay-weighted confidence score 0-1.
|
|
836
|
+
- **Project** — working-directory root that owns sessions and
|
|
837
|
+
actions. Derived from cwd at session start; `/.git/worktrees/...`
|
|
838
|
+
paths fold back to the working-tree root.
|
|
839
|
+
- **Proxy vs JSONL** — proxy intercepts upstream HTTP calls (ground
|
|
840
|
+
truth, reliability=high). JSONL parses the AI client's on-disk
|
|
841
|
+
session log (works without configuring a base URL, but client
|
|
842
|
+
echoes cumulative usage on every block, requiring dedup —
|
|
843
|
+
reliability=unreliable for token counts on Claude Code).
|
|
844
|
+
- **Reliability** — cost-engine confidence: `high` (upstream-
|
|
845
|
+
reported), `medium` (computed from known pricing), `low` (some
|
|
846
|
+
buckets estimated), `unreliable` (no pricing entry).
|
|
847
|
+
- **Session** — one continuous AI-coding conversation in a single
|
|
848
|
+
tool, scoped to one working directory. Has a stable ID (Claude
|
|
849
|
+
Code's UUID, Codex's rollout ID, …).
|
|
850
|
+
- **Sidechain** — actions emitted inside a sub-agent runtime spawned
|
|
851
|
+
via the parent's `Agent` tool. Sub-agents share the parent's
|
|
852
|
+
session_id; the `is_sidechain` column distinguishes them. The
|
|
853
|
+
Discovery tab's CROSS-THREAD column counts stale rereads that
|
|
854
|
+
crossed this boundary.
|
|
855
|
+
- **Stale reread** — same-session re-read of a file whose content
|
|
856
|
+
changed between reads. Cross-session reads are excluded (a fresh
|
|
857
|
+
session has no memory of a prior session's read).
|
|
858
|
+
- **Tool** — in this dashboard, "tool" means the *AI client*
|
|
859
|
+
(claude-code, cursor, codex, cline, copilot, opencode, openclaw,
|
|
860
|
+
pi, antigravity, gemini-cli), not the per-tool name (`read_file`,
|
|
861
|
+
`run_command`). The latter is "Tool name" on the Actions tab.
|
|
862
|
+
- **Tool-pair integrity** — Anthropic requires every `tool_result`
|
|
863
|
+
block to have a corresponding `tool_use` block in a preceding
|
|
864
|
+
message. The compression pipeline preserves both sides of every
|
|
865
|
+
live pair to satisfy this constraint.
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
## CLI reference
|
|
869
|
+
|
|
870
|
+
Every command supports `--help` for the full surface.
|
|
871
|
+
|
|
872
|
+
| Subcommand | Purpose |
|
|
873
|
+
|------------------------------------|---------|
|
|
874
|
+
| `observer init` | Register hooks + MCP server with installed AI clients |
|
|
875
|
+
| `observer uninstall` | Reverse `observer init` |
|
|
876
|
+
| `observer start` | Run watcher + dashboard + proxy in one process (recommended). Flags: `--dashboard-addr ADDR` (default `127.0.0.1:8081`), `--no-dashboard` to skip the HTTP UI. |
|
|
877
|
+
| `observer watch` | Long-running JSONL watcher only |
|
|
878
|
+
| `observer dashboard --addr ADDR` | HTTP dashboard only |
|
|
879
|
+
| `observer proxy start` | Reverse proxy only |
|
|
880
|
+
| `observer scan` | One-shot ingest of existing JSONL files (catch-up after install) |
|
|
881
|
+
| `observer status` | DB stats + recent activity |
|
|
882
|
+
| `observer doctor` | Diagnostic — checks paths, schemas, hook registration |
|
|
883
|
+
| `observer tail` | Live tail of incoming events |
|
|
884
|
+
| `observer cost` | Per-model cost summary CLI |
|
|
885
|
+
| `observer score` | Compute quality_score / error_rate / redundancy_ratio for sessions |
|
|
886
|
+
| `observer discover` | Stale rereads + repeated commands report (CLI version of the Discovery tab) |
|
|
887
|
+
| `observer patterns` | Derive patterns from session history |
|
|
888
|
+
| `observer learn` | Adapter for ingesting external JSONL exports |
|
|
889
|
+
| `observer suggest` | Write high-confidence patterns into CLAUDE.md / AGENTS.md / .cursorrules |
|
|
890
|
+
| `observer summarize` | Roll-up summary across sessions |
|
|
891
|
+
| `observer export` | Export DB to xlsx / json |
|
|
892
|
+
| `observer prune` | Manual retention pass (delete old data) |
|
|
893
|
+
| `observer backfill --is-sidechain` | Re-walk JSONL to populate `actions.is_sidechain` (added by migration 010) on pre-migration rows. |
|
|
894
|
+
| `observer backfill --cache-tier` | Re-walk JSONL to populate `cache_creation_1h_tokens` (added by migration 008) on pre-migration rows. Run once after upgrading to v1.4.16+ to correct historical 1h-tier cache writes that were silently billed at the cheaper 5m rate. |
|
|
895
|
+
| `observer backfill --message-id` | Re-walk JSONL to populate `message_id` on `actions` and `token_usage` (added by migration 012). Required by the per-message timeline view in the Sessions modal. |
|
|
896
|
+
| `observer backfill --all` | Run every supported backfill in one invocation. Idempotent — safe to re-run. |
|
|
897
|
+
| `observer metrics` | Prometheus-format metrics endpoint |
|
|
898
|
+
| `observer serve` | MCP server (stdio JSON-RPC) — usually invoked by `observer init` registration |
|
|
899
|
+
| `observer tail` | Live event stream |
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
## Configuration
|
|
903
|
+
|
|
904
|
+
`~/.observer/config.toml` — created with defaults on first run.
|
|
905
|
+
|
|
906
|
+
```toml
|
|
907
|
+
[paths]
|
|
908
|
+
db_path = "~/.observer/observer.db"
|
|
909
|
+
log_dir = "~/.observer/logs"
|
|
910
|
+
|
|
911
|
+
[proxy]
|
|
912
|
+
listen_addr = "127.0.0.1"
|
|
913
|
+
port = 8820
|
|
914
|
+
anthropic_upstream = "https://api.anthropic.com"
|
|
915
|
+
openai_upstream = "https://api.openai.com"
|
|
916
|
+
|
|
917
|
+
[dashboard]
|
|
918
|
+
listen_addr = "127.0.0.1"
|
|
919
|
+
port = 8081
|
|
920
|
+
|
|
921
|
+
[compression.conversation]
|
|
922
|
+
enabled = false # opt-in; default off
|
|
923
|
+
mode = "cache_aware" # default; "token" | "cache" | "cache_aware" (see "Choosing a mode")
|
|
924
|
+
target_ratio = 0.85
|
|
925
|
+
preserve_last_n = 5
|
|
926
|
+
compress_types = ["json", "logs", "code"] # default; add "text"/"diff"/"html" to opt in
|
|
927
|
+
|
|
928
|
+
[compression.shell]
|
|
929
|
+
enabled = true
|
|
930
|
+
# per-command filters configured under [compression.shell.filters]
|
|
931
|
+
|
|
932
|
+
[retention]
|
|
933
|
+
prune_on_startup = true
|
|
934
|
+
max_actions = 5_000_000
|
|
935
|
+
max_age_days = 365
|
|
936
|
+
|
|
937
|
+
[pricing]
|
|
938
|
+
# Per-model overrides if the baked-in pricing is wrong for you.
|
|
939
|
+
# [pricing.models."claude-opus-4-7"]
|
|
940
|
+
# input = 15
|
|
941
|
+
# output = 75
|
|
942
|
+
# cache_read = 1.5
|
|
943
|
+
# cache_creation = 18.75
|
|
944
|
+
```
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
## Troubleshooting
|
|
948
|
+
|
|
949
|
+
### `pip install` fails with `error: externally-managed-environment`
|
|
950
|
+
|
|
951
|
+
Modern Linux distros (Debian 12+, Ubuntu 24.04+, Fedora 38+) mark
|
|
952
|
+
the system Python as PEP 668 "externally managed" — installing
|
|
953
|
+
into it would conflict with the OS package manager. Three fixes,
|
|
954
|
+
pick one:
|
|
955
|
+
|
|
956
|
+
```bash
|
|
957
|
+
# 1) RECOMMENDED — uv tool install (isolated env, fastest)
|
|
958
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
959
|
+
uv tool install superbased-observer
|
|
960
|
+
|
|
961
|
+
# 2) pipx (same isolation, pre-PEP-668 idiomatic)
|
|
962
|
+
pipx install superbased-observer
|
|
963
|
+
|
|
964
|
+
# 3) Plain pip into your user site
|
|
965
|
+
pip install --user --break-system-packages superbased-observer  # PEP 668 rejects a bare --user install too
|
|
966
|
+
# Make sure ~/.local/bin is on $PATH:
|
|
967
|
+
echo $PATH | tr ':' '\n' | grep -F "$(python3 -m site --user-base)/bin"
|
|
968
|
+
```
|
|
969
|
+
|
|
970
|
+
`uv tool` and `pipx` create a dedicated virtualenv per tool so the
|
|
971
|
+
install never collides with another project. Recommended unless you
|
|
972
|
+
have a reason to share the global env.
|
|
973
|
+
|
|
974
|
+
### `observer: command not found` after install
|
|
975
|
+
|
|
976
|
+
The console-script entry point is wherever your installer dropped
|
|
977
|
+
it:
|
|
978
|
+
|
|
979
|
+
| Installer | Location |
|
|
980
|
+
|---|---|
|
|
981
|
+
| `pip install --user` | `$(python3 -m site --user-base)/bin/observer` (often `~/.local/bin/observer`) |
|
|
982
|
+
| `pipx install` | `~/.local/bin/observer` (symlink to the pipx-managed venv) |
|
|
983
|
+
| `uv tool install` | `~/.local/share/uv/tools/superbased-observer/bin/observer` (with `~/.local/bin/observer` shim) |
|
|
984
|
+
| Plain `pip install` in a venv | `<venv>/bin/observer` |
|
|
985
|
+
|
|
986
|
+
Make sure the matching directory is on `$PATH`. If you see a
|
|
987
|
+
"command not found" error, run `pip show -f superbased-observer | grep observer`
|
|
988
|
+
to find the exact path.
|
|
989
|
+
|
|
990
|
+
### `observer init` says "no tools selected and none auto-detected"
|
|
991
|
+
|
|
992
|
+
Auto-detection looks for the AI clients' default session-log dirs
|
|
993
|
+
(`~/.claude/projects/`, `~/.codex/sessions/`, `~/.cursor/`, etc.).
|
|
994
|
+
On a fresh machine where no client has run yet, those dirs don't
|
|
995
|
+
exist. Pass the flag explicitly:
|
|
996
|
+
|
|
997
|
+
```bash
|
|
998
|
+
observer init --claude-code # or --codex / --cursor / --cline / --all
|
|
999
|
+
```
|
|
1000
|
+
|
|
1001
|
+
This registers hooks regardless — the next time the client runs,
|
|
1002
|
+
its dirs get created and the watcher picks them up.
|
|
1003
|
+
|
|
1004
|
+
### Empty dashboard / "No proxy traffic"
|
|
1005
|
+
|
|
1006
|
+
The JSONL adapter populates passively after `observer init`, but
|
|
1007
|
+
ground-truth cost / compression numbers require the proxy. Set
|
|
1008
|
+
`ANTHROPIC_BASE_URL=http://127.0.0.1:8820` (Claude Code) or
|
|
1009
|
+
`OPENAI_BASE_URL=http://127.0.0.1:8820/v1` (Codex) in the shell
|
|
1010
|
+
that launches your AI client.
|
|
1011
|
+
|
|
1012
|
+
Verify with `observer status | grep api_turns` — the count should
|
|
1013
|
+
climb during AI-client activity.
|
|
1014
|
+
|
|
1015
|
+
### `observer --version` says `dev`
|
|
1016
|
+
|
|
1017
|
+
You're on a non-released build. Reinstall a tagged release with `pip install --force-reinstall superbased-observer` (or `uv tool install --force superbased-observer`), or rebuild with the workflow's `-X main.version=$VERSION` ldflag.
|
|
1018
|
+
|
|
1019
|
+
### `tool_result block must have a corresponding tool_use block`
|
|
1020
|
+
|
|
1021
|
+
An Anthropic 400 error. It means the conversation-compression pipeline dropped
|
|
1022
|
+
a `tool_use` while keeping its matching `tool_result`. Versions
|
|
1023
|
+
prior to 1.3.2 had this bug; upgrade. If you're on 1.3.2+ and still
|
|
1024
|
+
see it, file an issue with the conversation prefix.
|
|
1025
|
+
|
|
1026
|
+
### `tool use concurrency issues`
|
|
1027
|
+
|
|
1028
|
+
An Anthropic 400 error, surfaced in Claude Code as this message. It means the
|
|
1029
|
+
parallel-tool-use case (multiple `tool_use` blocks in one assistant
|
|
1030
|
+
message) isn't paired correctly with the multi-block tool_result
|
|
1031
|
+
that follows. Versions prior to 1.3.2 had this bug; upgrade.
|
|
1032
|
+
|
|
1033
|
+
### Cross-thread numbers are 0
|
|
1034
|
+
|
|
1035
|
+
Pre-migration data was ingested without the `is_sidechain` flag.
|
|
1036
|
+
Run `observer backfill --is-sidechain` once to re-walk JSONL and
|
|
1037
|
+
populate the flag on existing rows.
|
|
1038
|
+
|
|
1039
|
+
### Migration error: `duplicate column name`
|
|
1040
|
+
|
|
1041
|
+
Caused by a race condition between concurrent daemon startups, fixed in 1.4.1.
|
|
1042
|
+
Upgrade. If you still see it, run daemons serially: `observer
|
|
1043
|
+
watch`, wait, then `observer dashboard`, then `observer proxy
|
|
1044
|
+
start` (or just use `observer start` which runs all three in one
|
|
1045
|
+
process — proxy + watcher + dashboard).
|
|
1046
|
+
|
|
1047
|
+
### `observer start` log says only `proxy + observer` — no `:8081`
|
|
1048
|
+
|
|
1049
|
+
You're on a pre-1.4.7 build. Earlier versions ran only proxy +
|
|
1050
|
+
watcher under `observer start`; the dashboard had to be started
|
|
1051
|
+
separately via `observer dashboard --addr 127.0.0.1:8081`. Upgrade
|
|
1052
|
+
to 1.4.7+ — the dashboard goroutine is now part of `observer start`
|
|
1053
|
+
and the log line confirms all three: `proxy <addr> + watcher +
|
|
1054
|
+
dashboard http://127.0.0.1:8081`. Pass `--no-dashboard` to opt out.
|
|
1055
|
+
|
|
1056
|
+
### "address already in use" on port 8820
|
|
1057
|
+
|
|
1058
|
+
Another `observer proxy start` or `observer start` is still running.
|
|
1059
|
+
Find it with `pgrep -af 'observer (proxy|start)'` and `kill <pid>`.
|
|
1060
|
+
On macOS:
|
|
1061
|
+
|
|
1062
|
+
```bash
|
|
1063
|
+
lsof -nP -iTCP:8820 -sTCP:LISTEN
|
|
1064
|
+
kill <pid>
|
|
1065
|
+
```
|
|
1066
|
+
|
|
1067
|
+
### Dashboard port already in use
|
|
1068
|
+
|
|
1069
|
+
```bash
|
|
1070
|
+
observer dashboard --addr 127.0.0.1:8082 # pick a different port
|
|
1071
|
+
# or set the port permanently in config.toml:
|
|
1072
|
+
[dashboard]
|
|
1073
|
+
port = 8082 # in config.toml
|
|
1074
|
+
```
|
|
1075
|
+
|
|
1076
|
+
|
|
1077
|
+
## Security and privacy
|
|
1078
|
+
|
|
1079
|
+
**Local-only. No telemetry. No remote anything.** The watcher, hook
|
|
1080
|
+
handler, dashboard, MCP server, and CLI never make an outbound network
|
|
1081
|
+
call on observer's behalf. The only code paths that touch the network
|
|
1082
|
+
are the optional API proxy (which forwards **your** requests unchanged
|
|
1083
|
+
to the AI provider you already use) and a handful of explicit opt-in
|
|
1084
|
+
features (message-summary LLM, codegraph MCP, Teams org-server).
|
|
1085
|
+
|
|
1086
|
+
The full privacy statement — what observer stores, what it reads,
|
|
1087
|
+
what it never stores, the explicit list of outbound-network call sites
|
|
1088
|
+
gated behind config, and how to verify "no telemetry" yourself with
|
|
1089
|
+
`grep`, `strings`, and a network-namespaced shell — lives in
|
|
1090
|
+
[`PRIVACY.md`](https://github.com/marmutapp/superbased-observer/blob/main/PRIVACY.md).
|
|
1091
|
+
|
|
1092
|
+
Operational shorthand:
|
|
1093
|
+
|
|
1094
|
+
- **Local-only HTTP.** The proxy and dashboard bind to `127.0.0.1`
|
|
1095
|
+
by default. Don't bind to `0.0.0.0` unless you've thought about
|
|
1096
|
+
it — there's no auth.
|
|
1097
|
+
- **Secrets scrubbing.** Tool inputs and outputs pass through
|
|
1098
|
+
`internal/scrub/` before persistence; review the regex set if your
|
|
1099
|
+
secrets follow non-default formats.
|
|
1100
|
+
- **Database.** `~/.observer/observer.db` is a SQLite file with the
|
|
1101
|
+
same security posture as your `~/.claude/` and `~/.codex/` session
|
|
1102
|
+
logs (which already hold the same content). Encrypt the disk if
|
|
1103
|
+
your threat model needs that.
|
|
1104
|
+
- **Full delete.** `rm -rf ~/.observer/` removes everything observer
|
|
1105
|
+
ever stored — no traces elsewhere on your system.
|
|
1106
|
+
|
|
1107
|
+
|
|
1108
|
+
## Source, contributing, license
|
|
1109
|
+
|
|
1110
|
+
- **Source**: https://github.com/marmutapp/superbased-observer
|
|
1111
|
+
- **Specification**: `superbased-final-spec-v2.md` in the repo
|
|
1112
|
+
- **Issues**: https://github.com/marmutapp/superbased-observer/issues
|
|
1113
|
+
- **License**: [Apache 2.0](https://github.com/marmutapp/superbased-observer/blob/main/LICENSE)
|
|
1114
|
+
- **Author**: Santosh Kathira <contact@marmut.app>
|
|
1115
|
+
|
|
1116
|
+
This PyPI package is a thin Python launcher (`observer/__main__.py`)
|
|
1117
|
+
that `os.execv`s the bundled prebuilt binary. Same shape as `ruff` /
|
|
1118
|
+
`uv` / `polars` — each platform-tagged wheel bundles its
|
|
1119
|
+
platform's binary directly, and pip's wheel-tag selector picks the
|
|
1120
|
+
matching one. The Go source lives in the main repo; binaries are
|
|
1121
|
+
cross-compiled per release tag via GitHub Actions and published
|
|
1122
|
+
as `superbased-observer` (PyPI) and `@superbased/observer` (npm)
|
|
1123
|
+
side-by-side from the same v* tag.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
observer/__init__.py,sha256=OJoS4vSPv4LEl8a8k8kKUM7ylWd7Vv-8ygCH3XQit4I,23
|
|
2
|
+
observer/__main__.py,sha256=kAp2Ki5EZUydnFjjq3m0kAS5E1i_klISkuGAQptaWC4,1333
|
|
3
|
+
superbased_observer-1.7.25.dist-info/METADATA,sha256=kGI4VJAZHXOWhQW9X11DORPXiFj8EX_JmgDin03iAM0,57361
|
|
4
|
+
superbased_observer-1.7.25.dist-info/WHEEL,sha256=fip2IBhkkNvH-S_Xh3PV94_XVqHJB1nn9gTAcldDBj4,94
|
|
5
|
+
superbased_observer-1.7.25.dist-info/entry_points.txt,sha256=N85PPRCTJZKyvr7ClgUo1nQ0SkQaRnLc9yFBAwfnY3s,52
|
|
6
|
+
superbased_observer-1.7.25.dist-info/licenses/LICENSE,sha256=saHzyug7uLmo7N3OMLvLQvGgPOKqnlm93HmNPRqGAn4,811
|
|
7
|
+
superbased_observer-1.7.25.dist-info/RECORD,,
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
Copyright 2026 Gaja AI
|
|
6
|
+
|
|
7
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
|
+
you may not use this file except in compliance with the License.
|
|
9
|
+
You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
See the License for the specific language governing permissions and
|
|
17
|
+
limitations under the License.
|
|
18
|
+
|
|
19
|
+
The full text of the Apache License 2.0 is available at:
|
|
20
|
+
http://www.apache.org/licenses/LICENSE-2.0.txt
|