snowpack 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowpack-0.1.0/.github/workflows/ci.yml +16 -0
- snowpack-0.1.0/.github/workflows/publish.yml +66 -0
- snowpack-0.1.0/.gitignore +7 -0
- snowpack-0.1.0/PKG-INFO +240 -0
- snowpack-0.1.0/README.md +219 -0
- snowpack-0.1.0/docker/docker-compose.yml +39 -0
- snowpack-0.1.0/docs/adr/ADR-001-memory-architecture.md +488 -0
- snowpack-0.1.0/docs/adr/ADR-002-pit-ui.md +93 -0
- snowpack-0.1.0/docs/claude-md-snippet.md +28 -0
- snowpack-0.1.0/docs/hooks.md +47 -0
- snowpack-0.1.0/docs/ollama-docker.md +73 -0
- snowpack-0.1.0/docs/pit.md +82 -0
- snowpack-0.1.0/docs/plans/2026-06-10-adr-phasing-merge.md +47 -0
- snowpack-0.1.0/docs/plans/2026-06-10-core-implementation.md +100 -0
- snowpack-0.1.0/docs/plans/2026-06-10-pit-ui.md +78 -0
- snowpack-0.1.0/docs/plans/README.md +13 -0
- snowpack-0.1.0/docs/releasing.md +55 -0
- snowpack-0.1.0/pyproject.toml +47 -0
- snowpack-0.1.0/scripts/seed_demo.py +282 -0
- snowpack-0.1.0/src/snowpack/__init__.py +3 -0
- snowpack-0.1.0/src/snowpack/chunker.py +95 -0
- snowpack-0.1.0/src/snowpack/cli.py +510 -0
- snowpack-0.1.0/src/snowpack/config.py +121 -0
- snowpack-0.1.0/src/snowpack/db.py +64 -0
- snowpack-0.1.0/src/snowpack/extraction.py +174 -0
- snowpack-0.1.0/src/snowpack/ingest.py +210 -0
- snowpack-0.1.0/src/snowpack/models.py +124 -0
- snowpack-0.1.0/src/snowpack/pit.py +185 -0
- snowpack-0.1.0/src/snowpack/pit_static/app.js +284 -0
- snowpack-0.1.0/src/snowpack/pit_static/force-graph.min.js +9 -0
- snowpack-0.1.0/src/snowpack/pit_static/index.html +62 -0
- snowpack-0.1.0/src/snowpack/pit_static/style.css +95 -0
- snowpack-0.1.0/src/snowpack/providers/__init__.py +53 -0
- snowpack-0.1.0/src/snowpack/providers/base.py +37 -0
- snowpack-0.1.0/src/snowpack/providers/ollama.py +63 -0
- snowpack-0.1.0/src/snowpack/providers/openai_compat.py +77 -0
- snowpack-0.1.0/src/snowpack/reconcile.py +133 -0
- snowpack-0.1.0/src/snowpack/retrieval.py +159 -0
- snowpack-0.1.0/src/snowpack/schema.sql +323 -0
- snowpack-0.1.0/src/snowpack/sinter.py +134 -0
- snowpack-0.1.0/src/snowpack/storage.py +1061 -0
- snowpack-0.1.0/src/snowpack/transcripts.py +198 -0
- snowpack-0.1.0/tests/conftest.py +112 -0
- snowpack-0.1.0/tests/test_chunker.py +59 -0
- snowpack-0.1.0/tests/test_cli.py +128 -0
- snowpack-0.1.0/tests/test_extraction.py +168 -0
- snowpack-0.1.0/tests/test_fusion.py +62 -0
- snowpack-0.1.0/tests/test_ingest.py +183 -0
- snowpack-0.1.0/tests/test_m3.py +144 -0
- snowpack-0.1.0/tests/test_pit_http.py +121 -0
- snowpack-0.1.0/tests/test_pit_storage.py +187 -0
- snowpack-0.1.0/tests/test_reconcile.py +180 -0
- snowpack-0.1.0/tests/test_retrieval.py +99 -0
- snowpack-0.1.0/tests/test_retrieval_facts.py +91 -0
- snowpack-0.1.0/tests/test_storage.py +154 -0
- snowpack-0.1.0/tests/test_transcripts.py +146 -0
- snowpack-0.1.0/uv.lock +274 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
name: ci
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["**"]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: astral-sh/setup-uv@v5
|
|
14
|
+
- run: uv sync
|
|
15
|
+
- run: uv run ruff check
|
|
16
|
+
- run: uv run pytest -q
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: publish
|
|
2
|
+
|
|
3
|
+
# Publishes to PyPI via trusted publishing (OIDC, no tokens) on version tags.
|
|
4
|
+
# One-time setup on pypi.org is documented in docs/releasing.md.
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
tags: ["v*"]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
test:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: astral-sh/setup-uv@v5
|
|
16
|
+
- run: uv sync
|
|
17
|
+
- run: uv run ruff check
|
|
18
|
+
- run: uv run pytest -q
|
|
19
|
+
|
|
20
|
+
build:
|
|
21
|
+
needs: test
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
- uses: astral-sh/setup-uv@v5
|
|
26
|
+
- name: Check tag matches project version
|
|
27
|
+
run: |
|
|
28
|
+
version=$(python3 -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
|
29
|
+
if [ "v$version" != "$GITHUB_REF_NAME" ]; then
|
|
30
|
+
echo "tag $GITHUB_REF_NAME does not match pyproject version $version" >&2
|
|
31
|
+
exit 1
|
|
32
|
+
fi
|
|
33
|
+
- run: uv build
|
|
34
|
+
- name: Smoke-test the wheel
|
|
35
|
+
# A bad build that drops data files (schema.sql, pit_static/) would
|
|
36
|
+
# still pass unit tests; install the artifact and exercise it.
|
|
37
|
+
run: |
|
|
38
|
+
python3 -m venv /tmp/smoke
|
|
39
|
+
/tmp/smoke/bin/pip install --quiet dist/*.whl
|
|
40
|
+
/tmp/smoke/bin/snowpack --help >/dev/null
|
|
41
|
+
SNOWPACK_DB=/tmp/smoke/test.db /tmp/smoke/bin/snowpack init --model nomic-embed-text
|
|
42
|
+
SNOWPACK_DB=/tmp/smoke/test.db SNOWPACK_CLAUDE_PROJECTS=/tmp/empty \
|
|
43
|
+
/tmp/smoke/bin/snowpack probe "smoke" --all-projects --no-log
|
|
44
|
+
/tmp/smoke/bin/python - <<'EOF'
|
|
45
|
+
from importlib.resources import files
|
|
46
|
+
pkg = files('snowpack')
|
|
47
|
+
assert (pkg / 'schema.sql').is_file(), 'schema.sql missing from wheel'
|
|
48
|
+
assert (pkg / 'pit_static' / 'index.html').is_file(), 'pit_static missing from wheel'
|
|
49
|
+
EOF
|
|
50
|
+
- uses: actions/upload-artifact@v4
|
|
51
|
+
with:
|
|
52
|
+
name: dist
|
|
53
|
+
path: dist/
|
|
54
|
+
|
|
55
|
+
publish:
|
|
56
|
+
needs: build
|
|
57
|
+
runs-on: ubuntu-latest
|
|
58
|
+
environment: pypi
|
|
59
|
+
permissions:
|
|
60
|
+
id-token: write
|
|
61
|
+
steps:
|
|
62
|
+
- uses: actions/download-artifact@v4
|
|
63
|
+
with:
|
|
64
|
+
name: dist
|
|
65
|
+
path: dist/
|
|
66
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
snowpack-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: snowpack
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local-first agent memory for Claude Code: episodic + semantic memory in one SQLite file.
|
|
5
|
+
Author: David Kelly
|
|
6
|
+
Keywords: agent,claude-code,llm,local-first,memory,sqlite
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Environment :: Console
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: Other/Proprietary License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Software Development
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: httpx>=0.27
|
|
18
|
+
Requires-Dist: sqlite-vec>=0.1.6
|
|
19
|
+
Requires-Dist: typer>=0.12
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# snowpack
|
|
23
|
+
|
|
24
|
+
> The snowpack is the season's memory — every storm recorded as a layer.
|
|
25
|
+
|
|
26
|
+
Local-first agent memory for Claude Code. Snowpack ingests Claude Code session
|
|
27
|
+
transcripts into **episodic memory** (what happened across sessions) and
|
|
28
|
+
**semantic memory** (durable facts, entities, relationships), all in a single
|
|
29
|
+
SQLite file with vector + keyword search. The agent reaches it through an
|
|
30
|
+
ordinary CLI — no MCP server, no daemon, no infrastructure.
|
|
31
|
+
|
|
32
|
+
## Status
|
|
33
|
+
|
|
34
|
+
Core pipeline implemented (episodic + semantic memory, hybrid retrieval,
|
|
35
|
+
telemetry, distillation). See `docs/adr/ADR-001-memory-architecture.md` for
|
|
36
|
+
the architecture and decision record, `docs/hooks.md` for ingestion hook
|
|
37
|
+
setup, and `docs/claude-md-snippet.md` for the agent-facing usage docs.
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# 1. Get Ollama running with the embedding model (see "Embeddings" below)
|
|
43
|
+
ollama pull nomic-embed-text
|
|
44
|
+
|
|
45
|
+
# 2. Install and initialize snowpack
|
|
46
|
+
uv tool install . # or: uv sync && uv run snowpack ...
|
|
47
|
+
snowpack init # create ~/.snowpack/snowpack.db
|
|
48
|
+
|
|
49
|
+
# 3. Use it
|
|
50
|
+
snowpack obs ingest # ingest Claude Code transcripts (~/.claude/projects)
|
|
51
|
+
snowpack probe "auth decisions" # hybrid retrieval (vector + keyword + graph + recency)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Embeddings: Ollama setup and choosing a model
|
|
55
|
+
|
|
56
|
+
Vector search needs a local embedding model served by
|
|
57
|
+
[Ollama](https://ollama.com). It is a soft requirement: without it snowpack
|
|
58
|
+
still works — ingest stores episodes un-embedded, probe degrades to
|
|
59
|
+
keyword + recency search, and the next ingest after Ollama comes up
|
|
60
|
+
backfills the missing vectors automatically.
|
|
61
|
+
|
|
62
|
+
### Install and run Ollama
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# macOS
|
|
66
|
+
brew install ollama # or download the app from https://ollama.com
|
|
67
|
+
|
|
68
|
+
# Linux
|
|
69
|
+
curl -fsSL https://ollama.com/install.sh | sh
|
|
70
|
+
|
|
71
|
+
# start the server (the desktop app does this automatically)
|
|
72
|
+
ollama serve
|
|
73
|
+
|
|
74
|
+
# fetch the default embedding model (~270 MB)
|
|
75
|
+
ollama pull nomic-embed-text
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Prefer it sandboxed?** A hardened Docker setup (localhost-only API,
|
|
79
|
+
dropped capabilities, isolated model storage) ships in
|
|
80
|
+
`docker/docker-compose.yml`:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
docker compose -f docker/docker-compose.yml up -d
|
|
84
|
+
docker compose -f docker/docker-compose.yml exec ollama ollama pull nomic-embed-text
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
See `docs/ollama-docker.md` for GPU setup and the macOS caveat (containers
|
|
88
|
+
can't use Apple Silicon's GPU — native Ollama is faster there).
|
|
89
|
+
|
|
90
|
+
Verify it's answering:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
curl -s http://localhost:11434/api/embed \
|
|
94
|
+
-d '{"model": "nomic-embed-text", "input": ["hello"]}' | head -c 120
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
If Ollama runs somewhere other than `localhost:11434` (a container, another
|
|
98
|
+
machine), point snowpack at it with `SNOWPACK_OLLAMA_URL`:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
export SNOWPACK_OLLAMA_URL=http://gpu-box:11434
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Choosing the embedding model
|
|
105
|
+
|
|
106
|
+
The model is fixed **per database** at `snowpack init`, because the vector
|
|
107
|
+
tables are created with that model's output dimension (vec0 columns are
|
|
108
|
+
fixed-width):
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
snowpack init # nomic-embed-text (768-d)
|
|
112
|
+
snowpack init --model mxbai-embed-large # higher quality, 1024-d
|
|
113
|
+
snowpack init --model all-minilm # smaller/faster, 384-d
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
You normally don't pass `--dim`: init asks the running Ollama what dimension
|
|
117
|
+
the model actually produces (and refuses a `--dim` that contradicts it). If
|
|
118
|
+
Ollama isn't running, init falls back to a built-in table for common models
|
|
119
|
+
(`nomic-embed-text`, `mxbai-embed-large`, `all-minilm`,
|
|
120
|
+
`snowflake-arctic-embed`, `bge-m3`) — for anything else, either start Ollama
|
|
121
|
+
first or pass `--dim` explicitly.
|
|
122
|
+
|
|
123
|
+
The configured model, dimension, and task prefixes are recorded in the
|
|
124
|
+
database (`meta` table) and used for every subsequent embed, so you never
|
|
125
|
+
specify the model again after init — `obs ingest` and `probe` read it from
|
|
126
|
+
the database. To see what a database was initialized with:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
sqlite3 ~/.snowpack/snowpack.db "SELECT * FROM meta"
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**Changing models later** means re-embedding everything: until
|
|
133
|
+
`snowpack reindex` ships, that is `rm ~/.snowpack/snowpack.db`,
|
|
134
|
+
`snowpack init --model <new>`, `snowpack obs ingest` (episodes re-ingest from
|
|
135
|
+
the transcripts, but extracted facts and telemetry are lost — export first if
|
|
136
|
+
you care).
|
|
137
|
+
|
|
138
|
+
## CLI surface
|
|
139
|
+
|
|
140
|
+
| Command | Purpose |
|
|
141
|
+
|---|---|
|
|
142
|
+
| `snowpack init` | Create and configure the database |
|
|
143
|
+
| `snowpack obs ingest` | Ingest new transcript exchanges (incremental, idempotent) |
|
|
144
|
+
| `snowpack obs extract` | Extract durable facts from episodes (API-assisted) |
|
|
145
|
+
| `snowpack obs list` | List recent episodes |
|
|
146
|
+
| `snowpack probe "query"` | Hybrid retrieval (vector + keyword + graph + recency) with telemetry |
|
|
147
|
+
| `snowpack feedback` | Mark retrieved memories as used — trains ranking |
|
|
148
|
+
| `snowpack stash` | Working-memory checkpoint per project |
|
|
149
|
+
| `snowpack stats` | Telemetry overview; `--refresh` recomputes usefulness |
|
|
150
|
+
| `snowpack sinter` | Mine repeated corrections into CLAUDE.md candidates |
|
|
151
|
+
| `snowpack entity merge` | Point a duplicate entity at its canonical form |
|
|
152
|
+
| `snowpack pit` | Local web UI: entity graph + telemetry dashboard |
|
|
153
|
+
|
|
154
|
+
## The pit (web UI)
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
snowpack pit # serves http://127.0.0.1:8617 and opens the browser
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
A read-only, single-page UI over the same SQLite file (no extra dependencies,
|
|
161
|
+
no build step; the graph library is vendored so it works offline):
|
|
162
|
+
|
|
163
|
+
- **Graph tab** — entities as nodes, facts as edges. Visual weights are real
|
|
164
|
+
telemetry, not decoration: node size = usage, edge width = retrieval
|
|
165
|
+
frequency, color = staleness, and **dead gray = never retrieved** — your
|
|
166
|
+
pruning candidates at a glance. Click through node → fact → provenance
|
|
167
|
+
episode; toggle superseded facts; search to highlight.
|
|
168
|
+
- **Stats tab** — totals, retrieval latency, channel win-rate (vector vs
|
|
169
|
+
keyword vs graph — how to rebalance fusion weights), zero-result queries
|
|
170
|
+
(gap detection), most/least-used facts, persistent weak layers, and recent
|
|
171
|
+
retrievals expandable to per-result channels/scores/used flags.
|
|
172
|
+
|
|
173
|
+
The server binds 127.0.0.1 only and never mutates user data (the one write is
|
|
174
|
+
recomputing derived usefulness scores on demand). Full guide — including how
|
|
175
|
+
to read the visual encoding and troubleshooting — in `docs/pit.md`; stack
|
|
176
|
+
decisions in `docs/adr/ADR-002-pit-ui.md`.
|
|
177
|
+
|
|
178
|
+
## Documentation map
|
|
179
|
+
|
|
180
|
+
- `docs/adr/` — architecture decision records (ADR-001 core, ADR-002 pit UI)
|
|
181
|
+
- `docs/plans/` — point-in-time implementation plans approved before each
|
|
182
|
+
build round, with outcomes
|
|
183
|
+
- `docs/pit.md` — running and reading the pit UI
|
|
184
|
+
- `docs/hooks.md` — out-of-band ingestion hooks
|
|
185
|
+
- `docs/ollama-docker.md` — sandboxed Ollama
|
|
186
|
+
- `docs/claude-md-snippet.md` — agent-facing usage docs for CLAUDE.md
|
|
187
|
+
- `docs/releasing.md` — publishing wheels to PyPI (trusted publishing)
|
|
188
|
+
|
|
189
|
+
## Roadmap
|
|
190
|
+
|
|
191
|
+
The agent-memory market is crowded with cloud-first offerings (Mem0, Zep,
|
|
192
|
+
Letta). Snowpack takes the opposite entry: a **local-first core that syncs
|
|
193
|
+
up** when you want it to — local-first is the foundation later phases build
|
|
194
|
+
on, not a stage to discard.
|
|
195
|
+
|
|
196
|
+
1. **Phase 1 — local dev tool (now).** Everything in this repo: single SQLite
|
|
197
|
+
file, CLI + hooks integration, telemetry from day one. Goal: prove
|
|
198
|
+
retrieval quality and accumulate the usage data later tuning depends on.
|
|
199
|
+
2. **Phase 2 — local-first + sync.** The SQLite file stays the on-device
|
|
200
|
+
source of truth; optional sync to a hosted backend adds multi-device use,
|
|
201
|
+
backup, and selective team sharing. The integration surface broadens
|
|
202
|
+
beyond Claude Code: MCP server plus a language-agnostic SDK/HTTP API.
|
|
203
|
+
3. **Phase 3 — hosted platform.** A managed, multi-tenant memory service
|
|
204
|
+
covering all four memory types (episodic, semantic, working, procedural).
|
|
205
|
+
**Self-hosting stays a first-class path.**
|
|
206
|
+
|
|
207
|
+
Full reasoning, the fixed-vs-provisional decision table, and migration risks
|
|
208
|
+
live in `docs/adr/ADR-001-memory-architecture.md` ("Phasing & evolution").
|
|
209
|
+
|
|
210
|
+
Fact extraction needs an API key (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or
|
|
211
|
+
`SNOWPACK_EXTRACTION_API_KEY`) and defaults to Anthropic's OpenAI-compatible
|
|
212
|
+
endpoint; override with `SNOWPACK_EXTRACTION_BASE_URL` / `SNOWPACK_EXTRACTION_MODEL`. Keys are
|
|
213
|
+
read from the environment only and never stored.
|
|
214
|
+
|
|
215
|
+
## Development
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
uv sync
|
|
219
|
+
uv run pytest
|
|
220
|
+
uv run ruff check
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Demo data
|
|
224
|
+
|
|
225
|
+
To try the full surface without real transcripts (and without touching
|
|
226
|
+
`~/.snowpack`), seed a sandboxed demo — synthetic transcripts for two fake
|
|
227
|
+
projects, pre-extracted facts (including a superseded pair), and probe
|
|
228
|
+
telemetry:
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
uv run python scripts/seed_demo.py # creates ~/.snowpack-demo
|
|
232
|
+
export SNOWPACK_DB=~/.snowpack-demo/snowpack.db
|
|
233
|
+
export SNOWPACK_CLAUDE_PROJECTS=~/.snowpack-demo/projects
|
|
234
|
+
snowpack probe "what did we decide about auth" --all-projects
|
|
235
|
+
snowpack stats
|
|
236
|
+
snowpack pit
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
It works without Ollama (probe degrades to keyword+recency, exactly as in
|
|
240
|
+
real use); with Ollama running the same script embeds everything.
|
snowpack-0.1.0/README.md
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# snowpack
|
|
2
|
+
|
|
3
|
+
> The snowpack is the season's memory — every storm recorded as a layer.
|
|
4
|
+
|
|
5
|
+
Local-first agent memory for Claude Code. Snowpack ingests Claude Code session
|
|
6
|
+
transcripts into **episodic memory** (what happened across sessions) and
|
|
7
|
+
**semantic memory** (durable facts, entities, relationships), all in a single
|
|
8
|
+
SQLite file with vector + keyword search. The agent reaches it through an
|
|
9
|
+
ordinary CLI — no MCP server, no daemon, no infrastructure.
|
|
10
|
+
|
|
11
|
+
## Status
|
|
12
|
+
|
|
13
|
+
Core pipeline implemented (episodic + semantic memory, hybrid retrieval,
|
|
14
|
+
telemetry, distillation). See `docs/adr/ADR-001-memory-architecture.md` for
|
|
15
|
+
the architecture and decision record, `docs/hooks.md` for ingestion hook
|
|
16
|
+
setup, and `docs/claude-md-snippet.md` for the agent-facing usage docs.
|
|
17
|
+
|
|
18
|
+
## Quick start
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# 1. Get Ollama running with the embedding model (see "Embeddings" below)
|
|
22
|
+
ollama pull nomic-embed-text
|
|
23
|
+
|
|
24
|
+
# 2. Install and initialize snowpack
|
|
25
|
+
uv tool install . # or: uv sync && uv run snowpack ...
|
|
26
|
+
snowpack init # create ~/.snowpack/snowpack.db
|
|
27
|
+
|
|
28
|
+
# 3. Use it
|
|
29
|
+
snowpack obs ingest # ingest Claude Code transcripts (~/.claude/projects)
|
|
30
|
+
snowpack probe "auth decisions" # hybrid retrieval (vector + keyword + graph + recency)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Embeddings: Ollama setup and choosing a model
|
|
34
|
+
|
|
35
|
+
Vector search needs a local embedding model served by
|
|
36
|
+
[Ollama](https://ollama.com). It is a soft requirement: without it snowpack
|
|
37
|
+
still works — ingest stores episodes un-embedded, probe degrades to
|
|
38
|
+
keyword + recency search, and the next ingest after Ollama comes up
|
|
39
|
+
backfills the missing vectors automatically.
|
|
40
|
+
|
|
41
|
+
### Install and run Ollama
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# macOS
|
|
45
|
+
brew install ollama # or download the app from https://ollama.com
|
|
46
|
+
|
|
47
|
+
# Linux
|
|
48
|
+
curl -fsSL https://ollama.com/install.sh | sh
|
|
49
|
+
|
|
50
|
+
# start the server (the desktop app does this automatically)
|
|
51
|
+
ollama serve
|
|
52
|
+
|
|
53
|
+
# fetch the default embedding model (~270 MB)
|
|
54
|
+
ollama pull nomic-embed-text
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Prefer it sandboxed?** A hardened Docker setup (localhost-only API,
|
|
58
|
+
dropped capabilities, isolated model storage) ships in
|
|
59
|
+
`docker/docker-compose.yml`:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
docker compose -f docker/docker-compose.yml up -d
|
|
63
|
+
docker compose -f docker/docker-compose.yml exec ollama ollama pull nomic-embed-text
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
See `docs/ollama-docker.md` for GPU setup and the macOS caveat (containers
|
|
67
|
+
can't use Apple Silicon's GPU — native Ollama is faster there).
|
|
68
|
+
|
|
69
|
+
Verify it's answering:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
curl -s http://localhost:11434/api/embed \
|
|
73
|
+
-d '{"model": "nomic-embed-text", "input": ["hello"]}' | head -c 120
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
If Ollama runs somewhere other than `localhost:11434` (a container, another
|
|
77
|
+
machine), point snowpack at it with `SNOWPACK_OLLAMA_URL`:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
export SNOWPACK_OLLAMA_URL=http://gpu-box:11434
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Choosing the embedding model
|
|
84
|
+
|
|
85
|
+
The model is fixed **per database** at `snowpack init`, because the vector
|
|
86
|
+
tables are created with that model's output dimension (vec0 columns are
|
|
87
|
+
fixed-width):
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
snowpack init # nomic-embed-text (768-d)
|
|
91
|
+
snowpack init --model mxbai-embed-large # higher quality, 1024-d
|
|
92
|
+
snowpack init --model all-minilm # smaller/faster, 384-d
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
You normally don't pass `--dim`: init asks the running Ollama what dimension
|
|
96
|
+
the model actually produces (and refuses a `--dim` that contradicts it). If
|
|
97
|
+
Ollama isn't running, init falls back to a built-in table for common models
|
|
98
|
+
(`nomic-embed-text`, `mxbai-embed-large`, `all-minilm`,
|
|
99
|
+
`snowflake-arctic-embed`, `bge-m3`) — for anything else, either start Ollama
|
|
100
|
+
first or pass `--dim` explicitly.
|
|
101
|
+
|
|
102
|
+
The configured model, dimension, and task prefixes are recorded in the
|
|
103
|
+
database (`meta` table) and used for every subsequent embed, so you never
|
|
104
|
+
specify the model again after init — `obs ingest` and `probe` read it from
|
|
105
|
+
the database. To see what a database was initialized with:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
sqlite3 ~/.snowpack/snowpack.db "SELECT * FROM meta"
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Changing models later** means re-embedding everything: until
|
|
112
|
+
`snowpack reindex` ships, that is `rm ~/.snowpack/snowpack.db`,
|
|
113
|
+
`snowpack init --model <new>`, `snowpack obs ingest` (episodes re-ingest from
|
|
114
|
+
the transcripts, but extracted facts and telemetry are lost — export first if
|
|
115
|
+
you care).
|
|
116
|
+
|
|
117
|
+
## CLI surface
|
|
118
|
+
|
|
119
|
+
| Command | Purpose |
|
|
120
|
+
|---|---|
|
|
121
|
+
| `snowpack init` | Create and configure the database |
|
|
122
|
+
| `snowpack obs ingest` | Ingest new transcript exchanges (incremental, idempotent) |
|
|
123
|
+
| `snowpack obs extract` | Extract durable facts from episodes (API-assisted) |
|
|
124
|
+
| `snowpack obs list` | List recent episodes |
|
|
125
|
+
| `snowpack probe "query"` | Hybrid retrieval (vector + keyword + graph + recency) with telemetry |
|
|
126
|
+
| `snowpack feedback` | Mark retrieved memories as used — trains ranking |
|
|
127
|
+
| `snowpack stash` | Working-memory checkpoint per project |
|
|
128
|
+
| `snowpack stats` | Telemetry overview; `--refresh` recomputes usefulness |
|
|
129
|
+
| `snowpack sinter` | Mine repeated corrections into CLAUDE.md candidates |
|
|
130
|
+
| `snowpack entity merge` | Point a duplicate entity at its canonical form |
|
|
131
|
+
| `snowpack pit` | Local web UI: entity graph + telemetry dashboard |
|
|
132
|
+
|
|
133
|
+
## The pit (web UI)
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
snowpack pit # serves http://127.0.0.1:8617 and opens the browser
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
A read-only, single-page UI over the same SQLite file (no extra dependencies,
|
|
140
|
+
no build step; the graph library is vendored so it works offline):
|
|
141
|
+
|
|
142
|
+
- **Graph tab** — entities as nodes, facts as edges. Visual weights are real
|
|
143
|
+
telemetry, not decoration: node size = usage, edge width = retrieval
|
|
144
|
+
frequency, color = staleness, and **dead gray = never retrieved** — your
|
|
145
|
+
pruning candidates at a glance. Click through node → fact → provenance
|
|
146
|
+
episode; toggle superseded facts; search to highlight.
|
|
147
|
+
- **Stats tab** — totals, retrieval latency, channel win-rate (vector vs
|
|
148
|
+
keyword vs graph — how to rebalance fusion weights), zero-result queries
|
|
149
|
+
(gap detection), most/least-used facts, persistent weak layers, and recent
|
|
150
|
+
retrievals expandable to per-result channels/scores/used flags.
|
|
151
|
+
|
|
152
|
+
The server binds 127.0.0.1 only and never mutates user data (the one write is
|
|
153
|
+
recomputing derived usefulness scores on demand). Full guide — including how
|
|
154
|
+
to read the visual encoding and troubleshooting — in `docs/pit.md`; stack
|
|
155
|
+
decisions in `docs/adr/ADR-002-pit-ui.md`.
|
|
156
|
+
|
|
157
|
+
## Documentation map
|
|
158
|
+
|
|
159
|
+
- `docs/adr/` — architecture decision records (ADR-001 core, ADR-002 pit UI)
|
|
160
|
+
- `docs/plans/` — point-in-time implementation plans approved before each
|
|
161
|
+
build round, with outcomes
|
|
162
|
+
- `docs/pit.md` — running and reading the pit UI
|
|
163
|
+
- `docs/hooks.md` — out-of-band ingestion hooks
|
|
164
|
+
- `docs/ollama-docker.md` — sandboxed Ollama
|
|
165
|
+
- `docs/claude-md-snippet.md` — agent-facing usage docs for CLAUDE.md
|
|
166
|
+
- `docs/releasing.md` — publishing wheels to PyPI (trusted publishing)
|
|
167
|
+
|
|
168
|
+
## Roadmap
|
|
169
|
+
|
|
170
|
+
The agent-memory market is crowded with cloud-first offerings (Mem0, Zep,
|
|
171
|
+
Letta). Snowpack takes the opposite entry: a **local-first core that syncs
|
|
172
|
+
up** when you want it to — local-first is the foundation later phases build
|
|
173
|
+
on, not a stage to discard.
|
|
174
|
+
|
|
175
|
+
1. **Phase 1 — local dev tool (now).** Everything in this repo: single SQLite
|
|
176
|
+
file, CLI + hooks integration, telemetry from day one. Goal: prove
|
|
177
|
+
retrieval quality and accumulate the usage data later tuning depends on.
|
|
178
|
+
2. **Phase 2 — local-first + sync.** The SQLite file stays the on-device
|
|
179
|
+
source of truth; optional sync to a hosted backend adds multi-device use,
|
|
180
|
+
backup, and selective team sharing. The integration surface broadens
|
|
181
|
+
beyond Claude Code: MCP server plus a language-agnostic SDK/HTTP API.
|
|
182
|
+
3. **Phase 3 — hosted platform.** A managed, multi-tenant memory service
|
|
183
|
+
covering all four memory types (episodic, semantic, working, procedural).
|
|
184
|
+
**Self-hosting stays a first-class path.**
|
|
185
|
+
|
|
186
|
+
Full reasoning, the fixed-vs-provisional decision table, and migration risks
|
|
187
|
+
live in `docs/adr/ADR-001-memory-architecture.md` ("Phasing & evolution").
|
|
188
|
+
|
|
189
|
+
Fact extraction needs an API key (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or
|
|
190
|
+
`SNOWPACK_EXTRACTION_API_KEY`) and defaults to Anthropic's OpenAI-compatible
|
|
191
|
+
endpoint; override with `SNOWPACK_EXTRACTION_BASE_URL` / `SNOWPACK_EXTRACTION_MODEL`. Keys are
|
|
192
|
+
read from the environment only and never stored.
|
|
193
|
+
|
|
194
|
+
## Development
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
uv sync
|
|
198
|
+
uv run pytest
|
|
199
|
+
uv run ruff check
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Demo data
|
|
203
|
+
|
|
204
|
+
To try the full surface without real transcripts (and without touching
|
|
205
|
+
`~/.snowpack`), seed a sandboxed demo — synthetic transcripts for two fake
|
|
206
|
+
projects, pre-extracted facts (including a superseded pair), and probe
|
|
207
|
+
telemetry:
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
uv run python scripts/seed_demo.py # creates ~/.snowpack-demo
|
|
211
|
+
export SNOWPACK_DB=~/.snowpack-demo/snowpack.db
|
|
212
|
+
export SNOWPACK_CLAUDE_PROJECTS=~/.snowpack-demo/projects
|
|
213
|
+
snowpack probe "what did we decide about auth" --all-projects
|
|
214
|
+
snowpack stats
|
|
215
|
+
snowpack pit
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
It works without Ollama (probe degrades to keyword+recency, exactly as in
|
|
219
|
+
real use); with Ollama running the same script embeds everything.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Ollama for snowpack, sandboxed.
|
|
2
|
+
#
|
|
3
|
+
# docker compose -f docker/docker-compose.yml up -d
|
|
4
|
+
# docker compose -f docker/docker-compose.yml exec ollama ollama pull nomic-embed-text
|
|
5
|
+
#
|
|
6
|
+
# The API is published on 127.0.0.1 only — snowpack's default
|
|
7
|
+
# (http://localhost:11434) works unchanged, and nothing on the network can
|
|
8
|
+
# reach the model server. See docs/ollama-docker.md for GPU setup & caveats.
|
|
9
|
+
|
|
10
|
+
services:
|
|
11
|
+
ollama:
|
|
12
|
+
image: ollama/ollama:latest
|
|
13
|
+
container_name: snowpack-ollama
|
|
14
|
+
restart: unless-stopped
|
|
15
|
+
ports:
|
|
16
|
+
- "127.0.0.1:11434:11434"
|
|
17
|
+
volumes:
|
|
18
|
+
- ollama-models:/root/.ollama # pulled models persist across restarts
|
|
19
|
+
cap_drop:
|
|
20
|
+
- ALL
|
|
21
|
+
security_opt:
|
|
22
|
+
- no-new-privileges:true
|
|
23
|
+
healthcheck:
|
|
24
|
+
test: ["CMD", "ollama", "list"]
|
|
25
|
+
interval: 30s
|
|
26
|
+
timeout: 5s
|
|
27
|
+
retries: 3
|
|
28
|
+
start_period: 10s
|
|
29
|
+
# Optional memory cap. Embedding models are small (nomic-embed-text fits
|
|
30
|
+
# comfortably in 2g); raise it if you also serve generation models here.
|
|
31
|
+
# mem_limit: 4g
|
|
32
|
+
|
|
33
|
+
# NVIDIA GPU (requires nvidia-container-toolkit); CPU-only works fine
|
|
34
|
+
# for embedding models. macOS: containers cannot use the GPU at all —
|
|
35
|
+
# prefer native Ollama on Apple Silicon.
|
|
36
|
+
# gpus: all
|
|
37
|
+
|
|
38
|
+
volumes:
|
|
39
|
+
ollama-models:
|