PyPI - snowpack - Versions diffs - 0.1.0__py3-none-any.whl - Mend

snowpack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

snowpack/__init__.py +3 -0
snowpack/chunker.py +95 -0
snowpack/cli.py +510 -0
snowpack/config.py +121 -0
snowpack/db.py +64 -0
snowpack/extraction.py +174 -0
snowpack/ingest.py +210 -0
snowpack/models.py +124 -0
snowpack/pit.py +185 -0
snowpack/pit_static/app.js +284 -0
snowpack/pit_static/force-graph.min.js +9 -0
snowpack/pit_static/index.html +62 -0
snowpack/pit_static/style.css +95 -0
snowpack/providers/__init__.py +53 -0
snowpack/providers/base.py +37 -0
snowpack/providers/ollama.py +63 -0
snowpack/providers/openai_compat.py +77 -0
snowpack/reconcile.py +133 -0
snowpack/retrieval.py +159 -0
snowpack/schema.sql +323 -0
snowpack/sinter.py +134 -0
snowpack/storage.py +1061 -0
snowpack/transcripts.py +198 -0
snowpack-0.1.0.dist-info/METADATA +240 -0
snowpack-0.1.0.dist-info/RECORD +27 -0
snowpack-0.1.0.dist-info/WHEEL +4 -0
snowpack-0.1.0.dist-info/entry_points.txt +2 -0

snowpack/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""snowpack: local-first agent memory for Claude Code."""
+# Version string of the snowpack package.
+__version__ = "0.1.0"

snowpack/chunker.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""Exchanges -> Chunks. Pure functions, no I/O.
+Bump config.CHUNKER_VERSION when chunking logic changes meaningfully —
+re-chunking existing episodes is an explicit `snowpack reindex --rechunk`.
+"""
+from __future__ import annotations
+from .models import Chunk, Exchange, estimate_tokens, normalize_timestamp
+# Default token budget for one chunk (default of chunk_exchanges' budget_tokens).
+CHUNK_BUDGET_TOKENS = 1000
+# Exchanges estimated below this many tokens are buffered and merged together.
+MERGE_BELOW_TOKENS = 150
+# Max characters of the user's text quoted in a continuation-chunk header.
+CONT_HEADER_CHARS = 120
+def chunk_exchanges(exchanges: list[Exchange],
+                    budget_tokens: int = CHUNK_BUDGET_TOKENS,
+                    merge_below: int = MERGE_BELOW_TOKENS) -> list[Chunk]:
+    """Default: one chunk per exchange. Oversized exchanges split at part
+    boundaries; consecutive tiny exchanges merge so 'ok'/'yes' turns don't
+    flood the store with micro-episodes.
+    """
+    chunks: list[Chunk] = []
+    small: list[tuple[str, str]] = []  # (rendered, occurred_at)
+    def flush_small() -> None:
+        if not small:
+            return
+        text = "\n\n---\n\n".join(t for t, _ in small)
+        chunks.append(Chunk(text, small[0][1], estimate_tokens(text)))
+        small.clear()
+    for ex in exchanges:
+        text = ex.render()
+        tokens = estimate_tokens(text)
+        if tokens < merge_below:
+            small.append((text, ex.occurred_at))
+            if sum(estimate_tokens(t) for t, _ in small) >= budget_tokens // 2:
+                flush_small()
+            continue
+        flush_small()
+        if tokens <= budget_tokens:
+            chunks.append(Chunk(text, ex.occurred_at, tokens))
+        else:
+            chunks.extend(_split_exchange(ex, budget_tokens))
+    flush_small()
+    return chunks
+def _split_exchange(ex: Exchange, budget_tokens: int) -> list[Chunk]:
+    """Split an oversized exchange at part boundaries into <= budget chunks.
+    Continuation chunks get a one-line header so each embeds with context.
+    """
+    user_first_line = " ".join(ex.user_text.split())
+    cont_header = f"(cont.) user asked: {user_first_line[:CONT_HEADER_CHARS]}"
+    head = f"## User ({normalize_timestamp(ex.occurred_at)})\n{ex.user_text.strip()}"
+    segments = [head]
+    if ex.parts:
+        segments.append("## Assistant")
+        segments.extend(p.strip() for p in ex.parts if p.strip())
+    # Hard-split any single segment that alone exceeds the budget.
+    max_chars = budget_tokens * 4
+    flat: list[str] = []
+    for seg in segments:
+        while estimate_tokens(seg) > budget_tokens:
+            flat.append(seg[:max_chars])
+            seg = seg[max_chars:]
+        if seg:
+            flat.append(seg)
+    chunks: list[Chunk] = []
+    current: list[str] = []
+    current_tokens = 0
+    for seg in flat:
+        seg_tokens = estimate_tokens(seg)
+        header_cost = 0 if (current or not chunks) else estimate_tokens(cont_header)
+        if current and current_tokens + seg_tokens > budget_tokens:
+            text = "\n".join(current)
+            chunks.append(Chunk(text, ex.occurred_at, estimate_tokens(text)))
+            current = []
+            current_tokens = 0
+            header_cost = estimate_tokens(cont_header)
+        if not current and chunks:
+            current.append(cont_header)
+            current_tokens = header_cost
+        current.append(seg)
+        current_tokens += seg_tokens
+    if current:
+        text = "\n".join(current)
+        chunks.append(Chunk(text, ex.occurred_at, estimate_tokens(text)))
+    return chunks

snowpack/cli.py ADDED Viewed

@@ -0,0 +1,510 @@
+"""snowpack CLI: the agent-facing contract. Thin wiring only — logic lives
+in ingest/retrieval/storage.
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+import typer
+from . import config
+from . import db as snowdb
+from .ingest import SETTLE_SECONDS, ingest_all
+from .providers import ProviderUnavailable, get_embedder
+from .retrieval import probe as run_probe
+from .storage import Storage
+# Root Typer application; invoked with no arguments it prints help.
+app = typer.Typer(no_args_is_help=True, add_completion=False,
+                  help="Local-first agent memory for Claude Code.")
+# Sub-application mounted under the `obs` command group.
+obs_app = typer.Typer(no_args_is_help=True, help="Episode ingest and listing.")
+app.add_typer(obs_app, name="obs")
+def _open() -> Storage:
+    try:
+        conn = snowdb.connect(config.db_path())
+    except snowdb.SchemaError as e:
+        typer.echo(f"error: {e}", err=True)
+        raise typer.Exit(2) from e
+    return Storage(conn)
+def _embedder(storage: Storage):
+    return get_embedder(storage.meta())
+def _resolve_project(storage: Storage, project: str | None,
+                     all_projects: bool) -> int | None:
+    """slug flag > cwd auto-detect > warn and widen to all projects."""
+    if all_projects:
+        return None
+    if project:
+        p = storage.get_project_by_slug(project)
+        if p is None:
+            known = ", ".join(pr.slug for pr in storage.list_projects()) or "(none)"
+            typer.echo(f"error: unknown project {project!r}; known: {known}", err=True)
+            raise typer.Exit(2)
+        return p.id
+    found = storage.find_project_for_cwd(os.getcwd())
+    if found is None:
+        typer.echo("note: cwd not in a known project; searching all projects",
+                   err=True)
+        return None
+    return found.id
+@app.command()
+def init(
+    model: str = typer.Option(config.DEFAULT_EMBEDDING_MODEL,
+                              help="Embedding model (must be pulled in Ollama)."),
+    dim: int = typer.Option(None,
+                            help="Embedding dimension. Usually omit: detected "
+                                 "from the running provider, or looked up for "
+                                 "known models."),
+    provider: str = typer.Option(config.DEFAULT_EMBEDDING_PROVIDER,
+                                 help="Embedding provider: ollama."),
+):
+    """Create and configure ~/.snowpack/snowpack.db.
+    The embedding dimension is baked into the database at init (vec0 columns
+    are fixed-width), so init verifies it against the live model when it can.
+    """
+    from .providers import detect_embedding_dim
+    detected = detect_embedding_dim(provider, model)
+    known = config.KNOWN_MODEL_DIMS.get(model)
+    if dim is None:
+        dim = detected or known
+        if dim is None:
+            typer.echo(
+                f"error: can't determine the embedding dimension for "
+                f"{model!r}: the provider isn't reachable and the model "
+                f"isn't in the known list. Start it (e.g. `ollama serve` + "
+                f"`ollama pull {model}`) or pass --dim explicitly.",
+                err=True)
+            raise typer.Exit(2)
+    elif detected is not None and detected != dim:
+        typer.echo(
+            f"error: model {model!r} produces {detected}-d embeddings, "
+            f"but --dim {dim} was given. Re-run without --dim, or with "
+            f"--dim {detected}.",
+            err=True)
+        raise typer.Exit(2)
+    path = config.db_path()
+    conn = snowdb.connect(path, create=True)
+    cfg = config.EmbeddingConfig.default(model=model, dim=dim, provider=provider)
+    try:
+        snowdb.init_schema(conn, dim, cfg.init_meta())
+    except snowdb.SchemaError as e:
+        typer.echo(f"error: {e}", err=True)
+        raise typer.Exit(2) from e
+    typer.echo(f"initialized {path} (model={model}, dim={dim})")
+    if detected is None:
+        typer.echo(
+            f"note: embedding provider not reachable — dimension {dim} taken "
+            f"from {'--dim' if known is None else 'the known-models table'} "
+            f"and not verified. Episodes will ingest without vectors until "
+            f"Ollama is up (`ollama serve`, `ollama pull {model}`); the next "
+            f"`snowpack obs ingest` backfills them.",
+            err=True)
+@obs_app.command("ingest")
+def obs_ingest(
+    settle: int = typer.Option(SETTLE_SECONDS,
+                               help="Seconds a file must be idle before its "
+                                    "trailing exchange is ingested."),
+    quiet: bool = typer.Option(False, "--quiet", "-q"),
+):
+    """Ingest new Claude Code transcript exchanges (incremental, idempotent)."""
+    storage = _open()
+    report = ingest_all(storage, _embedder(storage),
+                        config.claude_projects_dir(), settle_seconds=settle)
+    if report.embedder_down:
+        typer.echo("warning: embedding provider unreachable; episodes stored "
+                   "without vectors (re-run ingest to backfill)", err=True)
+    for err in report.errors:
+        typer.echo(f"warning: {err}", err=True)
+    if not quiet:
+        typer.echo(
+            f"files: {report.files_seen} seen, {report.files_processed} with new content; "
+            f"episodes: +{report.episodes_added} ({report.episodes_deduped} deduped); "
+            f"embedded: {report.embedded} episodes + {report.facts_embedded} "
+            f"facts ({report.embedding_pending} pending)"
+        )
+@obs_app.command("extract")
+def obs_extract(
+    limit: int = typer.Option(50, "--limit", "-n",
+                              help="Max episodes to process this run."),
+    retry_failed: bool = typer.Option(False, "--retry-failed"),
+):
+    """Extract durable facts from un-extracted episodes (uses the API)."""
+    from .extraction import run_extraction
+    from .providers import get_extractor
+    storage = _open()
+    try:
+        extractor = get_extractor(storage.meta())
+    except ProviderUnavailable as e:
+        typer.echo(f"error: {e}", err=True)
+        raise typer.Exit(3) from e
+    report = run_extraction(storage, extractor, _embedder(storage),
+                            limit=limit, retry_failed=retry_failed)
+    for err in report.errors:
+        typer.echo(f"warning: {err}", err=True)
+    f = report.facts
+    typer.echo(
+        f"episodes: {report.episodes_processed} processed, "
+        f"{report.episodes_failed} failed; "
+        f"facts: +{f.added} added, {f.superseded} superseded, "
+        f"{f.deduped} deduped, {f.dropped} dropped"
+    )
+@obs_app.command("list")
+def obs_list(
+    project: str = typer.Option(None, "--project", "-p"),
+    all_projects: bool = typer.Option(False, "--all-projects"),
+    limit: int = typer.Option(20, "--limit", "-n"),
+):
+    """List recent episodes."""
+    storage = _open()
+    pid = _resolve_project(storage, project, all_projects)
+    for row in storage.list_episodes(pid, limit):
+        snippet = " ".join(row.content.split())[:100]
+        typer.echo(f"[e{row.id}] {row.occurred_at}  {row.project_slug:<16} {snippet}")
+@app.command()
+def probe(
+    query: str = typer.Argument(..., help="What to recall."),
+    project: str = typer.Option(None, "--project", "-p"),
+    all_projects: bool = typer.Option(False, "--all-projects"),
+    k: int = typer.Option(8, "-k"),
+    kind: str = typer.Option("hybrid", "--kind",
+                             help="hybrid | episodes | facts"),
+    json_out: bool = typer.Option(False, "--json"),
+    full: bool = typer.Option(False, "--full", help="Print full episode text."),
+    no_log: bool = typer.Option(False, "--no-log", help="Skip telemetry logging."),
+):
+    """Hybrid retrieval (vector + keyword + graph + recency) with telemetry."""
+    if kind not in ("hybrid", "episodes", "facts"):
+        typer.echo(f"error: bad --kind {kind!r}", err=True)
+        raise typer.Exit(2)
+    storage = _open()
+    pid = _resolve_project(storage, project, all_projects)
+    embedder = _embedder(storage)
+    res = run_probe(storage, embedder, query, project_id=pid, k=k, kind=kind,
+                    session_id=os.environ.get("CLAUDE_SESSION_ID"),
+                    log=not no_log)
+    if not res.vector_used:
+        typer.echo("note: embeddings unavailable; keyword+recency only", err=True)
+    def describe(h):
+        """(date, project, text) for either memory kind."""
+        if h.kind == "episode":
+            ep = res.episodes.get(h.id)
+            if ep is None:
+                return None
+            return ep.occurred_at, ep.project_slug, ep.content
+        fact = res.facts.get(h.id)
+        if fact is None:
+            return None
+        return (fact["valid_from"], fact["project_slug"] or "global",
+                fact["statement"])
+    if json_out:
+        results = []
+        for h in res.hits:
+            d = describe(h)
+            if d is None:
+                continue
+            occurred_at, project_slug, content = d
+            results.append({
+                "id": f"{h.kind[0]}{h.id}",
+                "kind": h.kind,
+                "score": round(h.score, 4),
+                "channel": h.channel,
+                "channels": list(h.channels),
+                "occurred_at": occurred_at,
+                "project": project_slug,
+                "content": content,
+            })
+        typer.echo(json.dumps({
+            "retrieval_id": res.retrieval_id,
+            "latency_ms": res.latency_ms,
+            "results": results,
+        }, indent=2))
+        return
+    if not res.hits:
+        typer.echo("no memories found")
+        return
+    for h in res.hits:
+        d = describe(h)
+        if d is None:
+            continue
+        occurred_at, project_slug, content = d
+        date = occurred_at[:10]
+        if full:
+            typer.echo(f"[{h.kind[0]}{h.id}] {h.score:.3f}  {date}  {project_slug}")
+            typer.echo(content)
+            typer.echo("---")
+        else:
+            snippet = " ".join(content.split())[:160]
+            typer.echo(f"[{h.kind[0]}{h.id}] {h.score:.3f}  {date}  "
+                       f"{project_slug:<16} {snippet}")
+    if res.retrieval_id is not None:
+        top = res.hits[0]
+        typer.echo(f"retrieval:{res.retrieval_id}   mark useful: "
+                   f"snowpack feedback {res.retrieval_id} --used {top.kind[0]}{top.id}")
+def _parse_memory_ids(raw: str) -> list[tuple[str, int]]:
+    kinds = {"e": "episode", "f": "fact"}
+    out = []
+    for piece in raw.split(","):
+        piece = piece.strip()
+        if not piece:
+            continue
+        if piece[0].lower() not in kinds or not piece[1:].isdigit():
+            typer.echo(f"error: bad memory id {piece!r} (expected e123 or f45)",
+                       err=True)
+            raise typer.Exit(2)
+        out.append((kinds[piece[0].lower()], int(piece[1:])))
+    return out
+@app.command()
+def feedback(
+    retrieval_id: int = typer.Argument(...),
+    used: str = typer.Option("", "--used", help="Comma-separated ids, e.g. e123,f45."),
+    unused: str = typer.Option("", "--unused"),
+):
+    """Mark probe results as used/unused — the gold telemetry signal."""
+    storage = _open()
+    n = storage.set_feedback(retrieval_id, _parse_memory_ids(used),
+                             _parse_memory_ids(unused))
+    storage.commit()
+    if n == 0:
+        typer.echo("warning: no matching retrieval results", err=True)
+        raise typer.Exit(1)
+    typer.echo(f"updated {n} result(s)")
+@app.command()
+def stash(
+    text: str = typer.Argument(None, help="Scratchpad markdown; '-' for stdin."),
+    project: str = typer.Option(None, "--project", "-p"),
+    show: bool = typer.Option(False, "--show", help="Print the current stash."),
+    archive: bool = typer.Option(False, "--archive",
+                                 help="Convert the stash into an episode and clear it."),
+):
+    """Working-memory checkpoint: one scratchpad per project.
+    Survives compaction and session restarts; archive it when a task completes.
+    """
+    storage = _open()
+    # A stash always belongs to one project, so the all-projects fallback
+    # (pid None) is an error here.
+    pid = _resolve_project(storage, project, all_projects=False)
+    if pid is None:
+        typer.echo("error: no project for cwd; pass --project", err=True)
+        raise typer.Exit(2)
+    # Mode 1: --show prints the stored scratchpad and exits. It is checked
+    # first, so it wins if --archive is also given.
+    if show:
+        row = storage.get_session_state(pid)
+        if row is None:
+            typer.echo("no stash for this project")
+        else:
+            typer.echo(f"# stash (updated {row['updated_at']})")
+            typer.echo(row["state"])
+        return
+    # Mode 2: --archive turns the scratchpad into an episode, then clears it.
+    if archive:
+        row = storage.get_session_state(pid)
+        if row is None:
+            typer.echo("error: nothing stashed to archive", err=True)
+            raise typer.Exit(1)
+        from .models import content_sha256, estimate_tokens
+        content = f"## Working-memory checkpoint (archived)\n{row['state']}"
+        # Fall back to the literal "stash" when no session id was recorded.
+        session_id = row["session_id"] or "stash"
+        episode_id = storage.insert_episode(
+            project_id=pid, session_id=session_id, source_path=None,
+            content=content, content_hash=content_sha256(content),
+            token_count=estimate_tokens(content),
+            occurred_at=row["updated_at"],
+        )
+        # The stash is cleared whether or not a new episode row was created.
+        storage.clear_session_state(pid)
+        storage.commit()
+        # A falsy episode_id is reported as a duplicate of an existing episode.
+        typer.echo(f"archived stash as episode e{episode_id}"
+                   if episode_id else "archived (duplicate of an existing episode)")
+        return
+    # Mode 3 (default): store TEXT, read from stdin when it is "-".
+    if text is None:
+        typer.echo("error: provide TEXT, '-' for stdin, --show, or --archive",
+                   err=True)
+        raise typer.Exit(2)
+    if text == "-":
+        text = sys.stdin.read()
+    if not text.strip():
+        typer.echo("error: empty stash", err=True)
+        raise typer.Exit(2)
+    storage.upsert_session_state(pid, os.environ.get("CLAUDE_SESSION_ID"), text)
+    storage.commit()
+    typer.echo("stashed")
+@app.command()
+def stats(
+    refresh: bool = typer.Option(False, "--refresh",
+                                 help="Recompute usefulness from telemetry first."),
+):
+    """Memory and telemetry overview (snotel)."""
+    storage = _open()
+    if refresh:
+        n = storage.refresh_memory_stats()
+        storage.commit()
+        typer.echo(f"refreshed usefulness for {n} memories")
+    s = storage.stats_summary()
+    typer.echo(
+        f"projects: {s['projects']}  episodes: {s['episodes']}  "
+        f"facts: {s['facts_current']} current / {s['facts_superseded']} superseded  "
+        f"entities: {s['entities']}"
+    )
+    typer.echo(
+        f"retrievals: {s['retrievals']}  zero-result gaps: {s['gaps']}  "
+        f"pending embeddings: {s['pending_embeddings']}"
+    )
+    if s["channel_winrate"]:
+        typer.echo("channel win-rate (returned / used / rate):")
+        for row in s["channel_winrate"]:
+            typer.echo(f"  {row['channel']:<8} {row['results_returned']:>6} "
+                       f"{row['results_used']:>6}   {row['use_rate']}")
+    if s["weak_layers"]:
+        typer.echo("persistent weak layers (retrieved, never used):")
+        for row in s["weak_layers"]:
+            stmt = " ".join(row["statement"].split())[:80]
+            typer.echo(f"  [f{row['id']}] x{row['retrieval_count']}  {stmt}")
+# Sub-application for `sinter`. no_args_is_help is False so that running it
+# with no subcommand reaches the group's callback instead of printing help.
+sinter_app = typer.Typer(no_args_is_help=False,
+                         help="Procedural distillation: mine corrections "
+                              "into CLAUDE.md candidates.")
+app.add_typer(sinter_app, name="sinter")
+@sinter_app.callback(invoke_without_command=True)
+def sinter_run(ctx: typer.Context):
+    """Mine episodes for repeated corrections and propose candidates."""
+    if ctx.invoked_subcommand is not None:
+        return
+    from .sinter import run_sinter
+    storage = _open()
+    report = run_sinter(storage)
+    typer.echo(f"mined {report.episodes_mined} correction episodes; "
+               f"{report.clusters_found} qualifying clusters; "
+               f"{report.candidates_created} new candidates")
+    if report.candidate_ids:
+        typer.echo("review with: snowpack sinter review")
+@sinter_app.command("review")
+def sinter_review():
+    """List proposed candidates."""
+    storage = _open()
+    rows = storage.list_procedure_candidates("proposed")
+    if not rows:
+        typer.echo("no proposed candidates")
+        return
+    for row in rows:
+        typer.echo(f"--- candidate {row['id']} "
+                   f"(evidence: {row['evidence_count']} episodes) ---")
+        typer.echo(row["rationale"] or "")
+        typer.echo(row["pattern"])
+    typer.echo("accept/reject with: snowpack sinter accept|reject <id>")
+def _decide(candidate_id: int, status: str) -> None:
+    storage = _open()
+    if not storage.decide_procedure_candidate(candidate_id, status):
+        typer.echo(f"error: no proposed candidate {candidate_id}", err=True)
+        raise typer.Exit(2)
+    storage.commit()
+    if status == "accepted":
+        typer.echo("accepted — paste the pattern into CLAUDE.md or a skill")
+    else:
+        typer.echo("rejected")
+@sinter_app.command("accept")
+def sinter_accept(candidate_id: int = typer.Argument(...)):
+    """Accept a candidate (then paste its pattern into CLAUDE.md)."""
+    # Thin wrapper: _decide does the lookup, the commit and the confirmation.
+    _decide(candidate_id, "accepted")
+@sinter_app.command("reject")
+def sinter_reject(candidate_id: int = typer.Argument(...)):
+    """Reject a candidate; its episodes won't be re-proposed."""
+    # Thin wrapper: _decide does the lookup, the commit and the confirmation.
+    _decide(candidate_id, "rejected")
+@app.command()
+def pit(
+    port: int = typer.Option(8617, "--port"),
+    no_browser: bool = typer.Option(False, "--no-browser"),
+):
+    """Open the pit: read-only graph + stats UI. Binds 127.0.0.1 only."""
+    storage = _open()  # validates the DB exists before binding a port
+    # Refresh and commit usefulness stats, then close this connection before
+    # serving; serve() is handed the database path, not this connection.
+    storage.refresh_memory_stats()
+    storage.commit()
+    storage.conn.close()
+    from .pit import serve
+    try:
+        serve(config.db_path(), port=port, open_browser=not no_browser)
+    except OSError as e:
+        # Any OSError from serve() is reported as a port-binding failure.
+        typer.echo(f"error: can't bind port {port} ({e.strerror}); "
+                   f"try --port", err=True)
+        raise typer.Exit(2) from e
+# Sub-application mounted under the `entity` command group.
+entity_app = typer.Typer(no_args_is_help=True, help="Entity maintenance.")
+app.add_typer(entity_app, name="entity")
+@entity_app.command("merge")
+def entity_merge(
+    alias: str = typer.Argument(..., help="Duplicate entity name."),
+    canonical: str = typer.Argument(..., help="Entity it should point to."),
+):
+    """Mark one entity as an alias of another and repoint its facts."""
+    storage = _open()
+    a = storage.get_entity_by_name(alias)
+    c = storage.get_entity_by_name(canonical)
+    if a is None or c is None:
+        missing = alias if a is None else canonical
+        typer.echo(f"error: no entity named {missing!r}", err=True)
+        raise typer.Exit(2)
+    canonical_id = c["canonical_id"] or c["id"]
+    if a["id"] == canonical_id:
+        typer.echo("error: those are already the same entity", err=True)
+        raise typer.Exit(2)
+    storage.merge_entities(a["id"], canonical_id)
+    storage.commit()
+    typer.echo(f"merged {a['name']!r} -> {c['name']!r}")
+def main() -> None:  # pragma: no cover
+    """Run the CLI; an uncaught ProviderUnavailable becomes exit code 3."""
+    try:
+        app()
+    except ProviderUnavailable as e:
+        # Safety net for commands that do not handle this error themselves.
+        typer.echo(f"error: {e}", err=True)
+        sys.exit(3)
+# Allow running this module directly as a script.
+if __name__ == "__main__":  # pragma: no cover
+    main()