PyPI - stasima - Versions diffs - 1.0.0__py3-none-any.whl - Mend

stasima 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

stasima/__init__.py +4 -0
stasima/admin.py +259 -0
stasima/airlock.py +217 -0
stasima/audit_log.py +179 -0
stasima/authz.py +52 -0
stasima/canon.py +109 -0
stasima/cap_server.py +491 -0
stasima/config.py +150 -0
stasima/entries.py +46 -0
stasima/local_capstore.py +472 -0
stasima/map_index.py +279 -0
stasima/orientation.py +77 -0
stasima-1.0.0.dist-info/METADATA +110 -0
stasima-1.0.0.dist-info/RECORD +18 -0
stasima-1.0.0.dist-info/WHEEL +4 -0
stasima-1.0.0.dist-info/entry_points.txt +3 -0
stasima-1.0.0.dist-info/licenses/LICENSE +201 -0
stasima-1.0.0.dist-info/licenses/NOTICE +12 -0

stasima/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
+"""Stasima — a server letting multiple AI instances share one durable, version-controlled
+body of knowledge, with a human practitioner as the gate to shared truth."""
+__version__ = "1.0.0"

stasima/admin.py ADDED Viewed

@@ -0,0 +1,259 @@
+# SPDX-License-Identifier: Apache-2.0
+"""
+Admin CLI — practitioner-side maintenance + promotion. NOT model-facing: these are operator ops,
+and `land` (promotion to canon) IS the human gate, performed here out of band.
+    stasima-admin --config stasima.toml <command>
+      bootstrap <dir>     seed an EMPTY canon from a folder of .md entries (one-time)
+      totp-provision      generate the airlock TOTP secret (prints the otpauth:// URI)
+      totp-check <code>   verify a code from your app (consumes nothing; diagnoses clock skew)
+      inbox [--all] [--read PATH]   the practitioner's mail, from the cockpit (pull)
+      backup <dest>       full backup of everything that is truth: git mirror (all refs+tags),
+                          consistent audit snapshot, config, TOTP secret. Run it anywhere — a
+                          synced folder, an external drive, another machine over a share.
+      status              canon head, perspectives, proposals, audit health
+      reindex             rebuild the MAP index from git
+      reconcile           backfill audit events for committed ops missing one
+      verify              check the audit chain (+ the git-anchored checkpoint)
+      anchor              write the audit head into git now
+      preview <id>        dry-run a proposal merge (conflicts / changed paths)
+      land <id> [--by X]  approve + land a proposal to canon (audit + reindex + anchor)
+"""
+import argparse
+import json
+import os
+import subprocess as sp
+import sys
+import time
+from .config import Config
+from .entries import compose_entry
+from .cap_server import components_from_config
+from .canon import reindex_from_git, land_and_record, canon_seq, seq_display, LOG_DIR
+from .audit_log import reconcile_from_git, anchor_audit_head, verify_against_anchor
+from .local_capstore import Approval, MergeConflict, PERSP_PREFIX as PERSP, PROP_PREFIX as PROP
+from .airlock import generate_secret, otpauth_uri, verify_code, totp_at, STEP
+def _first_heading(text: str):
+    """Return the text of the first level-one markdown heading (`# ...`) in `text`, or None if there is none."""
+    headings = (ln[2:].strip() for ln in text.splitlines() if ln.startswith("# "))
+    return next(headings, None)
+def _qr_ascii(data: str):
+    """Render `data` as an ASCII QR code and return it as a string.
+    Returns None when the optional qrcode package cannot be imported."""
+    import io
+    try:
+        import qrcode
+    except ImportError:
+        return None
+    sink = io.StringIO()
+    code = qrcode.QRCode(border=2)
+    code.add_data(data)
+    code.make(fit=True)
+    # inverted output is the dark-terminal polarity; the otpauth URI remains the fallback
+    code.print_ascii(out=sink, invert=True)
+    return sink.getvalue()
+def run(args) -> dict:
+    """Execute one admin command and return its JSON-serializable result.
+    `args` is the namespace produced by `build_parser()`: `args.config` names the config file and
+    `args.cmd` selects the branch below. Operator-facing refusals (secret already present, secret
+    missing, canon already present, empty seed folder, no/unknown approver, merge conflict, failed
+    landing, unknown command) are raised as SystemExit carrying the message."""
+    cfg = Config.load(args.config)
+    store, index, embedder, audit, authz, airlock = components_from_config(cfg)
+    if args.cmd == "totp-provision":
+        issuer = cfg.deployment_name or "Stasima"
+        uri = lambda s: otpauth_uri(s, label=f"{issuer}:practitioner", issuer=issuer)
+        path = cfg.resolved_airlock_secret()
+        if os.path.exists(path) and not args.force:
+            if args.qr:   # re-display the EXISTING secret's QR — no rotation
+                with open(path, encoding="utf-8") as f:
+                    secret = f.read().strip()
+                qr = _qr_ascii(uri(secret))
+                print(qr if qr else "(pip install qrcode for a scannable QR)")
+                return {"secret_path": path, "otpauth_uri": uri(secret), "rotated": False,
+                        "note": "existing secret re-displayed; scan the QR or enter the secret= value manually"}
+            raise SystemExit(f"secret already exists at {path} — pass --force to rotate "
+                             f"(rotating invalidates the practitioner's current authenticator entry), "
+                             f"or --qr to re-display it")
+        secret = generate_secret()
+        # the TOTP secret is a credential: create the file owner-only (0600) rather than with
+        # whatever the umask allows. O_BINARY (Windows only) avoids double newline translation.
+        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
+        fd = os.open(path, flags, 0o600)
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            f.write(secret + "\n")
+        try:
+            # os.open's mode is ignored when --force rewrites an existing file, so tighten it explicitly
+            os.chmod(path, 0o600)
+        except OSError:
+            pass   # deliberate best effort: some filesystems cannot represent POSIX modes
+        if args.qr:
+            qr = _qr_ascii(uri(secret))
+            print(qr if qr else "(pip install qrcode for a scannable QR)")
+        return {"secret_path": path, "otpauth_uri": uri(secret),
+                "note": "scan the QR (or enter the secret= value manually); the secret stays "
+                        "server-side, never in git — if the QR won't scan, terminal polarity is the "
+                        "usual culprit; the manual key always works"}
+    if args.cmd == "totp-check":
+        # verification only — consumes no windows, approves nothing; safe to run as often as you like
+        spath = cfg.resolved_airlock_secret()
+        if not os.path.exists(spath):
+            raise SystemExit(f"no secret at {spath} — run totp-provision first")
+        with open(spath, encoding="utf-8") as f:
+            secret = f.read().strip()
+        now = time.time()
+        w = verify_code(secret, args.code, now)
+        if w is not None:
+            return {"valid": True, "matched_window": w, "current_window": int(now // STEP),
+                    "note": "your authenticator and the server agree — the airlock will accept codes"}
+        cur = int(now // STEP)
+        for delta in range(-10, 11):                       # diagnose clock skew beyond the ±1 acceptance
+            if totp_at(secret, cur + delta) == str(args.code).strip():
+                return {"valid": False, "matched_window": cur + delta,
+                        "skew": f"{delta:+d} windows (≈ {delta * STEP:+d}s)",
+                        "note": "code is from the right secret but outside the ±1-window acceptance — "
+                                "sync the server or phone clock"}
+        return {"valid": False,
+                "note": "no match within ±10 windows — mistyped code, or the app holds a different/old secret "
+                        "(re-provision with --force and re-add to the app)"}
+    if args.cmd == "bootstrap":
+        if not os.path.isdir(os.path.join(cfg.git_dir, "objects")):
+            sp.run(["git", "init", "--bare", "-q", cfg.git_dir], check=True)   # create the bare repo if missing
+        if store.resolve_ref(cfg.canon_ref) is not None:
+            raise SystemExit("canon already exists — add entries via propose + land, not bootstrap")
+        changes = {}
+        for root, _, files in os.walk(args.seed_dir):
+            for fn in sorted(files):
+                if not fn.endswith(".md"):
+                    continue
+                rel = os.path.relpath(os.path.join(root, fn), args.seed_dir).replace(os.sep, "/")
+                with open(os.path.join(root, fn), encoding="utf-8") as f:
+                    text = f.read()
+                if not text.lstrip().startswith("---"):    # plain markdown -> wrap with a sensible envelope
+                    title = _first_heading(text) or os.path.splitext(fn)[0].replace("-", " ").title()
+                    etype = "ori" if rel.startswith("technical/orientation/") else "kno"
+                    text = compose_entry({"type": etype, "title": title, "status": "active"}, text)
+                changes[rel] = text.encode()
+        if not changes:
+            raise SystemExit(f"no .md files found under {args.seed_dir!r}")
+        r = store.bootstrap_canon(changes, "Bootstrap canon")
+        return {"bootstrapped": r.oid, "entries": sorted(changes), "indexed": reindex_from_git(store, index, embedder)}
+    if args.cmd == "status":
+        ok, _ = audit.verify()   # the first-bad-seq half is reported by `verify`, not by `status`
+        unread = [m for m in index.inbox("practitioner") if not audit.is_read("practitioner", m.path)]
+        return {"canon_head": store.resolve_ref(cfg.canon_ref),
+                "canon_seq": seq_display(canon_seq(store, cfg.seq_origin)),
+                "perspectives": [r.name[len(PERSP):] for r in store.list_refs(PERSP)],
+                "proposals": [r.name[len(PROP):] for r in store.list_refs(PROP)],
+                "staged": airlock.staged(),
+                "practitioner_unread": len(unread),
+                "audit_events": audit.count(), "audit_verify_ok": ok,
+                "audit_vs_anchor": verify_against_anchor(store, audit)}
+    if args.cmd == "inbox":
+        if args.read:
+            audit.append_read("practitioner", args.read)
+            return {"marked_read": args.read}
+        msgs = index.inbox("practitioner")
+        if not args.all:
+            msgs = [m for m in msgs if not audit.is_read("practitioner", m.path)]
+        return {"unread" if not args.all else "all":
+                [{"path": m.path, "from": m.authoring_instance, "subject": m.subject,
+                  "coordinates": m.links} for m in msgs],
+                "note": "read a message body with: kip_get equivalent -> git show <perspective>:<path>; "
+                        "mark handled with: inbox --read <path>"}
+    if args.cmd == "backup":
+        # everything that is TRUTH, in one destination: full-ref git mirror (consistent by nature),
+        # a consistent audit snapshot (sqlite backup API, safe against a live server), config + secret.
+        # The map index is a derived cache and is deliberately not backed up.
+        import shutil
+        import sqlite3 as _sq
+        os.makedirs(args.dest, exist_ok=True)
+        mirror = os.path.join(args.dest, "stasima-mirror.git")
+        if not os.path.isdir(os.path.join(mirror, "objects")):
+            sp.run(["git", "init", "--bare", "-q", mirror], check=True)
+        store.set_remote("backup", mirror)
+        sync = store.push_all("backup")
+        audit_copy = os.path.join(args.dest, "audit.sqlite")
+        dst = _sq.connect(audit_copy)
+        try:
+            audit.conn.backup(dst)
+        finally:
+            dst.close()   # release the snapshot's file handle even when the backup API raises
+        copied = ["stasima-mirror.git", "audit.sqlite"]
+        for src in (args.config, cfg.resolved_airlock_secret()):
+            if src and os.path.exists(src):
+                shutil.copy2(src, args.dest)
+                copied.append(os.path.basename(src))
+        ok = not sync["missing_on_remote"] and not sync["oid_mismatch"]
+        return {"dest": args.dest, "git_sync_ok": ok, "synced_refs": len(sync["synced"]),
+                "audit_events": audit.count(), "copied": copied}
+    if args.cmd == "reindex":
+        return {"reindexed": reindex_from_git(store, index, embedder)}
+    if args.cmd == "reconcile":
+        return {"backfilled": reconcile_from_git(store, audit)}
+    if args.cmd == "verify":
+        ok, bad = audit.verify()
+        return {"audit_verify_ok": ok, "first_bad_seq": bad,
+                "audit_vs_anchor": verify_against_anchor(store, audit)}
+    if args.cmd == "anchor":
+        return {"anchored": anchor_audit_head(store, audit)}
+    if args.cmd == "preview":
+        s = store.preview_merge(PROP + args.proposal_id, cfg.canon_ref)
+        logs = [p for p in s.changed_paths if p.startswith(LOG_DIR)]
+        return {"conflicts": s.conflicts, "changed_paths": s.changed_paths, "authors": s.authoring_instances,
+                "log_entries": logs, "log_entry_ok": len(logs) == 1,
+                "expected_seq": format(canon_seq(store, cfg.seq_origin) + 1, "x")}
+    if args.cmd == "land":
+        if not args.by and not cfg.approvers:
+            # without this guard the default lookup below dies with a bare IndexError
+            raise SystemExit("no approvers are configured — nothing can be landed")
+        approver = args.by or sorted(cfg.approvers)[0]
+        if approver not in cfg.approvers:
+            raise SystemExit(f"{approver!r} is not a configured approver ({sorted(cfg.approvers)})")
+        try:
+            prepared = store.prepare_merge(PROP + args.proposal_id, cfg.canon_ref)
+        except MergeConflict as e:
+            raise SystemExit(f"conflict — not landing: {e}")
+        try:
+            return land_and_record(store, index, embedder, audit, prepared,
+                                   Approval(prepared.candidate_oid, approver, "cli-confirm"),
+                                   origin=cfg.seq_origin)
+        except ValueError as e:
+            raise SystemExit(f"not landing: {e}")
+    raise SystemExit(f"unknown command {args.cmd!r}")
+def build_parser() -> argparse.ArgumentParser:
+    """Build the `stasima-admin` parser: a global --config (defaulting to $STASIMA_CONFIG) plus one
+    required subcommand, stored on the namespace as `cmd`."""
+    parser = argparse.ArgumentParser(prog="stasima-admin", description="Stasima maintenance + promotion")
+    parser.add_argument("--config", default=os.environ.get("STASIMA_CONFIG"))
+    commands = parser.add_subparsers(dest="cmd", required=True)
+    # commands that take no arguments of their own
+    for bare in ("status", "reindex", "reconcile", "verify", "anchor"):
+        commands.add_parser(bare)
+    bootstrap = commands.add_parser("bootstrap")
+    bootstrap.add_argument("seed_dir", help="folder of .md entries to seed an empty canon")
+    provision = commands.add_parser("totp-provision")
+    provision.add_argument("--force", action="store_true", help="rotate an existing secret")
+    provision.add_argument("--qr", action="store_true",
+                           help="render a scannable ASCII QR (re-displays if the secret exists)")
+    check = commands.add_parser("totp-check")
+    check.add_argument("code", help="a code from your authenticator app")
+    backup = commands.add_parser("backup")
+    backup.add_argument("dest", help="destination folder for the full backup")
+    inbox = commands.add_parser("inbox")
+    inbox.add_argument("--all", action="store_true", help="include already-read messages")
+    inbox.add_argument("--read", default=None, metavar="PATH", help="mark a message path as read")
+    preview = commands.add_parser("preview")
+    preview.add_argument("proposal_id")
+    landing = commands.add_parser("land")
+    landing.add_argument("proposal_id")
+    landing.add_argument("--by", default=None, help="approver (defaults to the first configured)")
+    return parser
+def main(argv=None) -> dict:
+    """CLI entry point: parse `argv`, run the command, print the result as indented JSON, return it.
+    NOTE(review): the result dict is returned to the caller; if this function is wired up as a
+    console-script entry point whose wrapper passes the return value to sys.exit, a non-None value
+    would be reported as a failure exit — confirm against entry_points.txt."""
+    # Windows consoles default to cp1252, which can't print the QR block chars (or em dashes);
+    # switching stdout to UTF-8 is best effort and any failure is ignored
+    try:
+        sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+    except Exception:
+        pass
+    parsed = build_parser().parse_args(argv)
+    outcome = run(parsed)
+    rendered = json.dumps(outcome, indent=2, default=str)
+    print(rendered)
+    return outcome
+if __name__ == "__main__":
+    main()

stasima/airlock.py ADDED Viewed

@@ -0,0 +1,217 @@
+# SPDX-License-Identifier: Apache-2.0
+"""
+Airlock — TOTP two-phase remote approval (the mediated-channel counterpart of console `land`).
+At the console, the console itself is the out-of-band channel, so admin `land` stays a single
+approval. When the practitioner approves *through an instance conversation* (phone, relay), the
+channel is the thing to defend against: the airlock binds presence-proofs (TOTP codes) to the
+existing prepare/land two-phase gate so no single code — harvested, relayed, or replayed — can
+both stage and land, and nothing can land unreviewed.
+    open --code 1--> staged --(review: floor..ceiling)--code 2--> landed
+      ^                |
+      +---- revert ----+    (abort: FREE, no code; TTL expiry: lazy auto-revert)
+Why the 120s floor: a TOTP code lives at most ~90s (30s step, +/-1 window acceptance), so any code
+visible at staging time is arithmetically dead by the earliest legal landing moment. Strict window
+ordering (code 2 strictly later than code 1) and consume-once (a window number is never accepted
+twice for one purpose) close the same-window and replay paths as defense in depth. Content-binding
+(landing names the staged oid) means what lands is exactly what was staged — a swap fails closed.
+Abort is free by design: charging presence-proof to *decline* would incentivize landing.
+Honest residual: the practitioner's view of what was staged flows through the relaying instance.
+Content-binding makes swap-after-stage impossible and the audit trail makes deception detectable
+after the fact; it does NOT make the relay's display trustworthy in the moment. The console remains
+the stronger channel.
+State (open | staged | landed, with revert folding back to open) is derived from the audit log —
+staging is operational, not content; nothing here touches the storage spine. The clock is
+injectable for tests; server time is authoritative for every gate.
+"""
+import base64
+import hashlib
+import hmac
+import os
+import struct
+import time
+from .local_capstore import MergePreparation, MergeSummary, Approval, PROP_PREFIX as PROP
+STEP = 30                      # RFC 6238 time step (seconds)
+DIGITS = 6                     # number of decimal digits in each generated code
+class AirlockError(Exception):
+    """Raised when an airlock gate refuses an operation. The message names the failed gate and,
+    where applicable, both of the values that were compared."""
+# ---------------------------------------------------------------- TOTP (RFC 6238, stdlib only)
+def generate_secret() -> str:
+    """Return a fresh TOTP secret: 20 bytes from the OS random source, base32-encoded as text."""
+    raw = os.urandom(20)
+    encoded = base64.b32encode(raw)
+    return encoded.decode()
+def otpauth_uri(secret: str, label: str = "Stasima:practitioner", issuer: str = "Stasima") -> str:
+    """Build the otpauth:// provisioning URI an authenticator app imports (SHA1, DIGITS digits, STEP period).
+    `label` and `issuer` are percent-encoded so that spaces, '&', '?' or '#' in an operator-chosen
+    name cannot corrupt the URI; the ':' separating issuer and account in the label stays literal.
+    `secret` is inserted unchanged."""
+    from urllib.parse import quote   # function-scope import keeps this block self-contained
+    safe_label = quote(label, safe=":")
+    safe_issuer = quote(issuer, safe="")
+    return (f"otpauth://totp/{safe_label}?secret={secret}&issuer={safe_issuer}"
+            f"&algorithm=SHA1&digits={DIGITS}&period={STEP}")
+def totp_at(secret: str, window: int) -> str:
+    """Return the code for an absolute window number (window = unix_time // STEP), zero-padded to DIGITS."""
+    key = base64.b32decode(secret.strip(), casefold=True)
+    counter = struct.pack(">Q", window)
+    digest = hmac.new(key, counter, hashlib.sha1).digest()
+    # dynamic truncation: the low nibble of the last byte selects a 4-byte slice, whose top bit is cleared
+    start = digest[-1] & 0x0F
+    (word,) = struct.unpack(">I", digest[start:start + 4])
+    value = (word & 0x7FFFFFFF) % (10 ** DIGITS)
+    return f"{value:0{DIGITS}d}"
+def verify_code(secret: str, code: str, now: float):
+    """Accept the current window +/-1 (clock skew). Returns the MATCHED window number, else None.
+    `now` is unix time in seconds; `code` is stringified and stripped before comparison."""
+    # compare as bytes: hmac.compare_digest raises TypeError for non-ASCII str input, which would
+    # turn a malformed code into a crash instead of a plain "no match"
+    supplied = str(code).strip().encode("utf-8", "replace")
+    w = int(now // STEP)
+    for cand in (w, w - 1, w + 1):
+        if hmac.compare_digest(totp_at(secret, cand).encode("ascii"), supplied):
+            return cand
+    return None
+# ---------------------------------------------------------------- the gate
+# The two-phase TOTP gate: stage (code 1) -> review window -> land (code 2), with a free revert.
+# Per-proposal state is not stored anywhere; it is folded from the audit log on every query.
+class Airlock:
+    def __init__(self, store, audit, *, secret_path, land_fn, validate_fn, approver,
+                 floor_s: int = 120, ceiling_s: int = 7200, clock=time.time, prop_prefix: str = PROP):
+        # floor must exceed worst-case code lifetime (STEP + one window of skew acceptance ~= 90s)
+        # so that no code obtained at staging survives to the landing moment.
+        # floor_s: minimum seconds between staging and landing; ceiling_s: seconds after which a
+        # staged proposal is reverted when next observed; clock: callable returning the current time.
+        self.store = store
+        self.audit = audit
+        self.secret_path = secret_path
+        self.land_fn = land_fn          # (prepared, approval) -> land_and_record result
+        self.validate_fn = validate_fn  # (prepared) -> log-entry seq (early feedback; land re-validates)
+        self.approver = approver
+        self.floor_s = floor_s
+        self.ceiling_s = ceiling_s
+        self.clock = clock
+        self.prop_prefix = prop_prefix
+    # ---- secret + code consumption ----
+    # Reads the TOTP secret from secret_path on every call; a missing file is an AirlockError.
+    def _secret(self) -> str:
+        if not os.path.exists(self.secret_path):
+            raise AirlockError("airlock not provisioned — run: admin totp-provision")
+        with open(self.secret_path, encoding="utf-8") as f:
+            return f.read().strip()
+    def _consume(self, code, purpose, obj, min_window=None) -> int:
+        """Verify a code and burn its window for this purpose. Codes are never logged — windows are."""
+        w = verify_code(self._secret(), code, self.clock())
+        if w is None:
+            self.audit.append("system", "totp_reject", detail={"purpose": purpose, "object": obj,
+                                                               "reason": "invalid"})
+            raise AirlockError("invalid code")
+        # strict ordering: a landing code must come from a window after the staging window
+        if min_window is not None and w <= min_window:
+            self.audit.append("system", "totp_reject", detail={"purpose": purpose, "object": obj,
+                                                               "window": w, "min_window": min_window,
+                                                               "reason": "not strictly later"})
+            raise AirlockError(f"code is from window {w}, which is not strictly later than the staging "
+                               f"window {min_window} — wait for a fresh code")
+        # consume-once: the same window may not be accepted twice for the same purpose
+        # (the check is per purpose, across all objects)
+        for e in self.audit.events(op="totp_accept"):
+            if e["detail"].get("window") == w and e["detail"].get("purpose") == purpose:
+                self.audit.append("system", "totp_reject", detail={"purpose": purpose, "object": obj,
+                                                                   "window": w, "reason": "replay"})
+                raise AirlockError(f"a code from window {w} was already used for {purpose} (consume-once)")
+        self.audit.append("practitioner", "totp_accept", detail={"window": w, "purpose": purpose, "object": obj})
+        return w
+    # ---- state machine (derived from the audit log; TTL is lazy) ----
+    # Replays every audit event and returns (state, deciding event) for one proposal; later events
+    # override earlier ones. Stage/revert events are matched on the bare proposal id, land_merge
+    # events on the full ref (prop_prefix + id).
+    def _fold(self, proposal_id):
+        cur, ev = "open", None
+        full_ref = self.prop_prefix + proposal_id
+        for e in self.audit.events():
+            d = e["detail"]
+            if e["op"] == "airlock_stage" and d.get("proposal") == proposal_id:
+                cur, ev = "staged", e
+            elif e["op"] == "airlock_revert" and d.get("proposal") == proposal_id:
+                cur, ev = "open", e
+            elif e["op"] == "land_merge" and d.get("proposal") == full_ref:
+                cur, ev = "landed", e
+        return cur, ev
+    # Returns the proposal's current state as a dict. NOT read-only: observing a staged proposal
+    # older than ceiling_s appends a "ttl" airlock_revert event and reports the proposal as open.
+    def state(self, proposal_id) -> dict:
+        st, e = self._fold(proposal_id)
+        if st == "staged":
+            d = e["detail"]
+            if self.clock() - d["staged_at"] > self.ceiling_s:   # lazy TTL: observed -> reverted
+                self.audit.append("system", "airlock_revert", target_ref=self.prop_prefix + proposal_id,
+                                  detail={"proposal": proposal_id, "reason": "ttl",
+                                          "staged_oid": d["staged_oid"]})
+                return {"state": "open", "reverted": "ttl"}
+            return {"state": "staged", "staged_oid": d["staged_oid"], "staged_at": d["staged_at"],
+                    "window": d["window"], "changed_paths": d.get("changed_paths", []),
+                    "lands_after": d["staged_at"] + self.floor_s,
+                    "expires_at": d["staged_at"] + self.ceiling_s}
+        return {"state": st}
+    def staged(self) -> list:
+        """Currently staged proposals (cockpit view)."""
+        return [{"proposal_id": pid, "staged_oid": st["staged_oid"],
+                 "lands_after": st["lands_after"], "expires_at": st["expires_at"]}
+                for pid, st in self._staged_proposals()]
+    # (proposal_id, state dict) pairs for every proposal that currently folds to "staged". Each
+    # proposal is evaluated once, in the order of its first stage event; because it goes through
+    # state(), expired stagings are reverted as a side effect.
+    def _staged_proposals(self):
+        seen, out = set(), []
+        for e in self.audit.events(op="airlock_stage"):
+            pid = e["detail"]["proposal"]
+            if pid in seen:
+                continue
+            seen.add(pid)
+            st = self.state(pid)
+            if st["state"] == "staged":
+                out.append((pid, st))
+        return out
+    # ---- the three ops ----
+    def stage(self, proposal_id, code) -> dict:
+        """Code 1: freeze the proposal, prepare the merge, start the review clock."""
+        if self.state(proposal_id)["state"] == "staged":
+            raise AirlockError(f"{proposal_id} is already staged")
+        # prove the proposal stageable BEFORE consuming the code — failures must not burn windows
+        prepared = self.store.prepare_merge(self.prop_prefix + proposal_id, self.store.canon_ref)
+        seq = self.validate_fn(prepared)
+        w = self._consume(code, "stage", proposal_id)
+        staged_at = self.clock()
+        # the stage event is the only record of the staging: state() and land() read the oid,
+        # window and timestamp back from its detail
+        self.audit.append("practitioner", "airlock_stage", target_ref=self.prop_prefix + proposal_id,
+                          result_oid=prepared.candidate_oid,
+                          detail={"proposal": proposal_id, "staged_oid": prepared.candidate_oid,
+                                  "window": w, "staged_at": staged_at, "seq": seq,
+                                  "changed_paths": prepared.summary.changed_paths})
+        return {"proposal_id": proposal_id, "staged_oid": prepared.candidate_oid,
+                "staged_at": staged_at, "lands_after": staged_at + self.floor_s,
+                "expires_at": staged_at + self.ceiling_s,
+                "changed_paths": prepared.summary.changed_paths, "log_seq": seq}
+    def land(self, staged_oid_prefix, code) -> dict:
+        """Code 2: after the review floor, strictly later window, bound to the staged content."""
+        if len(str(staged_oid_prefix)) < 8:
+            raise AirlockError("staged oid prefix too short — give at least 8 hex characters")
+        # content-binding: the caller names WHAT to land (an oid prefix), not which proposal
+        matches = [(pid, st) for pid, st in self._staged_proposals()
+                   if st["staged_oid"].startswith(staged_oid_prefix)]
+        if not matches:
+            raise AirlockError(f"no staged proposal matches oid prefix {staged_oid_prefix!r} (content-binding)")
+        if len(matches) > 1:
+            raise AirlockError(f"oid prefix {staged_oid_prefix!r} is ambiguous across staged proposals")
+        pid, st = matches[0]
+        elapsed = self.clock() - st["staged_at"]
+        if elapsed < self.floor_s:   # gates before code verification — a floor miss must not burn a window
+            raise AirlockError(f"review floor not met: {elapsed:.0f}s since staging, floor is "
+                               f"{self.floor_s}s — review, then retry with a fresh code")
+        w2 = self._consume(code, "land", pid, min_window=st["window"])
+        # rebuild the preparation from the audit record instead of re-running the merge, so the
+        # candidate handed to land_fn is exactly the staged oid.
+        # NOTE(review): the two empty lists fill MergeSummary's remaining positional fields —
+        # presumably conflicts and authoring instances; confirm against local_capstore.
+        prepared = MergePreparation(candidate_oid=st["staged_oid"], into=self.store.canon_ref,
+                                    proposal_ref=self.prop_prefix + pid,
+                                    summary=MergeSummary(st.get("changed_paths", []), [], []))
+        return self.land_fn(prepared, Approval(st["staged_oid"], self.approver, f"airlock-totp-w{w2}"))
+    def revert(self, proposal_id) -> dict:
+        """Abort a staged review. FREE — never requires a code (charging for decline would
+        incentivize landing). Returns the proposal to open with its entries intact."""
+        if self.state(proposal_id)["state"] != "staged":
+            raise AirlockError(f"{proposal_id} is not staged")
+        self.audit.append("system", "airlock_revert", target_ref=self.prop_prefix + proposal_id,
+                          detail={"proposal": proposal_id, "reason": "manual"})
+        return {"proposal_id": proposal_id, "state": "open"}

stasima/audit_log.py ADDED Viewed

@@ -0,0 +1,179 @@
+# SPDX-License-Identifier: Apache-2.0
+"""
+Audit log — the operation-layer TRUTH, complementary to git's content-layer truth.
+Records what git can't: ops that produce no commit (read-receipts, denials), the order/timing
+of operations, and outcomes (ok/error). Append-only, hash-chained. SQLite is its source of truth
+(git stays source of truth for the information itself).
+Scope: writes (state changes) and failures (what's breaking). Successful reads are observability,
+not logged; read-state IS logged (a read-receipt is a write-like, forensic event).
+The hash chain is tamper-EVIDENCE at this threat model (cooperative, single practitioner, no crypto):
+it detects accidental corruption, deletion, and reordering, and yields one head hash summarizing the
+whole history. It is not forgery-proof (no signature to forge) — signing the head is the additive
+upgrade. Per-canon-land, the head is anchored into git (replicated, durable), so the git substrate
+can witness tampering of the SQLite log.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import sqlite3
+from abc import ABC, abstractmethod
+from datetime import datetime, timezone
+from typing import Optional
+from .local_capstore import Identity, PERSP_PREFIX, PROP_PREFIX
+GENESIS = "0" * 64                       # chain start: prev_hash of the first event, head of an empty log
+ANCHOR_REF = "refs/cap/audit-anchor"     # default git ref that holds the anchored audit head
+# the event fields that feed each event's hash (every stored column except `hash` itself)
+_HASHED = ["seq", "ts", "actor", "op", "target_ref", "target_path",
+           "op_id", "result_oid", "outcome", "detail", "prev_hash"]
+def _canonical(ev: dict) -> str:
+    """Serialize the hashed fields of `ev` as compact JSON with sorted keys; absent fields become null."""
+    hashed_fields = {}
+    for name in _HASHED:
+        hashed_fields[name] = ev.get(name)
+    return json.dumps(hashed_fields, sort_keys=True, default=str, separators=(",", ":"))
+def _hash(ev: dict) -> str:
+    """Return the SHA-256 hex digest of the canonical serialization of `ev`."""
+    payload = _canonical(ev).encode()
+    return hashlib.sha256(payload).hexdigest()
+class AuditLog(ABC):
+    """Abstract interface of the append-only, hash-chained audit log."""
+    @abstractmethod
+    def append(self, actor: str, op: str, *, target_ref=None, target_path=None,
+               op_id=None, result_oid=None, outcome="ok", detail=None) -> dict:
+        """Record one event and return it."""
+    @abstractmethod
+    def head(self) -> str:
+        """Return the current chain head hash."""
+    @abstractmethod
+    def count(self) -> int:
+        """Return the number of recorded events."""
+    @abstractmethod
+    def verify(self) -> tuple[bool, Optional[int]]:
+        """Check the chain; return (ok, seq of the first bad event or None)."""
+    @abstractmethod
+    def head_at(self, seq: int) -> str:
+        """Return the chain hash recomputed over the events up to and including `seq`."""
+    @abstractmethod
+    def events(self, *, op=None, actor=None, op_id=None) -> list[dict]:
+        """Return the events matching every filter that is given."""
+    @abstractmethod
+    def append_read(self, instance_id: str, message_path: str) -> dict:
+        """Record that `instance_id` read the message at `message_path`."""
+    @abstractmethod
+    def is_read(self, instance_id: str, message_path: str) -> bool:
+        """Return whether a read-receipt exists for this instance and message path."""
+class SqliteAuditLog(AuditLog):
+    """Audit log stored in one SQLite table (`audit_events`), one row per event, hash-chained via prev_hash."""
+    def __init__(self, db_path: str = ":memory:"):
+        # open (or create) the database and make sure the events table exists
+        conn = sqlite3.connect(db_path)
+        conn.row_factory = sqlite3.Row
+        conn.execute(
+            """CREATE TABLE IF NOT EXISTS audit_events (
+                 seq INTEGER PRIMARY KEY, ts TEXT, actor TEXT, op TEXT,
+                 target_ref TEXT, target_path TEXT, op_id TEXT, result_oid TEXT,
+                 outcome TEXT, detail TEXT, prev_hash TEXT, hash TEXT)""")
+        conn.commit()
+        self.conn = conn
+    def _row(self, r: sqlite3.Row) -> dict:
+        """Turn a table row into an event dict, decoding the JSON `detail` column (empty -> {})."""
+        record = dict(r)
+        raw_detail = record["detail"]
+        record["detail"] = json.loads(raw_detail) if raw_detail else {}
+        return record
+    def append(self, actor, op, *, target_ref=None, target_path=None,
+               op_id=None, result_oid=None, outcome="ok", detail=None) -> dict:
+        """Append one event chained onto the current head, commit it, and return the event dict."""
+        next_seq = self.count() + 1
+        stamp = datetime.now(timezone.utc).isoformat()
+        ev = {"seq": next_seq, "ts": stamp, "actor": actor, "op": op,
+              "target_ref": target_ref, "target_path": target_path,
+              "op_id": op_id, "result_oid": result_oid, "outcome": outcome,
+              "detail": detail or {}, "prev_hash": self.head()}
+        ev["hash"] = _hash(ev)
+        # column order of the table; `detail` is the only value stored as JSON text
+        columns = ("seq", "ts", "actor", "op", "target_ref", "target_path",
+                   "op_id", "result_oid", "outcome", "detail", "prev_hash", "hash")
+        values = tuple(json.dumps(ev[c]) if c == "detail" else ev[c] for c in columns)
+        self.conn.execute("INSERT INTO audit_events VALUES (?,?,?,?,?,?,?,?,?,?,?,?)", values)
+        self.conn.commit()
+        return ev
+    def head(self) -> str:
+        """Hash of the highest-seq event, or GENESIS when the table is empty."""
+        latest = self.conn.execute("SELECT hash FROM audit_events ORDER BY seq DESC LIMIT 1").fetchone()
+        if latest is None:
+            return GENESIS
+        return latest["hash"]
+    def count(self) -> int:
+        """Number of rows in the events table."""
+        row = self.conn.execute("SELECT COUNT(*) c FROM audit_events").fetchone()
+        return row["c"]
+    def verify(self) -> tuple[bool, Optional[int]]:
+        """Walk the chain in seq order; return (False, seq) at the first event whose prev_hash link
+        or stored hash does not check out, else (True, None)."""
+        expected_prev = GENESIS
+        rows = self.conn.execute("SELECT * FROM audit_events ORDER BY seq")
+        for event in map(self._row, rows):
+            if event["prev_hash"] != expected_prev or _hash(event) != event["hash"]:
+                return False, event["seq"]
+            expected_prev = event["hash"]
+        return True, None
+    def head_at(self, seq: int) -> str:
+        """Recompute the chain hash up to `seq` from stored fields, chaining the RECOMPUTED prev
+        (not the stored prev_hash) so an upstream tamper propagates forward and is detectable."""
+        running = GENESIS
+        cursor = self.conn.execute("SELECT * FROM audit_events WHERE seq<=? ORDER BY seq", (seq,))
+        for row in cursor:
+            event = self._row(row)
+            event["prev_hash"] = running
+            running = _hash(event)
+        return running
+    def events(self, *, op=None, actor=None, op_id=None) -> list[dict]:
+        """Events in seq order, restricted by equality on each filter that is not None."""
+        active = [(col, val) for col, val in (("op", op), ("actor", actor), ("op_id", op_id))
+                  if val is not None]
+        sql = "SELECT * FROM audit_events"
+        if active:
+            sql += " WHERE " + " AND ".join(f"{col}=?" for col, _ in active)
+        sql += " ORDER BY seq"
+        params = [val for _, val in active]
+        return [self._row(r) for r in self.conn.execute(sql, params)]
+    def append_read(self, instance_id, message_path) -> dict:
+        """Append a `read_receipt` event with the instance as actor and the message as target_path."""
+        return self.append(instance_id, "read_receipt", target_path=message_path)
+    def is_read(self, instance_id, message_path) -> bool:
+        """True when a `read_receipt` event exists for this actor and message path."""
+        found = self.conn.execute(
+            "SELECT 1 FROM audit_events WHERE op='read_receipt' AND actor=? AND target_path=? LIMIT 1",
+            (instance_id, message_path)).fetchone()
+        return found is not None
+# ---------------------------------------------------------------- git integration
+def reconcile_from_git(store, audit: AuditLog) -> int:
+    """git-first-then-audit recovery (CAPstore OQ5). Scans canon (if it resolves) plus every
+    perspective and proposal ref, and appends a `reconciled_commit` event for each commit whose
+    op_id has no audit event yet — the case where a handler died after the commit but before the
+    audit append. Returns the number of events backfilled."""
+    canon = store.canon_ref
+    refs = [canon] if store.resolve_ref(canon) else []
+    for prefix in (PERSP_PREFIX, PROP_PREFIX):
+        refs.extend(r.name for r in store.list_refs(prefix))
+    seen_ops = {e["op_id"] for e in audit.events() if e["op_id"]}
+    added = 0
+    for ref in refs:
+        for commit in store.commit_ops(ref):
+            op_id = commit["op_id"]
+            if not op_id or op_id in seen_ops:
+                continue
+            audit.append(commit["author"], "reconciled_commit", target_ref=ref,
+                         op_id=op_id, result_oid=commit["oid"], detail={"reconciled": True})
+            seen_ops.add(op_id)   # a commit reachable from several refs is backfilled once
+            added += 1
+    return added
+def anchor_audit_head(store, audit: AuditLog, anchor_ref: str = ANCHOR_REF) -> dict:
+    """Commit the current audit head into git under `anchor_ref` — call on each canon land.
+    The payload {"seq", "head"} is stored as `audit-head.json` and also returned. Rides the
+    refs/cap/* sync refspec, so the anchor replicates to any mirror and git can witness the
+    SQLite log's integrity."""
+    checkpoint = {"seq": audit.count(), "head": audit.head()}
+    blob = json.dumps(checkpoint, separators=(",", ":")).encode()
+    message = f"audit anchor @ seq {checkpoint['seq']}"
+    author = Identity("system")
+    parent = store.resolve_ref(anchor_ref)
+    store.commit(anchor_ref, {"audit-head.json": blob}, message, author,
+                 expected_parent=parent, op_id=f"anchor-{checkpoint['head'][:12]}")
+    return checkpoint
+def verify_against_anchor(store, audit: AuditLog, anchor_ref: str = ANCHOR_REF) -> Optional[bool]:
+    """Compare the SQLite log with the git-anchored checkpoint.
+    Returns None when `anchor_ref` does not resolve (no anchor yet); otherwise True/False depending
+    on whether the chain recomputed up to the anchored seq equals the head stored in git — so
+    tampering with the SQLite log is caught by the replicated git substrate."""
+    if store.resolve_ref(anchor_ref) is None:
+        return None
+    raw = store.read_blob(anchor_ref, "audit-head.json")
+    checkpoint = json.loads(raw.decode())
+    recomputed = audit.head_at(checkpoint["seq"])
+    return recomputed == checkpoint["head"]