stasima 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stasima/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Stasima — a server letting multiple AI instances share one durable, version-controlled
3
+ body of knowledge, with a human practitioner as the gate to shared truth."""
4
+ __version__ = "1.0.0"
stasima/admin.py ADDED
@@ -0,0 +1,259 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """
3
+ Admin CLI — practitioner-side maintenance + promotion. NOT model-facing: these are operator ops,
4
+ and `land` (promotion to canon) IS the human gate, performed here out of band.
5
+
6
+ stasima-admin --config stasima.toml <command>
7
+
8
+ bootstrap <dir> seed an EMPTY canon from a folder of .md entries (one-time)
9
+ totp-provision generate the airlock TOTP secret (prints the otpauth:// URI)
10
+ totp-check <code> verify a code from your app (consumes nothing; diagnoses clock skew)
11
+ inbox [--all] [--read PATH] the practitioner's mail, from the cockpit (pull)
12
+ backup <dest> full backup of everything that is truth: git mirror (all refs+tags),
13
+ consistent audit snapshot, config, TOTP secret. Run it anywhere — a
14
+ synced folder, an external drive, another machine over a share.
15
+ status canon head, perspectives, proposals, audit health
16
+ reindex rebuild the MAP index from git
17
+ reconcile backfill audit events for committed ops missing one
18
+ verify check the audit chain (+ the git-anchored checkpoint)
19
+ anchor write the audit head into git now
20
+ preview <id> dry-run a proposal merge (conflicts / changed paths)
21
+ land <id> [--by X] approve + land a proposal to canon (audit + reindex + anchor)
22
+ """
23
+ import argparse
24
+ import json
25
+ import os
26
+ import subprocess as sp
27
+ import sys
28
+ import time
29
+
30
+ from .config import Config
31
+ from .entries import compose_entry
32
+ from .cap_server import components_from_config
33
+ from .canon import reindex_from_git, land_and_record, canon_seq, seq_display, LOG_DIR
34
+ from .audit_log import reconcile_from_git, anchor_audit_head, verify_against_anchor
35
+ from .local_capstore import Approval, MergeConflict, PERSP_PREFIX as PERSP, PROP_PREFIX as PROP
36
+ from .airlock import generate_secret, otpauth_uri, verify_code, totp_at, STEP
37
+
38
+
39
+ def _first_heading(text: str):
40
+ for line in text.splitlines():
41
+ if line.startswith("# "):
42
+ return line[2:].strip()
43
+ return None
44
+
45
+
46
+ def _qr_ascii(data: str):
47
+ """ASCII QR of `data`, or None if the optional qrcode package isn't installed."""
48
+ try:
49
+ import qrcode
50
+ except ImportError:
51
+ return None
52
+ import io
53
+ qr = qrcode.QRCode(border=2)
54
+ qr.add_data(data)
55
+ qr.make(fit=True)
56
+ buf = io.StringIO()
57
+ qr.print_ascii(out=buf, invert=True) # dark-terminal polarity; the URI below is the fallback
58
+ return buf.getvalue()
59
+
60
+
61
def run(args) -> dict:
    """Execute one admin subcommand and return its result as a dict (printed as JSON by `main`).

    `args` is the namespace produced by `build_parser()`: `args.config` is handed to
    `Config.load` and `args.cmd` selects the branch below. Operator-facing refusals are raised
    as `SystemExit` carrying a message; an unrecognised command is also a `SystemExit`.
    """
    cfg = Config.load(args.config)
    store, index, embedder, audit, authz, airlock = components_from_config(cfg)

    if args.cmd == "totp-provision":
        issuer = cfg.deployment_name or "Stasima"
        uri = lambda s: otpauth_uri(s, label=f"{issuer}:practitioner", issuer=issuer)
        path = cfg.resolved_airlock_secret()
        if os.path.exists(path) and not args.force:
            if args.qr:  # re-display the EXISTING secret's QR — no rotation
                with open(path, encoding="utf-8") as f:
                    secret = f.read().strip()
                qr = _qr_ascii(uri(secret))
                print(qr if qr else "(pip install qrcode for a scannable QR)")
                return {"secret_path": path, "otpauth_uri": uri(secret), "rotated": False,
                        "note": "existing secret re-displayed; scan the QR or enter the secret= value manually"}
            raise SystemExit(f"secret already exists at {path} — pass --force to rotate "
                             f"(rotating invalidates the practitioner's current authenticator entry), "
                             f"or --qr to re-display it")
        secret = generate_secret()
        # The secret is a credential: create the file owner-only instead of with umask defaults.
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(secret + "\n")
        try:
            # os.open's mode only applies on creation; tighten a pre-existing file being rotated.
            os.chmod(path, 0o600)
        except OSError:
            pass  # best-effort: some filesystems cannot represent POSIX permission bits
        if args.qr:
            qr = _qr_ascii(uri(secret))
            print(qr if qr else "(pip install qrcode for a scannable QR)")
        return {"secret_path": path, "otpauth_uri": uri(secret),
                "note": "scan the QR (or enter the secret= value manually); the secret stays "
                        "server-side, never in git — if the QR won't scan, terminal polarity is the "
                        "usual culprit; the manual key always works"}

    if args.cmd == "totp-check":
        # verification only — consumes no windows, approves nothing; safe to run as often as you like
        spath = cfg.resolved_airlock_secret()
        if not os.path.exists(spath):
            raise SystemExit(f"no secret at {spath} — run totp-provision first")
        with open(spath, encoding="utf-8") as f:
            secret = f.read().strip()
        now = time.time()
        w = verify_code(secret, args.code, now)
        if w is not None:
            return {"valid": True, "matched_window": w, "current_window": int(now // STEP),
                    "note": "your authenticator and the server agree — the airlock will accept codes"}
        cur = int(now // STEP)
        for delta in range(-10, 11):  # diagnose clock skew beyond the ±1 acceptance
            if totp_at(secret, cur + delta) == str(args.code).strip():
                return {"valid": False, "matched_window": cur + delta,
                        "skew": f"{delta:+d} windows (≈ {delta * STEP:+d}s)",
                        "note": "code is from the right secret but outside the ±1-window acceptance — "
                                "sync the server or phone clock"}
        return {"valid": False,
                "note": "no match within ±10 windows — mistyped code, or the app holds a different/old secret "
                        "(re-provision with --force and re-add to the app)"}

    if args.cmd == "bootstrap":
        if not os.path.isdir(os.path.join(cfg.git_dir, "objects")):
            sp.run(["git", "init", "--bare", "-q", cfg.git_dir], check=True)  # create the bare repo if missing
        if store.resolve_ref(cfg.canon_ref) is not None:
            raise SystemExit("canon already exists — add entries via propose + land, not bootstrap")
        changes = {}
        for root, _, files in os.walk(args.seed_dir):
            for fn in sorted(files):
                if not fn.endswith(".md"):
                    continue
                rel = os.path.relpath(os.path.join(root, fn), args.seed_dir).replace(os.sep, "/")
                with open(os.path.join(root, fn), encoding="utf-8") as f:
                    text = f.read()
                if not text.lstrip().startswith("---"):  # plain markdown -> wrap with a sensible envelope
                    title = _first_heading(text) or os.path.splitext(fn)[0].replace("-", " ").title()
                    etype = "ori" if rel.startswith("technical/orientation/") else "kno"
                    text = compose_entry({"type": etype, "title": title, "status": "active"}, text)
                changes[rel] = text.encode()
        if not changes:
            raise SystemExit(f"no .md files found under {args.seed_dir!r}")
        r = store.bootstrap_canon(changes, "Bootstrap canon")
        return {"bootstrapped": r.oid, "entries": sorted(changes), "indexed": reindex_from_git(store, index, embedder)}

    if args.cmd == "status":
        ok, _ = audit.verify()
        unread = [m for m in index.inbox("practitioner") if not audit.is_read("practitioner", m.path)]
        return {"canon_head": store.resolve_ref(cfg.canon_ref),
                "canon_seq": seq_display(canon_seq(store, cfg.seq_origin)),
                "perspectives": [r.name[len(PERSP):] for r in store.list_refs(PERSP)],
                "proposals": [r.name[len(PROP):] for r in store.list_refs(PROP)],
                "staged": airlock.staged(),
                "practitioner_unread": len(unread),
                "audit_events": audit.count(), "audit_verify_ok": ok,
                "audit_vs_anchor": verify_against_anchor(store, audit)}

    if args.cmd == "inbox":
        if args.read:
            audit.append_read("practitioner", args.read)
            return {"marked_read": args.read}
        msgs = index.inbox("practitioner")
        if not args.all:
            msgs = [m for m in msgs if not audit.is_read("practitioner", m.path)]
        return {"unread" if not args.all else "all":
                [{"path": m.path, "from": m.authoring_instance, "subject": m.subject,
                  "coordinates": m.links} for m in msgs],
                "note": "read a message body with: kip_get equivalent -> git show <perspective>:<path>; "
                        "mark handled with: inbox --read <path>"}

    if args.cmd == "backup":
        # everything that is TRUTH, in one destination: full-ref git mirror (consistent by nature),
        # a consistent audit snapshot (sqlite backup API, safe against a live server), config + secret.
        # The map index is a derived cache and is deliberately not backed up.
        import shutil
        import sqlite3 as _sq
        os.makedirs(args.dest, exist_ok=True)
        mirror = os.path.join(args.dest, "stasima-mirror.git")
        if not os.path.isdir(os.path.join(mirror, "objects")):
            sp.run(["git", "init", "--bare", "-q", mirror], check=True)
        store.set_remote("backup", mirror)
        sync = store.push_all("backup")
        audit_copy = os.path.join(args.dest, "audit.sqlite")
        dst = _sq.connect(audit_copy)
        try:
            audit.conn.backup(dst)
        finally:
            dst.close()  # release the destination handle even when the snapshot fails
        copied = ["stasima-mirror.git", "audit.sqlite"]
        for src in (args.config, cfg.resolved_airlock_secret()):
            if src and os.path.exists(src):
                shutil.copy2(src, args.dest)
                copied.append(os.path.basename(src))
        ok = not sync["missing_on_remote"] and not sync["oid_mismatch"]
        return {"dest": args.dest, "git_sync_ok": ok, "synced_refs": len(sync["synced"]),
                "audit_events": audit.count(), "copied": copied}

    if args.cmd == "reindex":
        return {"reindexed": reindex_from_git(store, index, embedder)}

    if args.cmd == "reconcile":
        return {"backfilled": reconcile_from_git(store, audit)}

    if args.cmd == "verify":
        ok, bad = audit.verify()
        return {"audit_verify_ok": ok, "first_bad_seq": bad,
                "audit_vs_anchor": verify_against_anchor(store, audit)}

    if args.cmd == "anchor":
        return {"anchored": anchor_audit_head(store, audit)}

    if args.cmd == "preview":
        s = store.preview_merge(PROP + args.proposal_id, cfg.canon_ref)
        logs = [p for p in s.changed_paths if p.startswith(LOG_DIR)]
        return {"conflicts": s.conflicts, "changed_paths": s.changed_paths, "authors": s.authoring_instances,
                "log_entries": logs, "log_entry_ok": len(logs) == 1,
                "expected_seq": format(canon_seq(store, cfg.seq_origin) + 1, "x")}

    if args.cmd == "land":
        approver = args.by
        if not approver:
            configured = sorted(cfg.approvers)
            if not configured:
                # previously an opaque IndexError from sorted(...)[0]
                raise SystemExit("no approvers are configured — cannot land")
            approver = configured[0]
        if approver not in cfg.approvers:
            raise SystemExit(f"{approver!r} is not a configured approver ({sorted(cfg.approvers)})")
        try:
            prepared = store.prepare_merge(PROP + args.proposal_id, cfg.canon_ref)
        except MergeConflict as e:
            raise SystemExit(f"conflict — not landing: {e}")
        try:
            return land_and_record(store, index, embedder, audit, prepared,
                                   Approval(prepared.candidate_oid, approver, "cli-confirm"),
                                   origin=cfg.seq_origin)
        except ValueError as e:
            raise SystemExit(f"not landing: {e}")

    raise SystemExit(f"unknown command {args.cmd!r}")
224
+
225
+
226
def build_parser() -> argparse.ArgumentParser:
    """Build the `stasima-admin` argument parser: a global --config plus one required subcommand."""
    parser = argparse.ArgumentParser(prog="stasima-admin", description="Stasima maintenance + promotion")
    parser.add_argument("--config", default=os.environ.get("STASIMA_CONFIG"))
    commands = parser.add_subparsers(dest="cmd", required=True)

    # subcommands that take no arguments of their own
    for bare in ("status", "reindex", "reconcile", "verify", "anchor"):
        commands.add_parser(bare)

    seed = commands.add_parser("bootstrap")
    seed.add_argument("seed_dir", help="folder of .md entries to seed an empty canon")

    provision = commands.add_parser("totp-provision")
    provision.add_argument("--force", action="store_true", help="rotate an existing secret")
    provision.add_argument("--qr", action="store_true",
                           help="render a scannable ASCII QR (re-displays if the secret exists)")

    check = commands.add_parser("totp-check")
    check.add_argument("code", help="a code from your authenticator app")

    backup = commands.add_parser("backup")
    backup.add_argument("dest", help="destination folder for the full backup")

    inbox = commands.add_parser("inbox")
    inbox.add_argument("--all", action="store_true", help="include already-read messages")
    inbox.add_argument("--read", default=None, metavar="PATH", help="mark a message path as read")

    preview = commands.add_parser("preview")
    preview.add_argument("proposal_id")

    landing = commands.add_parser("land")
    landing.add_argument("proposal_id")
    landing.add_argument("--by", default=None, help="approver (defaults to the first configured)")
    return parser
246
+
247
+
248
def main(argv=None) -> dict:
    """CLI entry point: parse `argv`, run the command, print the result as indented JSON, return it."""
    # Windows consoles default to cp1252, which can't print the QR block chars (or em dashes);
    # switching stdout to UTF-8 is best-effort, so any failure here is deliberately ignored.
    try:
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        pass
    parsed = build_parser().parse_args(argv)
    outcome = run(parsed)
    print(json.dumps(outcome, indent=2, default=str))
    return outcome
256
+
257
+
258
+ if __name__ == "__main__":
259
+ main()
stasima/airlock.py ADDED
@@ -0,0 +1,217 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """
3
+ Airlock — TOTP two-phase remote approval (the mediated-channel counterpart of console `land`).
4
+
5
+ At the console, the console itself is the out-of-band channel, so admin `land` stays a single
6
+ approval. When the practitioner approves *through an instance conversation* (phone, relay), the
7
+ channel is the thing to defend against: the airlock binds presence-proofs (TOTP codes) to the
8
+ existing prepare/land two-phase gate so no single code — harvested, relayed, or replayed — can
9
+ both stage and land, and nothing can land unreviewed.
10
+
11
+ open --code 1--> staged --(review: floor..ceiling)--code 2--> landed
12
+ ^ |
13
+ +---- revert ----+ (abort: FREE, no code; TTL expiry: lazy auto-revert)
14
+
15
+ Why the 120s floor: a TOTP code lives at most ~90s (30s step, +/-1 window acceptance), so any code
16
+ visible at staging time is arithmetically dead by the earliest legal landing moment. Strict window
17
+ ordering (code 2 strictly later than code 1) and consume-once (a window number is never accepted
18
+ twice for one purpose) close the same-window and replay paths as defense in depth. Content-binding
19
+ (landing names the staged oid) means what lands is exactly what was staged — a swap fails closed.
20
+
21
+ Abort is free by design: charging presence-proof to *decline* would incentivize landing.
22
+
23
+ Honest residual: the practitioner's view of what was staged flows through the relaying instance.
24
+ Content-binding makes swap-after-stage impossible and the audit trail makes deception detectable
25
+ after the fact; it does NOT make the relay's display trustworthy in the moment. The console remains
26
+ the stronger channel.
27
+
28
+ State (open | staged | landed, with revert folding back to open) is derived from the audit log —
29
+ staging is operational, not content; nothing here touches the storage spine. The clock is
30
+ injectable for tests; server time is authoritative for every gate.
31
+ """
32
+ import base64
33
+ import hashlib
34
+ import hmac
35
+ import os
36
+ import struct
37
+ import time
38
+
39
+ from .local_capstore import MergePreparation, MergeSummary, Approval, PROP_PREFIX as PROP
40
+
41
+ STEP = 30 # RFC 6238 time step (seconds)
42
+ DIGITS = 6
43
+
44
+
45
class AirlockError(Exception):
    """Raised when an airlock gate refuses.

    The message names the gate that failed and, where applicable, both values that were compared.
    """
47
+
48
+
49
+ # ---------------------------------------------------------------- TOTP (RFC 6238, stdlib only)
50
def generate_secret() -> str:
    """Return a fresh TOTP secret: 20 bytes from os.urandom, base32-encoded as text."""
    raw = os.urandom(20)
    return base64.b32encode(raw).decode()
52
+
53
+
54
def otpauth_uri(secret: str, label: str = "Stasima:practitioner", issuer: str = "Stasima") -> str:
    """Build the otpauth:// provisioning URI for `secret`.

    `label` and `issuer` are percent-encoded so that spaces, `&`, `?` or `#` in them (for
    example from a deployment name) cannot break the URI or inject extra parameters. The
    `:` separating issuer from account in the label is kept literal, so the default
    arguments produce the same URI as before.
    """
    from urllib.parse import quote

    safe_label = quote(label, safe=":")
    safe_issuer = quote(issuer, safe="")
    return (f"otpauth://totp/{safe_label}?secret={secret}&issuer={safe_issuer}"
            f"&algorithm=SHA1&digits={DIGITS}&period={STEP}")
57
+
58
+
59
def totp_at(secret: str, window: int) -> str:
    """Return the zero-padded code for an absolute window number (window = unix_time // STEP)."""
    key = base64.b32decode(secret.strip(), casefold=True)
    counter = struct.pack(">Q", window)
    digest = hmac.new(key, counter, hashlib.sha1).digest()
    # dynamic truncation: the low nibble of the last byte picks a 4-byte slice of the MAC
    start = digest[-1] & 0x0F
    (word,) = struct.unpack(">I", digest[start:start + 4])
    value = (word & 0x7FFFFFFF) % (10 ** DIGITS)
    return str(value).zfill(DIGITS)
66
+
67
+
68
def verify_code(secret: str, code: str, now: float):
    """Check `code` against the current window and its two neighbours (clock skew).

    Returns the MATCHED window number, or None when no candidate window matches. Candidates
    are tried in the order current, previous, next.

    The comparison is done on bytes: `hmac.compare_digest` rejects non-ASCII `str` with a
    TypeError, so a presented code containing e.g. full-width digits would otherwise crash
    instead of simply failing to match.
    """
    presented = str(code).strip().encode("utf-8")
    current = int(now // STEP)
    for cand in (current, current - 1, current + 1):
        if hmac.compare_digest(totp_at(secret, cand).encode("utf-8"), presented):
            return cand
    return None
75
+
76
+
77
+ # ---------------------------------------------------------------- the gate
78
class Airlock:
    """TOTP two-phase approval gate: `stage` (code 1), then `land` (code 2) or `revert` (free).

    The per-proposal state (open | staged | landed) is folded from the audit log on every query.
    """

    def __init__(self, store, audit, *, secret_path, land_fn, validate_fn, approver,
                 floor_s: int = 120, ceiling_s: int = 7200, clock=time.time, prop_prefix: str = PROP):
        # collaborators
        self.store = store
        self.audit = audit
        self.land_fn = land_fn          # (prepared, approval) -> land_and_record result
        self.validate_fn = validate_fn  # (prepared) -> log-entry seq (early feedback; land re-validates)
        # configuration
        self.secret_path = secret_path
        self.approver = approver
        self.prop_prefix = prop_prefix
        self.clock = clock
        # floor must exceed worst-case code lifetime (STEP + one window of skew acceptance ~= 90s)
        # so that no code obtained at staging survives to the landing moment.
        self.floor_s = floor_s
        self.ceiling_s = ceiling_s

    # ---- secret + code consumption ----
    def _secret(self) -> str:
        """Read the provisioned secret from `secret_path`; AirlockError if the file is absent."""
        if os.path.exists(self.secret_path):
            with open(self.secret_path, encoding="utf-8") as fh:
                return fh.read().strip()
        raise AirlockError("airlock not provisioned — run: admin totp-provision")

    def _consume(self, code, purpose, obj, min_window=None) -> int:
        """Verify `code` and burn its window for `purpose`; return the matched window.

        Every refusal is audited as `totp_reject` and raised as AirlockError; an acceptance is
        audited as `totp_accept`. Codes themselves are never logged — only window numbers are.
        """
        def reject(**extra):
            self.audit.append("system", "totp_reject",
                              detail={"purpose": purpose, "object": obj, **extra})

        window = verify_code(self._secret(), code, self.clock())
        if window is None:
            reject(reason="invalid")
            raise AirlockError("invalid code")

        if min_window is not None and window <= min_window:
            reject(window=window, min_window=min_window, reason="not strictly later")
            raise AirlockError(f"code is from window {window}, which is not strictly later than the staging "
                               f"window {min_window} — wait for a fresh code")

        # consume-once: a window already accepted for this purpose is refused
        already_used = any(
            prior["detail"].get("window") == window and prior["detail"].get("purpose") == purpose
            for prior in self.audit.events(op="totp_accept"))
        if already_used:
            reject(window=window, reason="replay")
            raise AirlockError(f"a code from window {window} was already used for {purpose} (consume-once)")

        self.audit.append("practitioner", "totp_accept",
                          detail={"window": window, "purpose": purpose, "object": obj})
        return window

    # ---- state machine (derived from the audit log; TTL is lazy) ----
    def _fold(self, proposal_id):
        """Replay the audit log; return (state, event) for the last event touching the proposal."""
        full_ref = self.prop_prefix + proposal_id
        # op -> (resulting state, value the event's "proposal" detail must equal)
        transitions = {
            "airlock_stage": ("staged", proposal_id),
            "airlock_revert": ("open", proposal_id),
            "land_merge": ("landed", full_ref),
        }
        current, last = "open", None
        for event in self.audit.events():
            detail = event["detail"]
            rule = transitions.get(event["op"])
            if rule is not None and detail.get("proposal") == rule[1]:
                current, last = rule[0], event
        return current, last

    def state(self, proposal_id) -> dict:
        """Report the proposal's state; a staged proposal past the ceiling is reverted on observation."""
        phase, event = self._fold(proposal_id)
        if phase != "staged":
            return {"state": phase}
        info = event["detail"]
        if self.clock() - info["staged_at"] > self.ceiling_s:  # lazy TTL: observed -> reverted
            self.audit.append("system", "airlock_revert", target_ref=self.prop_prefix + proposal_id,
                              detail={"proposal": proposal_id, "reason": "ttl",
                                      "staged_oid": info["staged_oid"]})
            return {"state": "open", "reverted": "ttl"}
        return {"state": "staged", "staged_oid": info["staged_oid"], "staged_at": info["staged_at"],
                "window": info["window"], "changed_paths": info.get("changed_paths", []),
                "lands_after": info["staged_at"] + self.floor_s,
                "expires_at": info["staged_at"] + self.ceiling_s}

    def staged(self) -> list:
        """Currently staged proposals (cockpit view)."""
        rows = []
        for pid, snapshot in self._staged_proposals():
            rows.append({"proposal_id": pid, "staged_oid": snapshot["staged_oid"],
                         "lands_after": snapshot["lands_after"], "expires_at": snapshot["expires_at"]})
        return rows

    def _staged_proposals(self):
        """Return (proposal_id, state dict) for each proposal ever staged that is staged right now."""
        visited, live = set(), []
        for event in self.audit.events(op="airlock_stage"):
            pid = event["detail"]["proposal"]
            if pid not in visited:
                visited.add(pid)
                snapshot = self.state(pid)
                if snapshot["state"] == "staged":
                    live.append((pid, snapshot))
        return live

    # ---- the three ops ----
    def stage(self, proposal_id, code) -> dict:
        """Code 1: freeze the proposal, prepare the merge, start the review clock."""
        if self.state(proposal_id)["state"] == "staged":
            raise AirlockError(f"{proposal_id} is already staged")
        ref = self.prop_prefix + proposal_id
        # prove the proposal stageable BEFORE consuming the code — failures must not burn windows
        prepared = self.store.prepare_merge(ref, self.store.canon_ref)
        seq = self.validate_fn(prepared)
        window = self._consume(code, "stage", proposal_id)
        staged_at = self.clock()
        oid = prepared.candidate_oid
        self.audit.append("practitioner", "airlock_stage", target_ref=self.prop_prefix + proposal_id,
                          result_oid=oid,
                          detail={"proposal": proposal_id, "staged_oid": oid,
                                  "window": window, "staged_at": staged_at, "seq": seq,
                                  "changed_paths": prepared.summary.changed_paths})
        return {"proposal_id": proposal_id, "staged_oid": prepared.candidate_oid,
                "staged_at": staged_at, "lands_after": staged_at + self.floor_s,
                "expires_at": staged_at + self.ceiling_s,
                "changed_paths": prepared.summary.changed_paths, "log_seq": seq}

    def land(self, staged_oid_prefix, code) -> dict:
        """Code 2: after the review floor, strictly later window, bound to the staged content."""
        if len(str(staged_oid_prefix)) < 8:
            raise AirlockError("staged oid prefix too short — give at least 8 hex characters")
        candidates = [(pid, snapshot) for pid, snapshot in self._staged_proposals()
                      if snapshot["staged_oid"].startswith(staged_oid_prefix)]
        if not candidates:
            raise AirlockError(f"no staged proposal matches oid prefix {staged_oid_prefix!r} (content-binding)")
        if len(candidates) > 1:
            raise AirlockError(f"oid prefix {staged_oid_prefix!r} is ambiguous across staged proposals")
        pid, snapshot = candidates[0]
        waited = self.clock() - snapshot["staged_at"]
        # gates before code verification — a floor miss must not burn a window
        if waited < self.floor_s:
            raise AirlockError(f"review floor not met: {waited:.0f}s since staging, floor is "
                               f"{self.floor_s}s — review, then retry with a fresh code")
        landing_window = self._consume(code, "land", pid, min_window=snapshot["window"])
        summary = MergeSummary(snapshot.get("changed_paths", []), [], [])
        prepared = MergePreparation(candidate_oid=snapshot["staged_oid"], into=self.store.canon_ref,
                                    proposal_ref=self.prop_prefix + pid, summary=summary)
        approval = Approval(snapshot["staged_oid"], self.approver, f"airlock-totp-w{landing_window}")
        return self.land_fn(prepared, approval)

    def revert(self, proposal_id) -> dict:
        """Abort a staged review. FREE — never requires a code (charging for decline would
        incentivize landing). Records an `airlock_revert` event, which folds the state to open."""
        if self.state(proposal_id)["state"] != "staged":
            raise AirlockError(f"{proposal_id} is not staged")
        self.audit.append("system", "airlock_revert", target_ref=self.prop_prefix + proposal_id,
                          detail={"proposal": proposal_id, "reason": "manual"})
        return {"proposal_id": proposal_id, "state": "open"}
stasima/audit_log.py ADDED
@@ -0,0 +1,179 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """
3
+ Audit log — the operation-layer TRUTH, complementary to git's content-layer truth.
4
+
5
+ Records what git can't: ops that produce no commit (read-receipts, denials), the order/timing
6
+ of operations, and outcomes (ok/error). Append-only, hash-chained. SQLite is its source of truth
7
+ (git stays source of truth for the information itself).
8
+
9
+ Scope: writes (state changes) and failures (what's breaking). Successful reads are observability,
10
+ not logged; read-state IS logged (a read-receipt is a write-like, forensic event).
11
+
12
+ The hash chain is tamper-EVIDENCE at this threat model (cooperative, single practitioner, no crypto):
13
+ it detects accidental corruption, deletion, and reordering, and yields one head hash summarizing the
14
+ whole history. It is not forgery-proof (no signature to forge) — signing the head is the additive
15
+ upgrade. Per-canon-land, the head is anchored into git (replicated, durable), so the git substrate
16
+ can witness tampering of the SQLite log.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import hashlib
21
+ import json
22
+ import sqlite3
23
+ from abc import ABC, abstractmethod
24
+ from datetime import datetime, timezone
25
+ from typing import Optional
26
+
27
+ from .local_capstore import Identity, PERSP_PREFIX, PROP_PREFIX
28
+
29
+ GENESIS = "0" * 64
30
+ ANCHOR_REF = "refs/cap/audit-anchor"
31
+
32
+ _HASHED = ["seq", "ts", "actor", "op", "target_ref", "target_path",
33
+ "op_id", "result_oid", "outcome", "detail", "prev_hash"]
34
+
35
+
36
def _canonical(ev: dict) -> str:
    """Serialise the hashed fields of `ev` as compact, key-sorted JSON (missing fields become null)."""
    hashed_view = {}
    for field in _HASHED:
        hashed_view[field] = ev.get(field)
    return json.dumps(hashed_view, sort_keys=True, default=str, separators=(",", ":"))
38
+
39
+
40
def _hash(ev: dict) -> str:
    """Return the SHA-256 hex digest of the canonical serialisation of `ev`."""
    digest = hashlib.sha256()
    digest.update(_canonical(ev).encode())
    return digest.hexdigest()
42
+
43
+
44
class AuditLog(ABC):
    """Abstract interface of the append-only audit log."""

    @abstractmethod
    def append(self, actor: str, op: str, *, target_ref=None, target_path=None,
               op_id=None, result_oid=None, outcome="ok", detail=None) -> dict:
        """Append one event and return it."""

    @abstractmethod
    def head(self) -> str:
        """Return the current head hash of the chain."""

    @abstractmethod
    def count(self) -> int:
        """Return the number of events."""

    @abstractmethod
    def verify(self) -> tuple[bool, Optional[int]]:
        """Check the chain; return (ok, seq of the first bad event or None)."""

    @abstractmethod
    def head_at(self, seq: int) -> str:
        """Return the chain head as of event `seq`."""

    @abstractmethod
    def events(self, *, op=None, actor=None, op_id=None) -> list[dict]:
        """Return events, optionally filtered by op, actor and/or op_id."""

    @abstractmethod
    def append_read(self, instance_id: str, message_path: str) -> dict:
        """Record a read-receipt for `message_path` by `instance_id`."""

    @abstractmethod
    def is_read(self, instance_id: str, message_path: str) -> bool:
        """Tell whether `instance_id` has a read-receipt for `message_path`."""
62
+
63
+
64
class SqliteAuditLog(AuditLog):
    """Hash-chained audit log stored in one SQLite table (`audit_events`)."""

    def __init__(self, db_path: str = ":memory:"):
        """Open (or create) the database at `db_path` and ensure the events table exists."""
        self.conn = sqlite3.connect(db_path)
        self.conn.row_factory = sqlite3.Row
        self.conn.execute(
            """CREATE TABLE IF NOT EXISTS audit_events (
                seq INTEGER PRIMARY KEY, ts TEXT, actor TEXT, op TEXT,
                target_ref TEXT, target_path TEXT, op_id TEXT, result_oid TEXT,
                outcome TEXT, detail TEXT, prev_hash TEXT, hash TEXT)""")
        self.conn.commit()

    def _row(self, r: sqlite3.Row) -> dict:
        """Convert a row to a dict, decoding the JSON `detail` column (empty/NULL -> {})."""
        d = dict(r)
        d["detail"] = json.loads(d["detail"]) if d["detail"] else {}
        return d

    def append(self, actor, op, *, target_ref=None, target_path=None,
               op_id=None, result_oid=None, outcome="ok", detail=None) -> dict:
        """Append one event chained onto the current head, commit it, and return it.

        `detail` is hashed in its JSON round-tripped form — exactly what `verify` and
        `head_at` later read back from the table. Hashing the in-memory object instead would
        let a detail with non-string keys (which sort differently once stringified) produce
        a stored hash that verification can never reproduce. A detail that is not
        JSON-serialisable raises TypeError before anything is written.
        """
        stored_detail = json.dumps(detail or {})
        ev = {"seq": self.count() + 1,
              "ts": datetime.now(timezone.utc).isoformat(),
              "actor": actor, "op": op, "target_ref": target_ref, "target_path": target_path,
              "op_id": op_id, "result_oid": result_oid, "outcome": outcome,
              "detail": json.loads(stored_detail), "prev_hash": self.head()}
        ev["hash"] = _hash(ev)
        self.conn.execute(
            "INSERT INTO audit_events VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
            (ev["seq"], ev["ts"], ev["actor"], ev["op"], ev["target_ref"], ev["target_path"],
             ev["op_id"], ev["result_oid"], ev["outcome"], stored_detail, ev["prev_hash"], ev["hash"]))
        self.conn.commit()
        return ev

    def head(self) -> str:
        """Return the hash of the highest-seq event, or GENESIS when the log is empty."""
        r = self.conn.execute("SELECT hash FROM audit_events ORDER BY seq DESC LIMIT 1").fetchone()
        return r["hash"] if r else GENESIS

    def count(self) -> int:
        """Return the number of stored events."""
        return self.conn.execute("SELECT COUNT(*) c FROM audit_events").fetchone()["c"]

    def verify(self) -> tuple[bool, Optional[int]]:
        """Walk the chain in seq order; return (True, None) or (False, seq of the first bad event).

        An event is bad when its stored prev_hash differs from the previous event's stored
        hash, or when its stored hash differs from the hash recomputed from its fields.
        """
        prev = GENESIS
        for r in self.conn.execute("SELECT * FROM audit_events ORDER BY seq"):
            d = self._row(r)
            if d["prev_hash"] != prev:
                return False, d["seq"]
            if _hash(d) != d["hash"]:
                return False, d["seq"]
            prev = d["hash"]
        return True, None

    def head_at(self, seq: int) -> str:
        """Recompute the chain hash up to `seq` from stored fields, chaining the RECOMPUTED prev
        (not the stored prev_hash) so an upstream tamper propagates forward and is detectable."""
        prev = GENESIS
        for r in self.conn.execute("SELECT * FROM audit_events WHERE seq<=? ORDER BY seq", (seq,)):
            d = self._row(r)
            d["prev_hash"] = prev
            prev = _hash(d)
        return prev

    def events(self, *, op=None, actor=None, op_id=None) -> list[dict]:
        """Return events in seq order, filtered by whichever of op/actor/op_id is not None."""
        where, params = [], []
        for col, val in (("op", op), ("actor", actor), ("op_id", op_id)):
            if val is not None:
                # col comes from the fixed tuple above; only the value is bound as a parameter
                where.append(f"{col}=?")
                params.append(val)
        sql = "SELECT * FROM audit_events" + (" WHERE " + " AND ".join(where) if where else "") + " ORDER BY seq"
        return [self._row(r) for r in self.conn.execute(sql, params)]

    def append_read(self, instance_id, message_path) -> dict:
        """Append a `read_receipt` event with `instance_id` as actor and `message_path` as target."""
        return self.append(instance_id, "read_receipt", target_path=message_path)

    def is_read(self, instance_id, message_path) -> bool:
        """Tell whether a `read_receipt` by `instance_id` exists for `message_path`."""
        r = self.conn.execute(
            "SELECT 1 FROM audit_events WHERE op='read_receipt' AND actor=? AND target_path=? LIMIT 1",
            (instance_id, message_path)).fetchone()
        return r is not None
139
+
140
+
141
+ # ---------------------------------------------------------------- git integration
142
def reconcile_from_git(store, audit: AuditLog) -> int:
    """Backfill a `reconciled_commit` audit event for every committed op_id that has none.

    Walks canon (when it resolves), then every perspective ref, then every proposal ref.
    This is the git-first-then-audit recovery (CAPstore OQ5) for a handler that died after
    the commit but before the audit append. Returns the number of events added.
    """
    canon = store.canon_ref
    refs = [canon] if store.resolve_ref(canon) else []
    for prefix in (PERSP_PREFIX, PROP_PREFIX):
        refs.extend(found.name for found in store.list_refs(prefix))

    seen = {event["op_id"] for event in audit.events() if event["op_id"]}
    added = 0
    for ref in refs:
        for commit in store.commit_ops(ref):
            op_id = commit["op_id"]
            if not op_id or op_id in seen:
                continue
            audit.append(commit["author"], "reconciled_commit", target_ref=ref,
                         op_id=op_id, result_oid=commit["oid"], detail={"reconciled": True})
            seen.add(op_id)
            added += 1
    return added
160
+
161
+
162
def anchor_audit_head(store, audit: AuditLog, anchor_ref: str = ANCHOR_REF) -> dict:
    """Commit the current audit event count and chain head to `anchor_ref` as `audit-head.json`.

    Meant to be called on each canon land, so git witnesses the SQLite log's integrity.
    Returns the committed payload, `{"seq": ..., "head": ...}`.
    """
    payload = {"seq": audit.count(), "head": audit.head()}
    blob = json.dumps(payload, separators=(",", ":")).encode()
    message = f"audit anchor @ seq {payload['seq']}"
    author = Identity("system")
    parent = store.resolve_ref(anchor_ref)
    store.commit(anchor_ref, {"audit-head.json": blob}, message, author,
                 expected_parent=parent, op_id=f"anchor-{payload['head'][:12]}")
    return payload
170
+
171
+
172
def verify_against_anchor(store, audit: AuditLog, anchor_ref: str = ANCHOR_REF) -> Optional[bool]:
    """Compare the SQLite log with the checkpoint stored in git.

    Returns None when `anchor_ref` does not resolve (no anchor yet). Otherwise recomputes the
    chain up to the anchored seq and returns whether it equals the anchored head.
    """
    if store.resolve_ref(anchor_ref) is None:
        return None
    raw = store.read_blob(anchor_ref, "audit-head.json")
    checkpoint = json.loads(raw.decode())
    recomputed = audit.head_at(checkpoint["seq"])
    return recomputed == checkpoint["head"]