tracefork 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ """Spike 0 orchestration: record -> persist -> load -> replay -> verify.
2
+
3
+ Answers exactly one question: can we record a tool-using Anthropic-SDK agent run and
4
+ replay it bit-exact, with proof, for $0 and no network — within a declared
5
+ determinism boundary? The boundary here: a single-process, synchronous agent whose
6
+ only nondeterminism sources are clock and id generation, both routed through the
7
+ NondetSource seam.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import tempfile
14
+
15
+ from .agent import make_client, run_agent
16
+ from .fake_llm import FakeAnthropicTransport
17
+ from .nondet import DivergenceError, DriftingNondet, RecordingNondet, ReplayNondet
18
+ from .tape import Tape
19
+ from .transport import TraceforkTransport
20
+
21
+
22
def record_replay_verify(tape_path: str | None = None) -> dict:
    """Run the full spike and return a structured result dict (used by the CLI and tests).

    Stages: record an agent run onto a fresh tape, persist and reload the tape,
    replay the agent against the reloaded tape, verify the replay, then run a
    negative control that is expected to diverge.

    Args:
        tape_path: Where to persist the tape. When ``None`` a temporary file is
            created for this run and removed again before returning.

    Returns:
        A dict with the exchange and draw counts, the number of matched request
        hashes, the record and replay fingerprints, the recorded final text,
        the individual ``checks``, the negative control's divergence message
        (``drift_at``; ``None`` when no drift was detected), and ``passed``,
        which is true only when every check is true.

    Raises:
        AssertionError: If the tape digest changes across the save/load
            round-trip. Exceptions raised by the record and replay runs
            themselves are not caught here.
    """
    owns_tape_file = tape_path is None
    if owns_tape_file:
        fd, tape_path = tempfile.mkstemp(suffix=".tape.sqlite")
        os.close(fd)

    try:
        # 1. RECORD — real agent + SDK, fake (offline) endpoint, genuine nondeterminism.
        rec_tape = Tape()
        rec_nondet = RecordingNondet()
        rec_transport = TraceforkTransport("record", rec_tape, inner=FakeAnthropicTransport())
        rec_result = run_agent(make_client(rec_transport), rec_nondet)
        rec_tape.draws = rec_nondet.draws
        record_fingerprint = rec_tape.digest()

        # 2. PERSIST + RELOAD — prove the content-addressed tape round-trips through disk.
        rec_tape.save(tape_path)
        loaded = Tape.load(tape_path)
        if loaded.digest() != record_fingerprint:
            # Raised explicitly instead of via `assert` so the integrity check
            # is not stripped when Python runs with -O.
            raise AssertionError("tape changed across save/load")

        # 3. REPLAY — no network: replay transport has no inner; nondeterminism virtualized.
        rep_nondet = ReplayNondet(loaded.draws)
        rep_transport = TraceforkTransport("replay", loaded)  # inner=None -> any real call errors
        rep_result = run_agent(make_client(rep_transport), rep_nondet)
        # Taken after the replay, so it also shows replay left the tape untouched.
        replay_fingerprint = loaded.digest()

        # 4. VERIFY — observable output identical, every request hash matched,
        #    all recorded draws consumed, no leftover exchanges.
        checks = {
            "output_identical": rep_result == rec_result,
            "fingerprint_match": replay_fingerprint == record_fingerprint,
            "all_request_hashes_matched": rep_transport.matched == len(loaded.exchanges),
            "all_exchanges_consumed": rep_transport.fully_consumed(),
            "all_draws_consumed": rep_nondet.fully_consumed(),
        }

        # 5. NEGATIVE CONTROL — replay with drifting (fresh) nondeterminism MUST diverge.
        drift_detected = False
        drift_at: str | None = None
        try:
            run_agent(make_client(TraceforkTransport("replay", loaded)), DriftingNondet())
        except DivergenceError as e:
            drift_detected = True
            drift_at = str(e)
        checks["negative_control_detected_drift"] = drift_detected

        return {
            "exchanges": len(loaded.exchanges),
            "draws": len(loaded.draws),
            "request_hashes_matched": rep_transport.matched,
            "record_fingerprint": record_fingerprint,
            "replay_fingerprint": replay_fingerprint,
            "final_text": rec_result["final_text"],
            "checks": checks,
            "drift_at": drift_at,
            "passed": all(checks.values()),
        }
    finally:
        # Only a tape file created by this call is removed; a caller-supplied
        # path is left in place.
        if owns_tape_file:
            try:
                os.remove(tape_path)
            except FileNotFoundError:
                pass
84
+
85
+
86
+ def _fmt(result: dict) -> str:
87
+ c = result["checks"]
88
+ ok = "PASS" if result["passed"] else "FAIL"
89
+ lines = [
90
+ "",
91
+ " tracefork — Spike 0: bit-exact record/replay",
92
+ " " + "-" * 52,
93
+ f" recorded exchanges ........ {result['exchanges']}",
94
+ f" nondeterminism draws ...... {result['draws']} (clock + id, virtualized)",
95
+ f" request hashes matched .... {result['request_hashes_matched']}/{result['exchanges']}",
96
+ f" tape fingerprint .......... {result['record_fingerprint'][:24]}…",
97
+ f" replay fingerprint ........ {result['replay_fingerprint'][:24]}…",
98
+ " network calls / spend ..... 0 / $0.00",
99
+ f" agent final answer ........ {result['final_text']!r}",
100
+ "",
101
+ f" [{'x' if c['output_identical'] else ' '}] "
102
+ "replayed trajectory byte-identical to recorded",
103
+ f" [{'x' if c['fingerprint_match'] else ' '}] "
104
+ "tape fingerprint matches after save/load round-trip",
105
+ f" [{'x' if c['all_request_hashes_matched'] else ' '}] "
106
+ "every replayed request hash matched the tape",
107
+ f" [{'x' if c['all_draws_consumed'] else ' '}] "
108
+ "every recorded nondeterminism draw consumed",
109
+ f" [{'x' if c['negative_control_detected_drift'] else ' '}] "
110
+ "negative control: drift was DETECTED, not silently passed",
111
+ "",
112
+ f" RESULT: {ok}",
113
+ "",
114
+ ]
115
+ return "\n".join(lines)
116
+
117
+
118
def main() -> int:
    """CLI entry point: run the spike, print its receipt, and report the outcome.

    Returns:
        ``0`` when the result's ``passed`` flag is true, otherwise ``1``.
    """
    outcome = record_replay_verify()
    receipt = _fmt(outcome)
    print(receipt)
    if outcome["passed"]:
        return 0
    return 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
@@ -0,0 +1,79 @@
1
+ """Content-addressed, persistable tape.
2
+
3
+ A tape is the recorded artifact of one agent run: the ordered HTTP exchanges
4
+ (request body + response body) plus the ordered nondeterminism draws. Response and
5
+ request bodies are stored content-addressed (keyed by sha256), so identical bytes are
6
+ stored once; an ordered event log preserves sequence. `digest()` is a hash chain over
7
+ the whole tape — the single fingerprint reported in the receipt.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import sqlite3
14
+ from dataclasses import dataclass, field
15
+
16
+
17
def sha256_hex(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a hexadecimal string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
19
+
20
+
21
@dataclass
class Tape:
    """Recorded artifact of one agent run: ordered exchanges plus ordered draws."""

    # Ordered (request_body, response_body) pairs, one per recorded exchange.
    exchanges: list[tuple[bytes, bytes]] = field(default_factory=list)
    # Ordered (kind, value) nondeterminism draws.
    draws: list[tuple[str, str]] = field(default_factory=list)

    def append_exchange(self, request_body: bytes, response_body: bytes) -> None:
        """Append one request/response pair to the end of the exchange list."""
        self.exchanges.append((request_body, response_body))

    def exchange(self, i: int) -> tuple[bytes, bytes]:
        """Return the ``(request_body, response_body)`` pair at index *i*.

        Raises:
            IndexError: If *i* is out of range.
        """
        return self.exchanges[i]

    def digest(self) -> str:
        """sha256 hash chain over draws then exchanges — the tape fingerprint.

        Draws are hashed first (as their text), then exchanges (as the sha256
        of each request and response body), so the digest depends on the order
        within each list.
        """
        # NOTE(review): draw kind/value are framed with ':' and '\n' without
        # escaping, so values containing those characters could be framed
        # ambiguously. Left as-is so existing fingerprints stay stable.
        h = hashlib.sha256()
        for kind, value in self.draws:
            h.update(b"D:" + kind.encode() + b":" + value.encode() + b"\n")
        for req, resp in self.exchanges:
            h.update(b"X:" + sha256_hex(req).encode() + b":" + sha256_hex(resp).encode() + b"\n")
        return h.hexdigest()

    # --- persistence: content-addressed blobs + ordered event log ---------------

    def save(self, path: str) -> None:
        """Write the tape to a SQLite file at *path*.

        Any ``blobs``/``events`` tables already in the file are dropped and
        recreated. Bodies are stored once per distinct sha256 in ``blobs``;
        ``events`` holds the draws followed by the exchanges, in order.
        """
        con = sqlite3.connect(path)
        try:
            con.executescript(
                """
                DROP TABLE IF EXISTS blobs;
                DROP TABLE IF EXISTS events;
                CREATE TABLE blobs (hash TEXT PRIMARY KEY, data BLOB NOT NULL);
                CREATE TABLE events (seq INTEGER PRIMARY KEY AUTOINCREMENT,
                                     kind TEXT NOT NULL, a TEXT NOT NULL, b TEXT NOT NULL);
                """
            )
            for kind, value in self.draws:
                con.execute("INSERT INTO events (kind, a, b) VALUES ('draw', ?, ?)", (kind, value))
            for req, resp in self.exchanges:
                rh, sh = sha256_hex(req), sha256_hex(resp)
                # OR IGNORE: identical bytes share one blob row.
                con.execute("INSERT OR IGNORE INTO blobs (hash, data) VALUES (?, ?)", (rh, req))
                con.execute("INSERT OR IGNORE INTO blobs (hash, data) VALUES (?, ?)", (sh, resp))
                con.execute("INSERT INTO events (kind, a, b) VALUES ('exchange', ?, ?)", (rh, sh))
            con.commit()
        finally:
            con.close()

    @classmethod
    def load(cls, path: str) -> "Tape":
        """Read a tape previously written by ``save`` from the SQLite file at *path*.

        The file is opened read-only, so loading never creates or modifies it.

        Raises:
            sqlite3.OperationalError: If the file cannot be opened (for example
                it does not exist) or lacks the expected tables.
            KeyError: If an exchange event refers to a blob hash that is not
                present in the ``blobs`` table.
        """
        from pathlib import Path  # local import keeps the module's import block untouched

        # A plain sqlite3.connect(path) would silently create an empty database
        # for a missing path; a read-only URI makes that an error instead.
        uri = Path(path).resolve().as_uri() + "?mode=ro"
        con = sqlite3.connect(uri, uri=True)
        try:
            blobs = dict(con.execute("SELECT hash, data FROM blobs").fetchall())
            tape = cls()
            for kind, a, b in con.execute("SELECT kind, a, b FROM events ORDER BY seq").fetchall():
                if kind == "draw":
                    tape.draws.append((a, b))
                elif kind == "exchange":
                    tape.exchanges.append((bytes(blobs[a]), bytes(blobs[b])))
                # Rows of any other kind are skipped.
            return tape
        finally:
            con.close()
@@ -0,0 +1,68 @@
1
+ """The recording/replay httpx transport — the model-I/O capture seam.
2
+
3
+ Record mode: forward each request to an inner transport, capture the request body
4
+ and the full response body into the tape, and return a response rebuilt from its status and body.
5
+
6
+ Replay mode: ignore the network entirely. For each request, pop the next recorded
7
+ exchange, assert the request body is byte-identical to what was recorded (this is the
8
+ divergence detector), and serve the recorded response bytes back. A replay transport
9
+ has no inner transport, so any unexpected/extra request is a hard error rather than a
10
+ silent network call.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import httpx
16
+
17
+ from .nondet import DivergenceError
18
+ from .tape import Tape, sha256_hex
19
+
20
+
21
class TraceforkTransport(httpx.BaseTransport):
    """httpx transport that either records exchanges onto a tape or replays them.

    Attributes:
        mode: ``"record"`` or ``"replay"``.
        tape: The tape being appended to (record) or served from (replay).
        inner: Transport that record mode forwards requests to; unused in replay.
        matched: Number of replayed requests whose body hash matched the tape.
    """

    def __init__(self, mode: str, tape: Tape, inner: httpx.BaseTransport | None = None) -> None:
        """Create a transport bound to *tape*.

        Raises:
            ValueError: If *mode* is not ``"record"`` or ``"replay"``, or if
                record mode is requested without an inner transport.
        """
        # Raised rather than asserted so the validation survives `python -O`.
        if mode not in ("record", "replay"):
            raise ValueError(f"mode must be 'record' or 'replay', got {mode!r}")
        if mode == "record" and inner is None:
            raise ValueError("record mode requires an inner transport")
        self.mode = mode
        self.tape = tape
        self.inner = inner
        self._i = 0  # index of the next tape exchange to serve in replay mode
        self.matched = 0  # number of replay request-hashes that matched the tape

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        """Record or replay one request, depending on ``mode``.

        Record: forward to the inner transport, append the request and response
        bodies to the tape, and return a response rebuilt from the inner status
        code and body.

        Replay: compare the request body's sha256 with the next recorded
        request and serve the recorded response body.

        Raises:
            DivergenceError: In replay mode, if the tape has no exchange left
                for this request or the request body differs from the
                recorded one.
        """
        body = request.content

        if self.mode == "record":
            inner_resp = self.inner.handle_request(request)  # type: ignore[union-attr]
            resp_body = inner_resp.read()
            self.tape.append_exchange(body, resp_body)
            return httpx.Response(
                inner_resp.status_code,
                headers={"content-type": "application/json"},
                content=resp_body,
                request=request,
            )

        # replay
        total = len(self.tape.exchanges)
        if self._i >= total:
            raise DivergenceError(
                f"replay made an unrecorded request #{self._i} "
                f"(tape has {total} exchanges)"
            )
        rec_req, rec_resp = self.tape.exchange(self._i)
        # Hash each side once; the same values feed the comparison and the message.
        recorded_hash = sha256_hex(rec_req)
        replay_hash = sha256_hex(body)
        if recorded_hash != replay_hash:
            raise DivergenceError(
                f"request #{self._i} diverged from the tape "
                f"(recorded {recorded_hash[:12]}, replay {replay_hash[:12]})"
            )
        self._i += 1
        self.matched += 1
        # NOTE(review): the tape stores bodies only, so replay always answers
        # 200 even if the recorded response had another status code.
        return httpx.Response(
            200,
            headers={"content-type": "application/json"},
            content=rec_resp,
            request=request,
        )

    def fully_consumed(self) -> bool:
        """Return True when in replay mode and every tape exchange has been served."""
        return self.mode == "replay" and self._i == len(self.tape.exchanges)