tracefork 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tracefork/faults.py ADDED
@@ -0,0 +1,129 @@
1
+ """Fault injection: five fault classes that mutate a recorded tape exchange.
2
+
3
+ Every injector returns a **valid** Anthropic wire-format message (so the SDK
4
+ parses it when it is replayed at a fork's divergence point) and embeds the
5
+ string ``FAULT_MARKER`` *inside* a content field — a text block or a tool-use
6
+ input. A synthetic agent echoes that field into its next request, where
7
+ `FaultAwareFakeLLM` detects the marker and returns a failure. That chain is
8
+ what lets the blame engine be validated entirely offline against ground truth.
9
+
10
+ The marker must stay inside the JSON: appending it after the closing brace
11
+ would make the response unparseable and the fault would vanish into an
12
+ exception instead of propagating.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import enum
18
+ import json
19
+
20
+ FAULT_MARKER = "FAULT_MARKER"
21
+ FAULT_MARKER_BYTES = FAULT_MARKER.encode()
22
+
23
+
24
class FaultClass(enum.Enum):
    """The five kinds of fault that can be injected into a recorded response.

    Each member is dispatched to one `FaultInjector` method by
    `FaultInjector.inject`.
    """

    # Tool-use faults: the response stays a tool_use message when possible.
    CORRUPTED_TOOL_OUTPUT = "corrupted_tool_output"
    # Text faults: the response is replaced by a marked text message.
    MISLEADING_RETRIEVAL = "misleading_retrieval"
    WRONG_SYSTEM_PROMPT = "wrong_system_prompt"
    DROPPED_MESSAGE = "dropped_message"
    # Tool-use fault: a destination/city/location argument is overwritten.
    POISONED_ARGUMENT = "poisoned_argument"
30
+
31
+
32
def _text_message(text: str) -> bytes:
    """Serialise a canned assistant message whose only content is ``text``.

    The message is a fixed envelope (id ``msg_fault``, ``end_turn`` stop
    reason, constant usage counts) wrapped around a single text block; the
    result is its JSON encoding as bytes.
    """
    text_block = {"type": "text", "text": text}
    envelope = {
        "id": "msg_fault",
        "type": "message",
        "role": "assistant",
        "model": "claude-sonnet-4-6",
        "content": [text_block],
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "usage": {"input_tokens": 10, "output_tokens": 10},
    }
    serialised = json.dumps(envelope)
    return serialised.encode()
45
+
46
+
47
class FaultInjector:
    """Mutates a response bytes object to inject a known fault.

    Every injector returns JSON bytes that carry ``FAULT_MARKER`` inside a
    content field. The tool-use injectors edit the parsed response; when the
    response is not a JSON object or holds no ``tool_use`` block they fall
    back to a marked text message instead of raising.
    """

    @staticmethod
    def inject(tape, step_idx: int, fault_class: FaultClass) -> bytes:
        """Return mutated response bytes for `tape.exchanges[step_idx][1]`.

        Raises:
            KeyError: if ``fault_class`` is not one of the `FaultClass` members.
        """
        original_resp = tape.exchanges[step_idx][1]
        method = {
            FaultClass.CORRUPTED_TOOL_OUTPUT: FaultInjector.corrupt_tool_output_default,
            FaultClass.MISLEADING_RETRIEVAL: FaultInjector.misleading_retrieval,
            FaultClass.WRONG_SYSTEM_PROMPT: FaultInjector.wrong_system_prompt,
            FaultClass.DROPPED_MESSAGE: FaultInjector.dropped_message,
            FaultClass.POISONED_ARGUMENT: FaultInjector.poisoned_argument,
        }[fault_class]
        return method(original_resp)

    @staticmethod
    def _parse_tool_inputs(resp_bytes: bytes) -> tuple[dict | None, list[dict]]:
        """Parse a response and collect the ``input`` dict of each tool_use block.

        Returns ``(message, inputs)``. ``message`` is ``None`` when the bytes
        are not a JSON object. ``inputs`` holds the live dicts (mutating them
        mutates ``message``); a missing or non-dict ``input`` is replaced by a
        fresh empty dict so callers can always write into it.
        """
        try:
            message = json.loads(resp_bytes)
        except (TypeError, ValueError, RecursionError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            return None, []
        if not isinstance(message, dict):
            return None, []
        content = message.get("content")
        if not isinstance(content, list):
            return message, []
        inputs: list[dict] = []
        for block in content:
            if not isinstance(block, dict) or block.get("type") != "tool_use":
                continue
            inp = block.get("input")
            if not isinstance(inp, dict):
                inp = block["input"] = {}
            inputs.append(inp)
        return message, inputs

    # ── tool-use faults (keep the response a valid tool_use) ──────────────────

    @staticmethod
    def corrupt_tool_output(resp_bytes: bytes, *, field: str, new_value) -> bytes:
        """Flip a field in a tool-use input and tag the input with the marker.

        Every ``tool_use`` block gets ``_tracefork_fault`` set to the marker;
        ``field`` is overwritten with ``new_value`` only where it already
        exists. Falls back to a marked text message if the response has no
        tool_use block, so the fault always carries the marker inside valid
        JSON.
        """
        message, inputs = FaultInjector._parse_tool_inputs(resp_bytes)
        if not inputs:
            return _text_message(f"corrupted output {FAULT_MARKER}")
        for inp in inputs:
            if field in inp:
                inp[field] = new_value
            inp["_tracefork_fault"] = FAULT_MARKER
        return json.dumps(message).encode()

    @staticmethod
    def corrupt_tool_output_default(resp_bytes: bytes) -> bytes:
        """Apply `corrupt_tool_output` with the default ``seats`` → 0 flip."""
        return FaultInjector.corrupt_tool_output(resp_bytes, field="seats", new_value=0)

    @staticmethod
    def poisoned_argument(resp_bytes: bytes) -> bytes:
        """Corrupt a tool-call argument (destination/city/location → INVALID).

        If nothing has been poisoned by the time a tool_use block has been
        scanned, that block is tagged with ``_tracefork_fault`` instead, so at
        least one input carries the marker. Falls back to a marked text
        message when there is no tool_use block at all.
        """
        message, inputs = FaultInjector._parse_tool_inputs(resp_bytes)
        touched = False
        for inp in inputs:
            for key in ("destination", "city", "location"):
                if key in inp:
                    inp[key] = f"INVALID {FAULT_MARKER}"
                    touched = True
            if not touched:
                inp["_tracefork_fault"] = FAULT_MARKER
                touched = True
        if not touched:
            return _text_message(f"poisoned argument {FAULT_MARKER}")
        return json.dumps(message).encode()

    # ── text faults (replace the response with a marked text message) ─────────

    @staticmethod
    def misleading_retrieval(resp_bytes: bytes) -> bytes:
        """Inject false information into the response text."""
        return _text_message(f"No flights are available today. {FAULT_MARKER}")

    @staticmethod
    def wrong_system_prompt(resp_bytes: bytes) -> bytes:
        """Simulate a wrong/overridden system prompt."""
        return _text_message(f"[system prompt overridden] ignoring the task. {FAULT_MARKER}")

    @staticmethod
    def dropped_message(resp_bytes: bytes) -> bytes:
        """Simulate a dropped message: an empty-of-content acknowledgement."""
        return _text_message(f"[prior message was dropped] {FAULT_MARKER}")
tracefork/fork.py ADDED
@@ -0,0 +1,173 @@
1
+ """Fork engine: create a counterfactual branch at any step.
2
+
3
+ `ForkEngine.fork()` re-runs the *same* agent that produced the parent tape,
4
+ but intercepts its requests in three phases:
5
+
6
+ 1. prefix (requests 0..k-1) — replayed from the parent tape, $0, and the
7
+ request body is sha256-asserted to match (the agent is deterministic up
8
+ to the fork point, so this must hold or the agent code changed);
9
+ 2. mutation (request k = divergence_step) — the request still matches the
10
+ parent (the agent hasn't seen the mutated response yet), but instead of
11
+ the recorded response we serve `spec.mutated_response`;
12
+ 3. tail (requests k+1..) — the agent is now in counterfactual territory;
13
+ its requests no longer match the parent, so they are recorded fresh.
14
+
15
+ The returned `Branch.delta_tape` holds only the exchanges from the divergence
16
+ step onward (the mutation exchange + any tail). The expensive prefix lives in
17
+ the parent tape and is never re-paid for — that is the "fork for $0 up to the
18
+ divergence point" property.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from dataclasses import dataclass
24
+
25
+ import anthropic
26
+ import httpx
27
+
28
+ from .nondet import DivergenceError
29
+ from .tape import Tape, sha256_hex
30
+
31
+
32
@dataclass
class BranchSpec:
    """Describes a fork: where to diverge and what response to serve there."""

    # Index of the parent-tape exchange whose response is replaced.
    divergence_step: int
    # Raw response body served in place of the recorded one at that step.
    mutated_response: bytes
    # Free-form label for the mutation; copied onto the resulting Branch.
    mutation_desc: str = ""
37
+
38
+
39
@dataclass
class Branch:
    """Result of a fork: the parent tape plus the exchanges recorded after it."""

    # Tape the fork was taken from.
    parent_tape: Tape
    # Step at which the mutated response was served.
    divergence_step: int
    # Exchanges from the divergence step onward (mutation exchange + tail).
    delta_tape: Tape
    # Label copied from the BranchSpec.
    mutation_desc: str = ""
    prefix_replayed: int = 0  # parent exchanges replayed for $0 (the savings)
    tail_recorded: int = 0  # counterfactual exchanges recorded fresh
47
+
48
+
49
class ForkTransport(httpx.BaseTransport):
    """Three-phase transport: prefix-replay → mutation-inject → tail-record.

    `inner` is only consulted for the tail (requests after the divergence
    step); the prefix and the mutation are served from in-memory bytes, so a
    fork costs nothing up to and including the divergence point.

    Requests are numbered in arrival order; `prefix_replayed` and
    `tail_recorded` count how many fell into the first and third phase.
    """

    def __init__(
        self,
        parent_tape: Tape,
        divergence_step: int,
        mutated_response: bytes,
        delta_tape: Tape,
        inner: httpx.BaseTransport,
    ) -> None:
        self.parent = parent_tape
        self.k = divergence_step
        self.mutated = mutated_response
        self.delta = delta_tape
        self.inner = inner
        self._i = 0  # index of the next request to arrive
        self.prefix_replayed = 0
        self.tail_recorded = 0

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        """Serve one request according to its position relative to the fork.

        Raises:
            DivergenceError: if a prefix or divergence-step request body does
                not hash to the same sha256 as the recorded request.
        """
        body = request.content
        i = self._i
        self._i += 1

        if i < self.k:
            # prefix — replay from parent, assert the agent rebuilt it exactly
            rec_req, rec_resp = self.parent.exchange(i)
            recorded, replayed = sha256_hex(rec_req), sha256_hex(body)
            if recorded != replayed:
                raise DivergenceError(
                    f"fork prefix request #{i} diverged from parent tape "
                    f"(recorded {recorded[:12]}, replay {replayed[:12]}); "
                    f"the agent is not deterministic up to divergence_step {self.k}"
                )
            self.prefix_replayed += 1
            return _json_response(rec_resp, request)

        if i == self.k:
            # divergence point — same request, mutated response
            rec_req, _ = self.parent.exchange(i)
            recorded, replayed = sha256_hex(rec_req), sha256_hex(body)
            if recorded != replayed:
                raise DivergenceError(
                    f"fork request at divergence_step {i} diverged from parent tape "
                    f"(recorded {recorded[:12]}, replay {replayed[:12]})"
                )
            self.delta.append_exchange(body, self.mutated)
            return _json_response(self.mutated, request)

        # tail — counterfactual territory, record fresh
        inner_resp = self.inner.handle_request(request)
        try:
            resp_body = inner_resp.read()
        finally:
            # Release the inner response even when the body read fails.
            inner_resp.close()
        self.delta.append_exchange(body, resp_body)
        self.tail_recorded += 1
        # Re-wrap the buffered body; only the content-type header is carried over.
        return httpx.Response(
            inner_resp.status_code,
            headers={"content-type": inner_resp.headers.get("content-type", "application/json")},
            content=resp_body,
            request=request,
        )
113
+
114
+
115
def _json_response(body: bytes, request: httpx.Request) -> httpx.Response:
    """Wrap ``body`` in a 200 response with a JSON content type, bound to ``request``."""
    json_headers = {"content-type": "application/json"}
    return httpx.Response(200, headers=json_headers, content=body, request=request)
122
+
123
+
124
class ForkEngine:
    """Creates counterfactual branches from a recorded tape."""

    @staticmethod
    def fork(
        parent_tape: Tape,
        spec: BranchSpec,
        agent_fn,  # Callable[[anthropic.Anthropic], Any] — the SAME agent
        *,
        post_fork_transport: httpx.BaseTransport | None = None,
        api_key: str = "sk-ant-fork",
    ) -> Branch:
        """Fork `parent_tape` at `spec.divergence_step`.

        `agent_fn` must be the same agent that produced the parent tape: it is
        re-run from the start, its prefix served from the tape for free, the
        response at the divergence step swapped for `spec.mutated_response`,
        and the counterfactual tail recorded via `post_fork_transport` (or the
        real Anthropic API if None).

        The HTTP client built for the run is closed before returning. A
        default transport created here is closed too; a caller-supplied
        `post_fork_transport` is left open so it can be reused.

        Returns a `Branch` whose `delta_tape` holds only the exchanges from the
        divergence step onward.

        Raises:
            ValueError: if `spec.divergence_step` is outside the parent tape.
        """
        step = spec.divergence_step
        n = len(parent_tape.exchanges)
        if step < 0 or step >= n:
            raise ValueError(f"divergence_step {step} out of range [0, {n})")

        delta_tape = Tape(
            boundary=parent_tape.boundary,
            agent_name=parent_tape.agent_name,
        )
        owns_inner = post_fork_transport is None
        inner = httpx.HTTPTransport() if owns_inner else post_fork_transport
        fork_transport = ForkTransport(parent_tape, step, spec.mutated_response, delta_tape, inner)

        http_client = httpx.Client(transport=fork_transport)
        try:
            client = anthropic.Anthropic(
                api_key=api_key,
                http_client=http_client,
                max_retries=0,
            )
            agent_fn(client)
        finally:
            http_client.close()
            if owns_inner:
                # Only the connection pool created above is ours to release.
                inner.close()

        return Branch(
            parent_tape=parent_tape,
            divergence_step=step,
            delta_tape=delta_tape,
            mutation_desc=spec.mutation_desc,
            prefix_replayed=fork_transport.prefix_replayed,
            tail_recorded=fork_transport.tail_recorded,
        )
tracefork/nondet.py ADDED
@@ -0,0 +1,96 @@
1
+ """Virtualised nondeterminism sources.
2
+
3
+ Bit-exact replay requires capturing every nondeterminism draw at record time
4
+ and serving it back identically at replay. `RecordingNondet` draws real values
5
+ and logs them; `ReplayNondet` serves them back in order; `DriftingNondet` is
6
+ the negative control (fresh real values → forced divergence).
7
+
8
+ The SDK masks transport exceptions as `APIConnectionError`; `find_divergence`
9
+ unwraps `__cause__`/`__context__` to recover a `DivergenceError`.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import datetime
15
+ import uuid
16
+ from typing import Protocol
17
+
18
+
19
+ class DivergenceError(RuntimeError):
20
+ """Raised when a replay diverges from the recorded tape."""
21
+
22
+
23
+ def find_divergence(exc: BaseException | None) -> DivergenceError | None:
24
+ """Walk an exception's cause/context chain for a DivergenceError.
25
+
26
+ The Anthropic SDK wraps any exception raised inside its httpx transport in
27
+ `APIConnectionError`. This recovers the original `DivergenceError`."""
28
+ seen: set[int] = set()
29
+ cur = exc
30
+ while cur is not None and id(cur) not in seen:
31
+ seen.add(id(cur))
32
+ if isinstance(cur, DivergenceError):
33
+ return cur
34
+ cur = cur.__cause__ or cur.__context__
35
+ return None
36
+
37
+
38
class NondetSource(Protocol):
    """Structural interface for anything that supplies clock and UUID draws."""

    # Return the current time as a string (RecordingNondet yields an ISO-8601 UTC timestamp).
    def now_iso(self) -> str: ...
    # Return a UUID rendered as a hex string.
    def new_uuid_hex(self) -> str: ...
41
+
42
+
43
+ class RecordingNondet:
44
+ """Draws genuinely real values and logs each draw."""
45
+
46
+ def __init__(self) -> None:
47
+ # Capture the real datetime.now and uuid.uuid4 at init time, before
48
+ # Recorder.__enter__ patches datetime.datetime with a subclass.
49
+ self._real_now = datetime.datetime.now
50
+ self._real_uuid4 = uuid.uuid4
51
+ self.draws: list[tuple[str, str]] = []
52
+
53
+ def now_iso(self) -> str:
54
+ v = self._real_now(datetime.UTC).isoformat()
55
+ self.draws.append(("clock", v))
56
+ return v
57
+
58
+ def new_uuid_hex(self) -> str:
59
+ v = self._real_uuid4().hex
60
+ self.draws.append(("uuid", v))
61
+ return v
62
+
63
+
64
class ReplayNondet:
    """Serves recorded draws back in order; errors on kind/order mismatch."""

    def __init__(self, draws: list[tuple[str, str]]) -> None:
        self._i = 0  # position of the next draw to serve
        self._draws = list(draws)  # private copy; later caller edits are ignored

    def _next(self, kind: str) -> str:
        """Serve the next recorded value, which must be of the given kind.

        Raises DivergenceError when the recorded draws are used up or when the
        next recorded draw is of a different kind.
        """
        position, available = self._i, len(self._draws)
        if position >= available:
            raise DivergenceError(
                f"replay asked for a {kind!r} draw but the tape is exhausted "
                f"(consumed {position}/{available})"
            )
        recorded_kind, recorded_value = self._draws[position]
        if recorded_kind != kind:
            raise DivergenceError(
                f"draw #{position}: replay asked for {kind!r}, tape has {recorded_kind!r}"
            )
        self._i = position + 1
        return recorded_value

    def now_iso(self) -> str:
        """Serve the next draw, which must be a recorded clock value."""
        return self._next("clock")

    def new_uuid_hex(self) -> str:
        """Serve the next draw, which must be a recorded uuid value."""
        return self._next("uuid")

    def fully_consumed(self) -> bool:
        """Report whether every recorded draw has been served."""
        return len(self._draws) == self._i
93
+
94
+
95
class DriftingNondet(RecordingNondet):
    """Negative control: draws fresh real values during replay, forcing divergence.

    Adds nothing to `RecordingNondet`; the distinct type only names the role.
    """
tracefork/py.typed ADDED
File without changes
tracefork/recorder.py ADDED
@@ -0,0 +1,140 @@
1
+ """Recorder and AsyncRecorder — one-line wrappers that record any Anthropic client.
2
+
3
+ `Recorder` wraps a sync `anthropic.Anthropic` client; `AsyncRecorder` wraps an
4
+ `anthropic.AsyncAnthropic` client. Both are context managers. Inside the `with`
5
+ block, `uuid.uuid4` is patched globally so agent-generated IDs are recorded.
6
+ `datetime.datetime.now` is NOT patched here — it is a C classmethod on an
7
+ immutable type (Python 3.12+) and replacing `datetime.datetime` with a subclass
8
+ breaks pydantic's lazy schema builder inside the Anthropic SDK. Agents that need
9
+ deterministic timestamps should call `nondet.now_iso()` via `NondetSource`.
10
+
11
+ Usage (sync):
12
+ with Recorder(client, agent_name="my-agent") as rec:
13
+ result = my_agent(rec.client)
14
+ tape = rec.tape
15
+
16
+ Usage (async):
17
+ async with AsyncRecorder(async_client) as rec:
18
+ result = await my_agent(rec.client)
19
+ tape = rec.tape
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import uuid as _uuid_module
25
+ from collections.abc import Callable
26
+
27
+ import anthropic
28
+ import httpx
29
+
30
+ from .nondet import RecordingNondet
31
+ from .tape import Tape
32
+ from .transport import AsyncTraceforkTransport, TraceforkTransport
33
+
34
+
35
class Recorder:
    """Sync context manager that records an Anthropic client's I/O.

    While the context is active, `uuid.uuid4` is replaced module-wide so each
    UUID drawn is logged on the tape; the previous function is restored on
    exit. `client` and `tape` remain readable after the block ends.
    """

    def __init__(self, client: anthropic.Anthropic, agent_name: str = "") -> None:
        self._orig_client = client
        self._agent_name = agent_name
        self._nondet: RecordingNondet | None = None
        self._tape: Tape | None = None
        self._wrapped_client: anthropic.Anthropic | None = None
        # Non-None only while the uuid4 patch installed by this recorder is live.
        self._orig_uuid4: Callable[[], _uuid_module.UUID] | None = None

    @property
    def client(self) -> anthropic.Anthropic:
        """The recording copy of the client; only available once entered."""
        if self._wrapped_client is None:
            raise RuntimeError("Use Recorder as a context manager (with Recorder(client) as rec:)")
        return self._wrapped_client

    @property
    def tape(self) -> Tape:
        """The tape being (or that was) recorded; only available once entered."""
        if self._tape is None:
            raise RuntimeError("Use Recorder as a context manager")
        return self._tape

    def __enter__(self) -> Recorder:
        # Entering twice would save the patched uuid4 as the "original" and the
        # real one could never be restored, so refuse instead.
        if self._orig_uuid4 is not None:
            raise RuntimeError("Recorder is already active; it cannot be entered twice")

        # RecordingNondet captures the real datetime.now and uuid.uuid4 in __init__
        # before we patch uuid.uuid4 below. Order matters.
        self._nondet = RecordingNondet()
        self._tape = Tape(agent_name=self._agent_name)
        # Share the draws list so recording nondet populates the tape's draws directly
        self._tape.draws = self._nondet.draws

        # Extract the original httpx transport to use as the recording inner transport.
        # This preserves ScriptedFakeLLM in tests and HTTPTransport in production.
        orig_inner = self._orig_client._client._transport
        transport = TraceforkTransport("record", self._tape, orig_inner)
        # `.copy()` preserves the original client's base_url, auth_token, default
        # headers/query and timeout — only the transport and retries are swapped, so
        # a proxied or custom-base_url client still records faithfully.
        self._wrapped_client = self._orig_client.copy(
            http_client=httpx.Client(transport=transport),
            max_retries=0,
        )

        # Patch uuid.uuid4 (regular module-level function — directly assignable).
        # The Anthropic SDK may also call uuid.uuid4() internally; all draws are recorded.
        nondet = self._nondet
        self._orig_uuid4 = _uuid_module.uuid4

        def _patched_uuid4() -> _uuid_module.UUID:
            return _uuid_module.UUID(nondet.new_uuid_hex())

        _uuid_module.uuid4 = _patched_uuid4
        return self

    def __exit__(self, *args: object) -> None:
        # Restore only if a patch is actually live; an exit without a matching
        # enter must not overwrite uuid.uuid4 with None.
        orig, self._orig_uuid4 = self._orig_uuid4, None
        if orig is not None:
            _uuid_module.uuid4 = orig
91
+
92
+
93
class AsyncRecorder:
    """Async context manager that records an AsyncAnthropic client's I/O.

    While the context is active, `uuid.uuid4` is replaced module-wide so each
    UUID drawn is logged on the tape; the previous function is restored on
    exit. `client` and `tape` remain readable after the block ends.
    """

    def __init__(self, client: anthropic.AsyncAnthropic, agent_name: str = "") -> None:
        self._orig_client = client
        self._agent_name = agent_name
        self._nondet: RecordingNondet | None = None
        self._tape: Tape | None = None
        self._wrapped_client: anthropic.AsyncAnthropic | None = None
        # Non-None only while the uuid4 patch installed by this recorder is live.
        self._orig_uuid4: Callable[[], _uuid_module.UUID] | None = None

    @property
    def client(self) -> anthropic.AsyncAnthropic:
        """The recording copy of the client; only available once entered."""
        if self._wrapped_client is None:
            raise RuntimeError("Use AsyncRecorder as an async context manager")
        return self._wrapped_client

    @property
    def tape(self) -> Tape:
        """The tape being (or that was) recorded; only available once entered."""
        if self._tape is None:
            raise RuntimeError("Use AsyncRecorder as an async context manager")
        return self._tape

    async def __aenter__(self) -> AsyncRecorder:
        # Entering twice would save the patched uuid4 as the "original" and the
        # real one could never be restored, so refuse instead.
        if self._orig_uuid4 is not None:
            raise RuntimeError("AsyncRecorder is already active; it cannot be entered twice")

        # RecordingNondet binds the real uuid.uuid4 in __init__, so it must be
        # built before the patch below is installed.
        self._nondet = RecordingNondet()
        self._tape = Tape(agent_name=self._agent_name)
        # Share the list so recorded draws land on the tape directly.
        self._tape.draws = self._nondet.draws

        orig_inner = self._orig_client._client._transport
        transport = AsyncTraceforkTransport("record", self._tape, orig_inner)
        # `.copy()` preserves base_url, auth_token, default headers/query and timeout
        # — only the transport and retries are swapped.
        self._wrapped_client = self._orig_client.copy(
            http_client=httpx.AsyncClient(transport=transport),
            max_retries=0,
        )

        nondet = self._nondet
        self._orig_uuid4 = _uuid_module.uuid4

        def _patched_uuid4() -> _uuid_module.UUID:
            return _uuid_module.UUID(nondet.new_uuid_hex())

        _uuid_module.uuid4 = _patched_uuid4
        return self

    async def __aexit__(self, *args: object) -> None:
        # Restore only if a patch is actually live; an exit without a matching
        # enter must not overwrite uuid.uuid4 with None.
        orig, self._orig_uuid4 = self._orig_uuid4, None
        if orig is not None:
            _uuid_module.uuid4 = orig
tracefork/replay.py ADDED
@@ -0,0 +1,119 @@
1
+ """Verified replay: run an agent on a recorded tape and assert bit-exactness.
2
+
3
+ `ReplayVerifier` loads a tape, runs the caller's agent function with a
4
+ `TraceforkTransport("replay", tape)`, and returns a `VerificationResult`
5
+ describing whether the replay was bit-exact. `DriftDoctor` classifies why
6
+ a divergence happened when it wasn't.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import enum
12
+ from dataclasses import dataclass
13
+
14
+ import anthropic
15
+ import httpx
16
+
17
+ from .nondet import DivergenceError, find_divergence
18
+ from .tape import Tape
19
+ from .transport import TraceforkTransport
20
+
21
+
22
class DriftCause(enum.Enum):
    """Categories `DriftDoctor.classify` can assign to a replay divergence."""

    # Chosen when the hint mentions "unrecorded", "exhausted" or "draw".
    UNRECORDED_NONDET = "unrecorded_nondet"
    # Default when no other keyword matches: request bytes diverged.
    CODE_CHANGE = "code_change"
    # Chosen when the hint mentions "extra" or "boundary".
    BOUNDARY_VIOLATION = "boundary_violation"
26
+
27
+
28
@dataclass
class DivergenceReport:
    """Where and why a replay diverged, as captured by `ReplayVerifier.verify`."""

    # Value of the replay transport's request counter when the error surfaced.
    step_index: int
    cause_hint: str  # raw message from DivergenceError
    # The DivergenceError itself, unwrapped from any SDK wrapper exception.
    error: DivergenceError
33
+
34
+
35
@dataclass
class VerificationResult:
    """Outcome of one `ReplayVerifier.verify` run."""

    # True when no divergence occurred and the transport reports the tape fully consumed.
    bit_exact: bool
    # Match count reported by the replay transport.
    matched: int
    # Number of exchanges on the recorded tape.
    total: int
    # True only when bit_exact holds and both fingerprints are equal.
    fingerprints_match: bool
    # Digest of the recorded tape.
    recorded_fingerprint: str
    # Copy of the recorded digest on a bit-exact replay, otherwise "".
    replayed_fingerprint: str
    # Populated when the replay raised a DivergenceError; None otherwise.
    divergence: DivergenceReport | None = None
44
+
45
+
46
class ReplayVerifier:
    """Replay a tape and report whether the agent reproduced it bit-exactly."""

    def __init__(
        self,
        tape: Tape,
        agent_fn,  # Callable[[anthropic.Anthropic], Any]
        *,
        api_key: str = "sk-ant-replay",
    ) -> None:
        self._tape = tape
        self._agent_fn = agent_fn
        self._api_key = api_key

    def verify(self) -> VerificationResult:
        """Run the agent against the tape and summarise the outcome.

        A DivergenceError — raised directly or wrapped by the SDK — is turned
        into a `DivergenceReport`; any other exception propagates unchanged.
        """
        transport = TraceforkTransport("replay", self._tape)
        http_client = httpx.Client(transport=transport)
        client = anthropic.Anthropic(
            api_key=self._api_key,
            http_client=http_client,
            max_retries=0,
        )

        divergence: DivergenceReport | None = None
        try:
            self._agent_fn(client)
        except Exception as exc:
            # find_divergence returns `exc` itself when it already is a
            # DivergenceError, so one handler covers both raw and wrapped cases.
            found = find_divergence(exc)
            if found is None:
                raise
            divergence = DivergenceReport(
                step_index=transport._i,
                cause_hint=str(found),
                error=found,
            )

        recorded_fp = self._tape.digest()
        bit_exact = divergence is None and transport.fully_consumed()
        # The replayed fingerprint is not computed independently: it mirrors the
        # recorded one on a full, divergence-free replay and is empty otherwise.
        replayed_fp = recorded_fp if bit_exact else ""
        fingerprints_match = bit_exact and recorded_fp == replayed_fp

        return VerificationResult(
            bit_exact=bit_exact,
            matched=transport.matched,
            total=len(self._tape.exchanges),
            fingerprints_match=fingerprints_match,
            recorded_fingerprint=recorded_fp,
            replayed_fingerprint=replayed_fp,
            divergence=divergence,
        )
106
+
107
+
108
class DriftDoctor:
    """Classifies why a replay diverged from the tape."""

    @staticmethod
    def classify(report: DivergenceReport) -> DriftCause:
        """Map a report's hint text to a cause by case-insensitive keyword match.

        Rules are tried in order; the first rule with any keyword present in
        the hint wins. With no match the divergence is attributed to a code
        change (the request bytes differed).
        """
        hint = report.cause_hint.lower()
        rules = (
            (("unrecorded", "exhausted", "draw"), DriftCause.UNRECORDED_NONDET),
            (("extra", "boundary"), DriftCause.BOUNDARY_VIOLATION),
        )
        for keywords, cause in rules:
            if any(word in hint for word in keywords):
                return cause
        return DriftCause.CODE_CHANGE