tracefork 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tracefork/transport.py ADDED
@@ -0,0 +1,137 @@
1
+ """Recording/replay httpx transports — sync and async, streaming SSE capable.
2
+
3
+ Record mode: forward to the inner transport, buffer the full response body
4
+ (works for both streaming SSE and non-streaming JSON — httpx buffers both
5
+ identically via .read()/.aread()), append to the tape, return the response.
6
+
7
+ Replay mode: for each request, sha256-assert its *body* matches the tape record,
8
+ then serve the recorded bytes back. A replay transport has no inner transport;
9
+ any unrecorded request is a hard error. The matched surface is the request body;
10
+ request headers (e.g. ``anthropic-beta`` / ``anthropic-version``) are out of scope
11
+ for the bit-exactness claim — see the README's determinism-boundary note.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import httpx
17
+
18
+ from .nondet import DivergenceError
19
+ from .tape import Tape, sha256_hex
20
+
21
+
22
class TraceforkTransport(httpx.BaseTransport):
    """Sync recording/replay transport.

    In ``record`` mode each request is forwarded to ``inner``; the fully
    buffered response body is appended to ``tape`` and a fresh response
    carrying those bytes is returned.  In ``replay`` mode requests are
    matched, in order, against the tape by the SHA-256 of their body and
    answered with the recorded response bytes.

    Attributes:
        mode: ``"record"`` or ``"replay"``.
        tape: Tape that is appended to (record) or read from (replay).
        inner: Transport performing the real exchange; only used in record mode.
        matched: Number of replayed requests that matched the tape so far.
    """

    def __init__(
        self,
        mode: str,
        tape: Tape,
        inner: httpx.BaseTransport | None = None,
    ) -> None:
        """Create the transport.

        Args:
            mode: ``"record"`` or ``"replay"``.
            tape: Tape to record into or replay from.
            inner: Underlying transport; mandatory in record mode.

        Raises:
            ValueError: If ``mode`` is not a known mode, or if record mode is
                requested without an inner transport.
        """
        # Explicit check instead of ``assert`` so validation survives ``python -O``
        # and matches the ValueError raised for a missing inner transport.
        if mode not in ("record", "replay"):
            raise ValueError(f"mode must be 'record' or 'replay', got {mode!r}")
        if mode == "record" and inner is None:
            raise ValueError("record mode requires an inner transport")
        self.mode = mode
        self.tape = tape
        self.inner = inner
        self._i = 0  # index of the next tape exchange to replay
        self.matched = 0

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        """Record or replay a single exchange.

        Args:
            request: The outgoing request; its ``content`` is the matched surface.

        Returns:
            A response carrying the live (record) or recorded (replay) body.

        Raises:
            DivergenceError: In replay mode, when the tape is exhausted or the
                request body hash differs from the recorded one.
        """
        body = request.content

        if self.mode == "record":
            inner_resp = self.inner.handle_request(request)  # type: ignore[union-attr]
            try:
                resp_body = inner_resp.read()
            finally:
                # Release the upstream stream even when buffering fails midway.
                inner_resp.close()
            self.tape.append_exchange(body, resp_body)
            # Only content-type is carried over to the rebuilt response
            # (presumably because the body is already fully decoded — confirm).
            return httpx.Response(
                inner_resp.status_code,
                headers={
                    "content-type": inner_resp.headers.get("content-type", "application/json")
                },
                content=resp_body,
                request=request,
            )

        # replay
        total = len(self.tape.exchanges)
        if self._i >= total:
            raise DivergenceError(
                f"replay made unrecorded request #{self._i} "
                f"(tape has {total} exchanges)"
            )
        rec_req, rec_resp = self.tape.exchange(self._i)
        # Hash each side once; the digests are reused in the error message.
        recorded_hash = sha256_hex(rec_req)
        replay_hash = sha256_hex(body)
        if recorded_hash != replay_hash:
            raise DivergenceError(
                f"request #{self._i} diverged from tape "
                f"(recorded {recorded_hash[:12]}, replay {replay_hash[:12]})"
            )
        self._i += 1
        self.matched += 1
        # NOTE: only request/response bytes are read from the tape here, so the
        # replayed status and content type are fixed to 200 / application/json.
        return httpx.Response(
            200,
            headers={"content-type": "application/json"},
            content=rec_resp,
            request=request,
        )

    def fully_consumed(self) -> bool:
        """Return True when in replay mode and every tape exchange was served."""
        return self.mode == "replay" and self._i == len(self.tape.exchanges)
79
+
80
+
81
class AsyncTraceforkTransport(httpx.AsyncBaseTransport):
    """Async recording/replay transport — identical logic to sync variant.

    In ``record`` mode each request is awaited on ``inner``; the fully
    buffered response body is appended to ``tape`` and a fresh response
    carrying those bytes is returned.  In ``replay`` mode requests are
    matched, in order, against the tape by the SHA-256 of their body and
    answered with the recorded response bytes.

    Attributes:
        mode: ``"record"`` or ``"replay"``.
        tape: Tape that is appended to (record) or read from (replay).
        inner: Async transport performing the real exchange; only used in
            record mode.
        matched: Number of replayed requests that matched the tape so far.
    """

    def __init__(
        self,
        mode: str,
        tape: Tape,
        inner: httpx.AsyncBaseTransport | None = None,
    ) -> None:
        """Create the transport.

        Args:
            mode: ``"record"`` or ``"replay"``.
            tape: Tape to record into or replay from.
            inner: Underlying async transport; mandatory in record mode.

        Raises:
            ValueError: If ``mode`` is not a known mode, or if record mode is
                requested without an inner transport.
        """
        # Explicit check instead of ``assert`` so validation survives ``python -O``
        # and matches the ValueError raised for a missing inner transport.
        if mode not in ("record", "replay"):
            raise ValueError(f"mode must be 'record' or 'replay', got {mode!r}")
        if mode == "record" and inner is None:
            raise ValueError("record mode requires an inner transport")
        self.mode = mode
        self.tape = tape
        self.inner = inner
        self._i = 0  # index of the next tape exchange to replay
        self.matched = 0

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        """Record or replay a single exchange.

        Args:
            request: The outgoing request; its ``content`` is the matched surface.

        Returns:
            A response carrying the live (record) or recorded (replay) body.

        Raises:
            DivergenceError: In replay mode, when the tape is exhausted or the
                request body hash differs from the recorded one.
        """
        body = request.content

        if self.mode == "record":
            inner_resp = await self.inner.handle_async_request(request)  # type: ignore[union-attr]
            try:
                resp_body = await inner_resp.aread()
            finally:
                # Release the upstream stream even when buffering fails midway.
                await inner_resp.aclose()
            self.tape.append_exchange(body, resp_body)
            # Only content-type is carried over to the rebuilt response
            # (presumably because the body is already fully decoded — confirm).
            return httpx.Response(
                inner_resp.status_code,
                headers={
                    "content-type": inner_resp.headers.get("content-type", "application/json")
                },
                content=resp_body,
                request=request,
            )

        # replay
        total = len(self.tape.exchanges)
        if self._i >= total:
            raise DivergenceError(
                f"replay made unrecorded request #{self._i} "
                f"(tape has {total} exchanges)"
            )
        rec_req, rec_resp = self.tape.exchange(self._i)
        # Hash each side once; the digests are reused in the error message.
        recorded_hash = sha256_hex(rec_req)
        replay_hash = sha256_hex(body)
        if recorded_hash != replay_hash:
            raise DivergenceError(
                f"request #{self._i} diverged from tape "
                f"(recorded {recorded_hash[:12]}, replay {replay_hash[:12]})"
            )
        self._i += 1
        self.matched += 1
        # NOTE: only request/response bytes are read from the tape here, so the
        # replayed status and content type are fixed to 200 / application/json.
        return httpx.Response(
            200,
            headers={"content-type": "application/json"},
            content=rec_resp,
            request=request,
        )

    def fully_consumed(self) -> bool:
        """Return True when in replay mode and every tape exchange was served."""
        return self.mode == "replay" and self._i == len(self.tape.exchanges)
tracefork/validate.py ADDED
@@ -0,0 +1,177 @@
1
+ """Self-validation: run the blame engine on fault-injected runs with known
2
+ ground truth and measure how often it fingers the right step.
3
+
4
+ Fully offline and $0. Each run:
5
+ 1. record a clean two-step tape with a synthetic agent;
6
+ 2. inject a known fault into step 0 (the "root cause");
7
+ 3. run the blame engine — forking re-runs the synthetic agent, which echoes
8
+ each response into its next request, so the fault marker reaches the
9
+ fault-aware tail and flips the outcome;
10
+ 4. score a hit when blame ranks the fault step #1 (top-1 precision).
11
+
12
+ A negative control runs blame with a no-op perturbation and reports the maximum
13
+ flip-rate, which should stay near zero — otherwise a high "precision" would be meaningless.
14
+
15
+ The synthetic agent is the same callable during recording and every fork, so
16
+ the fork prefix replays bit-for-bit (the determinism contract blame relies on).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ from dataclasses import dataclass
23
+
24
+ import anthropic
25
+ import httpx
26
+
27
+ from .blame import BlameEngine, StringMatchOracle
28
+ from .faults import FAULT_MARKER_BYTES, FaultClass, FaultInjector
29
+ from .synthetic import FaultAwareFakeLLM, ScriptedFakeLLM
30
+ from .tape import Tape
31
+ from .transport import TraceforkTransport
32
+ from .wire import make_text_response, make_tool_use_response
33
+
34
+ SUCCESS_RESP = make_text_response("SUCCESS — confirmed")
35
+ FAIL_RESP = make_text_response("FAIL — cancelled")
36
+ TOOL_RESP = make_tool_use_response("check_availability", {"seats": 3, "destination": "Tokyo"})
37
+
38
+
39
+ def _serialize_response(msg) -> str:
40
+ """Flatten an Anthropic message's content to a deterministic string, so the
41
+ agent can echo it (markers and all) into its next request."""
42
+ parts: list[str] = []
43
+ for block in msg.content:
44
+ t = getattr(block, "type", None)
45
+ if t == "text":
46
+ parts.append(block.text)
47
+ elif t == "tool_use":
48
+ parts.append(f"{block.name} {json.dumps(block.input, sort_keys=True)}")
49
+ return " | ".join(parts) or "(empty)"
50
+
51
+
52
def synthetic_agent(client: anthropic.Anthropic) -> str:
    """Run the two-turn booking conversation and return the flattened final reply.

    Turn 1 sends the booking prompt.  Turn 2 replays that prompt, echoes the
    flattened turn-1 response back as the assistant message, and asks to
    confirm — which is how a fault injected into turn 1 reaches the outcome.
    """
    prompt = "book a flight to Tokyo"
    call_options = {"model": "claude-sonnet-4-6", "max_tokens": 100}

    first_reply = client.messages.create(
        **call_options,
        messages=[{"role": "user", "content": prompt}],
    )

    # The echoed text carries any fault marker from turn 1 into turn 2.
    conversation = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": _serialize_response(first_reply)},
        {"role": "user", "content": "confirm"},
    ]
    final_reply = client.messages.create(**call_options, messages=conversation)
    return _serialize_response(final_reply)
71
+
72
+
73
def _record_clean_tape() -> Tape:
    """Record a clean tape by running the synthetic agent against a scripted fake.

    The fake is scripted with a tool-use response followed by a success
    response, one per request the synthetic agent makes.

    Returns:
        The tape populated by the recording transport.
    """
    fake = ScriptedFakeLLM([TOOL_RESP, SUCCESS_RESP])
    tape = Tape(agent_name="synthetic_booking_agent")
    transport = TraceforkTransport("record", tape, fake)
    # This helper runs once per validation run; close the HTTP client each time
    # instead of leaving one unclosed client behind per recording.
    with httpx.Client(transport=transport) as http_client:
        client = anthropic.Anthropic(
            api_key="sk-ant-fake",
            http_client=http_client,
            max_retries=0,
        )
        synthetic_agent(client)
    return tape
84
+
85
+
86
@dataclass
class ValidationReport:
    """Outcome of one ``ValidationRunner.run()`` for a single fault class."""

    # Fault class that was injected in every run.
    fault_class: FaultClass
    # Number of record → inject → blame runs requested.
    n_runs: int
    # Runs in which blame ranked the faulted step first.
    top1_correct: int
    # top1_correct / n_runs (0.0 when n_runs is not positive).
    top1_precision: float
    # Largest flip rate seen in the no-op negative control, across all runs.
    negative_control_max_flip: float = 0.0
93
+
94
+
95
class ValidationRunner:
    """Offline fault-injection validation harness bound to one fault class."""

    def __init__(self, fault_class: FaultClass, *, k: int = 3, n_runs: int = 5) -> None:
        """Store the fault class, the ``k`` passed to blame, and the run count."""
        self._fault_class = fault_class
        self._k = k
        self._n_runs = n_runs

    def run(self) -> ValidationReport:
        """Execute ``n_runs`` record → inject → blame cycles and summarise them.

        Each cycle records a clean tape, injects the fault at step 0, and
        counts a hit when blame ranks that step first.  A second blame pass
        with a no-op perturbation serves as the negative control; the largest
        flip rate it produces across all cycles is reported.
        """
        oracle = StringMatchOracle(success_re=r"SUCCESS", failure_re=r"FAIL")
        fault_step = 0
        hits = 0
        worst_control_flip = 0.0

        # Negative control: no real perturbation anywhere → expect no flips.
        # It captures nothing from the loop, so it is defined once up front.
        def null_perturb_factory(step_idx: int):
            return SUCCESS_RESP, ScriptedFakeLLM([SUCCESS_RESP] * 10)

        for _ in range(self._n_runs):
            tape = _record_clean_tape()
            mutated_resp = FaultInjector.inject(tape, fault_step, self._fault_class)

            # Scope note: this is a positive-vs-inert control — the faulted step gets a
            # flip-capable tail, every other step an inert one. It proves the engine ranks
            # a genuinely outcome-flipping step first (test_blame.py injects the flip at the
            # *final* step to show it isn't hardwired to step 0), not that it discriminates
            # among multiple competing causes on a long tape. See README → Validation scope.
            def perturb_factory(step_idx: int, _mutated=mutated_resp, _fault=fault_step):
                if step_idx != _fault:
                    # Other steps: a benign perturbation that should not flip.
                    return SUCCESS_RESP, ScriptedFakeLLM([SUCCESS_RESP] * 10)
                # Faulted step: the tail flips when it sees the marker.
                flipping_tail = FaultAwareFakeLLM(
                    normal_responses=[SUCCESS_RESP] * 10,
                    fault_responses=[FAIL_RESP] * 10,
                    fault_marker=FAULT_MARKER_BYTES,
                )
                return _mutated, flipping_tail

            ranking = BlameEngine.rank(
                tape,
                synthetic_agent,
                oracle,
                perturb_factory=perturb_factory,
                k=self._k,
                budget_usd=100.0,
            )
            leader = ranking.top()
            if leader is not None and leader.step_index == fault_step:
                hits += 1

            control = BlameEngine.rank(
                tape,
                synthetic_agent,
                oracle,
                perturb_factory=null_perturb_factory,
                k=self._k,
                budget_usd=100.0,
            )
            for result in control.results:
                worst_control_flip = max(worst_control_flip, result.flip_rate)

        # Guard the division so a non-positive run count yields 0.0.
        if self._n_runs > 0:
            precision = hits / self._n_runs
        else:
            precision = 0.0

        return ValidationReport(
            fault_class=self._fault_class,
            n_runs=self._n_runs,
            top1_correct=hits,
            top1_precision=precision,
            negative_control_max_flip=worst_control_flip,
        )
164
+
165
+
166
def run_all_fault_classes(k: int = 3, n_runs: int = 5) -> dict:
    """Validate every ``FaultClass`` member and collect the results.

    Returns a dict keyed by each fault class's ``value``; every entry holds
    ``top1_precision``, ``top1_correct``, ``n_runs`` and
    ``negative_control_max_flip`` copied from that class's report.
    """
    summary = {}
    for fault_class in FaultClass:
        outcome = ValidationRunner(fault_class, k=k, n_runs=n_runs).run()
        summary[fault_class.value] = dict(
            top1_precision=outcome.top1_precision,
            top1_correct=outcome.top1_correct,
            n_runs=outcome.n_runs,
            negative_control_max_flip=outcome.negative_control_max_flip,
        )
    return summary
@@ -0,0 +1,209 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>tracefork — time-travel debugger</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ :root {
10
+ --bg: #0d1117; --surface: #161b22; --border: #30363d;
11
+ --text: #c9d1d9; --muted: #6e7681; --green: #3fb950;
12
+ --blue: #58a6ff; --orange: #f0883e; --purple: #d2a8ff;
13
+ --red: #f85149; --yellow: #e3b341;
14
+ }
15
+ body { background: var(--bg); color: var(--text); font-family: ui-monospace, monospace; font-size: 13px; }
16
+ header { padding: 12px 20px; border-bottom: 1px solid var(--border); display: flex; align-items: center; gap: 12px; }
17
+ header h1 { font-size: 15px; color: var(--green); }
18
+ header .run-meta { color: var(--muted); font-size: 11px; }
19
+ .layout { display: grid; grid-template-columns: 280px 1fr 320px; height: calc(100vh - 45px); }
20
+ .panel { border-right: 1px solid var(--border); overflow-y: auto; }
21
+ .panel:last-child { border-right: none; }
22
+ .panel-header { padding: 10px 14px; background: var(--surface); border-bottom: 1px solid var(--border);
23
+ font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: 0.05em; }
24
+
25
+ /* Panel 1 — Timeline */
26
+ .exchange-item { padding: 10px 14px; border-bottom: 1px solid var(--border); cursor: pointer; }
27
+ .exchange-item:hover, .exchange-item.active { background: var(--surface); }
28
+ .exchange-item .step { font-size: 10px; color: var(--muted); }
29
+ .exchange-item .role { font-size: 12px; margin: 3px 0; }
30
+ .role-user { color: var(--blue); }
31
+ .role-assistant { color: var(--purple); }
32
+ .role-tool { color: var(--orange); }
33
+ .exchange-item .preview { font-size: 11px; color: var(--muted); white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
34
+ .blame-badge { float: right; font-size: 10px; padding: 2px 6px; border-radius: 10px; background: #1a3a1a; color: var(--green); }
35
+ .blame-badge.high { background: #3a1a1a; color: var(--red); }
36
+
37
+ /* Panel 2 — Detail */
38
+ .detail-empty { padding: 40px; color: var(--muted); text-align: center; }
39
+ .detail-section { padding: 14px; border-bottom: 1px solid var(--border); }
40
+ .detail-section h3 { font-size: 11px; color: var(--muted); margin-bottom: 8px; text-transform: uppercase; }
41
+ pre { background: var(--surface); padding: 10px; border-radius: 4px; overflow-x: auto; font-size: 11px; line-height: 1.6; }
42
+ .key { color: var(--blue); }
43
+ .string { color: var(--green); }
44
+ .number { color: var(--yellow); }
45
+
46
+ /* Panel 3 — Blame */
47
+ .blame-empty { padding: 40px; color: var(--muted); text-align: center; }
48
+ .blame-row { padding: 8px 14px; border-bottom: 1px solid var(--border); display: flex; gap: 8px; align-items: center; }
49
+ .blame-rank { width: 20px; color: var(--muted); font-size: 11px; }
50
+ .blame-info { flex: 1; }
51
+ .blame-step { font-size: 12px; }
52
+ .blame-ci { font-size: 10px; color: var(--muted); }
53
+ .blame-bar-wrap { width: 80px; }
54
+ .blame-bar { height: 6px; background: var(--green); border-radius: 3px; }
55
+ .blame-bar.decisive { background: var(--red); }
56
+ .blame-rate { font-size: 11px; text-align: right; min-width: 50px; }
57
+ .blame-rate.decisive { color: var(--red); font-weight: bold; }
58
+ .blame-header-row { padding: 6px 14px; background: var(--surface); border-bottom: 1px solid var(--border);
59
+ display: flex; gap: 8px; font-size: 10px; color: var(--muted); }
60
+
61
+ .loading { padding: 40px; text-align: center; color: var(--muted); }
62
+ .error { padding: 20px; color: var(--red); }
63
+ .tag { display: inline-block; padding: 1px 6px; border-radius: 3px; font-size: 10px; margin-left: 6px; }
64
+ .tag-live { background: #1a3a1a; color: var(--green); }
65
+ .tag-static { background: #1a2a3a; color: var(--blue); }
66
+ </style>
67
+ </head>
68
+ <body>
69
+ <header>
70
+ <h1>tracefork</h1>
71
+ <span class="run-meta" id="run-meta">loading…</span>
72
+ <span class="tag" id="mode-tag"></span>
73
+ </header>
74
+ <div class="layout">
75
+ <div class="panel" id="timeline-panel">
76
+ <div class="panel-header">Timeline</div>
77
+ <div id="timeline-content"><div class="loading">loading…</div></div>
78
+ </div>
79
+ <div class="panel" id="detail-panel">
80
+ <div class="panel-header">Exchange Detail</div>
81
+ <div id="detail-content"><div class="detail-empty">← select an exchange</div></div>
82
+ </div>
83
+ <div class="panel" id="blame-panel">
84
+ <div class="panel-header">Blame</div>
85
+ <div id="blame-content"><div class="blame-empty">run tracefork blame to populate</div></div>
86
+ </div>
87
+ </div>
88
+
89
+ <script>
90
+ // ── data source ───────────────────────────────────────────────────────────
91
+ let DATA = null;
92
+
93
/**
 * Resolve the run data for this page.
 *
 * Static mode: returns the object injected as window.__TRACEFORK_DATA__.
 * Live mode: when window.__TRACEFORK_SERVER_URL__ is defined, fetches
 * `${url}/api/run/<run_id>` using the `run_id` query parameter.
 *
 * @returns {Promise<object>} the run data
 * @throws {Error} when no source is configured, `run_id` is missing in live
 *   mode, or the server answers with a non-OK status
 */
async function loadData() {
  const modeTag = document.getElementById('mode-tag');

  if (window.__TRACEFORK_DATA__) {
    // static mode: data injected at report-generation time
    modeTag.textContent = 'static';
    modeTag.className = 'tag tag-static';
    return window.__TRACEFORK_DATA__;
  }

  if (window.__TRACEFORK_SERVER_URL__ !== undefined) {
    // live mode: fetch from the serving origin (empty base → same-origin, any port)
    modeTag.textContent = 'live';
    modeTag.className = 'tag tag-live';
    const url = window.__TRACEFORK_SERVER_URL__;
    const runId = new URLSearchParams(location.search).get('run_id');
    // Without this guard the page would request `/api/run/null`.
    if (!runId) throw new Error('missing run_id query parameter');
    // The id comes from the address bar; encode it so it stays one path segment.
    const resp = await fetch(`${url}/api/run/${encodeURIComponent(runId)}`);
    if (!resp.ok) throw new Error(`server ${resp.status}: run not found`);
    return resp.json();
  }

  throw new Error('No data source configured');
}
112
+
113
+ // ── render ─────────────────────────────────────────────────────────────────
114
/**
 * Fill the timeline panel with one clickable row per exchange.
 * Rows whose index has a blame entry get a flip-rate badge; the badge is
 * marked "high" from a flip rate of 0.7 upwards.
 */
function renderTimeline(data) {
  const blame = data.blame || {};

  const rows = data.exchanges.map((ex, i) => {
    const entry = blame[i];
    const flipRate = entry ? entry.flip_rate : null;

    let badgeHtml = '';
    if (flipRate !== null) {
      const badgeClass = flipRate >= 0.7 ? 'blame-badge high' : 'blame-badge';
      badgeHtml = `<span class="${badgeClass}">${Math.round(flipRate * 100)}%</span>`;
    }

    // Anything that is neither user nor assistant is styled as a tool row.
    let roleClass = 'role-tool';
    if (ex.role === 'user') {
      roleClass = 'role-user';
    } else if (ex.role === 'assistant') {
      roleClass = 'role-assistant';
    }

    return `<div class="exchange-item" data-i="${i}" onclick="selectExchange(${i})">
      ${badgeHtml}
      <div class="step">exchange ${i}</div>
      <div class="role ${roleClass}">${escape(ex.role || 'unknown')}</div>
      <div class="preview">${escape(ex.preview || '')}</div>
    </div>`;
  });

  document.getElementById('timeline-content').innerHTML = rows.join('');
}
132
+
133
/**
 * Fill the blame panel with one row per blamed step, highest flip rate first.
 * Leaves the placeholder untouched when there is no blame data.
 * Rows with a flip rate of 0.7 or more are styled as decisive.
 */
function renderBlame(data) {
  const blame = data.blame;
  if (!blame || Object.keys(blame).length === 0) return;

  const entries = Object.entries(blame).sort((a, b) => b[1].flip_rate - a[1].flip_rate);

  const headerHtml = `<div class="blame-header-row">
    <span style="width:20px">rank</span>
    <span style="flex:1">step</span>
    <span style="width:80px">bar</span>
    <span style="min-width:50px;text-align:right">flip</span>
  </div>`;

  const rowsHtml = entries.map(([step, info], rank) => {
    const rate = info.flip_rate;
    const isDecisive = rate >= 0.7;
    const pct = Math.round(rate * 100);
    // Keep the bar inside its 80px track even for out-of-range or NaN rates.
    const barWidth = Number.isFinite(pct) ? Math.min(100, Math.max(0, pct)) : 0;
    // A report without CI bounds should still render instead of throwing
    // (a throw here makes the boot code replace the timeline with an error).
    const hasCi = Number.isFinite(info.ci_lo) && Number.isFinite(info.ci_hi);
    const ciText = hasCi
      ? `95% CI [${info.ci_lo.toFixed(2)}, ${info.ci_hi.toFixed(2)}]`
      : '';
    // `step` is a key of the loaded JSON, so escape it before it reaches innerHTML.
    return `<div class="blame-row">
      <span class="blame-rank">${rank + 1}</span>
      <div class="blame-info">
        <div class="blame-step">step-${escape(step)}</div>
        <div class="blame-ci">${ciText}</div>
      </div>
      <div class="blame-bar-wrap"><div class="blame-bar${isDecisive ? ' decisive' : ''}" style="width:${barWidth}%"></div></div>
      <div class="blame-rate${isDecisive ? ' decisive' : ''}">${pct}%</div>
    </div>`;
  }).join('');

  document.getElementById('blame-content').innerHTML = headerHtml + rowsHtml;
}
159
+
160
// Index of the currently highlighted timeline row; -1 while nothing is selected.
let _selected = -1;

/**
 * Highlight timeline row `i` and show its request/response in the detail panel.
 * Both payloads are pretty-printed as JSON and passed through syntaxHighlight.
 */
function selectExchange(i) {
  const rowFor = (index) => document.querySelector(`[data-i="${index}"]`);

  if (_selected >= 0) {
    rowFor(_selected)?.classList.remove('active');
  }
  _selected = i;
  rowFor(i)?.classList.add('active');

  const ex = DATA.exchanges[i];
  const pretty = (value) => syntaxHighlight(JSON.stringify(value || {}, null, 2));
  const reqJson = pretty(ex.request);
  const respJson = pretty(ex.response_preview);

  const detail = document.getElementById('detail-content');
  detail.innerHTML = `
    <div class="detail-section">
      <h3>Request (exchange ${i})</h3>
      <pre>${reqJson}</pre>
    </div>
    <div class="detail-section">
      <h3>Response</h3>
      <pre>${respJson}</pre>
    </div>`;
}
180
+
181
/**
 * HTML-escape a JSON string and wrap its tokens in <span> elements.
 * Quoted strings followed by a colon get class "key", other quoted strings
 * "string"; every other token (numbers, true/false/null) gets "number".
 */
function syntaxHighlight(json) {
  const TOKEN = /("(\\u[\da-fA-F]{4}|\\[^u]|[^\\"])*"(\s*:)?|\b(true|false|null)\b|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?)/g;

  // Escape first so the payload cannot inject markup; only the spans are HTML.
  const escaped = json
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

  return escaped.replace(TOKEN, (token) => {
    let cls;
    if (!token.startsWith('"')) {
      cls = 'number';
    } else if (token.endsWith(':')) {
      cls = 'key';
    } else {
      cls = 'string';
    }
    return `<span class="${cls}">${token}</span>`;
  });
}
191
+
192
/**
 * HTML-escape a value for interpolation into markup.
 * The value is coerced with String() first, so numbers and other non-string
 * payload fields no longer throw on `.replace`. Quotes are escaped as well,
 * which keeps the result safe inside attribute values.
 * NOTE: the name shadows the legacy global `escape`; it is kept because the
 * render functions call it by this name.
 */
function escape(s) {
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
193
+
194
+ // ── boot ───────────────────────────────────────────────────────────────────
195
// Boot: load the run data, fill the header, render both panels, and select the
// first exchange. Any failure is shown in the timeline panel.
(async () => {
  try {
    DATA = await loadData();
    document.getElementById('run-meta').textContent =
      `${DATA.agent_name || 'unknown agent'} · ${DATA.exchanges.length} exchanges · ${DATA.created_at || ''}`;
    renderTimeline(DATA);
    renderBlame(DATA);
    if (DATA.exchanges.length > 0) selectExchange(0);
  } catch (e) {
    // Build the error node with textContent so the message is never parsed as
    // HTML; it can contain text derived from the loaded data.
    const box = document.createElement('div');
    box.className = 'error';
    box.textContent = e && e.message ? e.message : String(e);
    document.getElementById('timeline-content').replaceChildren(box);
  }
})();
207
+ </script>
208
+ </body>
209
+ </html>
tracefork/wire.py ADDED
@@ -0,0 +1,76 @@
1
+ """Anthropic wire-format response builders.
2
+
3
+ Real Anthropic Messages-API JSON, used in three places:
4
+ - the offline test fakes (`tests/fakes.py` re-exports these),
5
+ - the blame engine's perturbation responses,
6
+ - the fault-injection validation suite.
7
+
8
+ Keeping them in the package (not in tests/) means production code never
9
+ imports from the test tree.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+
16
+ from .tape import sha256_hex
17
+
18
+
19
def make_text_response(
    text: str,
    *,
    model: str = "claude-sonnet-4-6",
    input_tokens: int = 100,
    output_tokens: int = 20,
) -> bytes:
    """Build the wire-format JSON bytes of a final (``end_turn``) text message.

    The message id is ``msg_`` plus the first 20 characters of the hash of
    ``text + model``; the token counts are reported verbatim under ``usage``.
    """
    digest = sha256_hex((text + model).encode())
    usage = {"input_tokens": input_tokens, "output_tokens": output_tokens}
    # Key order is part of the emitted bytes — keep it stable.
    message = {
        "id": "msg_" + digest[:20],
        "type": "message",
        "role": "assistant",
        "model": model,
        "content": [{"type": "text", "text": text}],
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "usage": usage,
    }
    return json.dumps(message).encode()
40
+
41
+
42
def make_tool_use_response(
    tool_name: str,
    tool_input: dict,
    *,
    model: str = "claude-sonnet-4-6",
    preamble: str = "",
    input_tokens: int = 100,
    output_tokens: int = 30,
) -> bytes:
    """Build the wire-format JSON bytes of a ``tool_use`` message.

    A non-empty ``preamble`` becomes a text block placed before the tool_use
    block.  The tool-use id hashes the tool name together with the JSON of
    its input; the message id hashes the tool name together with the model.
    """
    text_blocks = [{"type": "text", "text": preamble}] if preamble else []

    tool_digest = sha256_hex((tool_name + json.dumps(tool_input)).encode())
    tool_block = {
        "type": "tool_use",
        "id": "toolu_" + tool_digest[:18],
        "name": tool_name,
        "input": tool_input,
    }

    message_digest = sha256_hex((tool_name + model).encode())
    # Key order is part of the emitted bytes — keep it stable.
    message = {
        "id": "msg_" + message_digest[:20],
        "type": "message",
        "role": "assistant",
        "model": model,
        "content": [*text_blocks, tool_block],
        "stop_reason": "tool_use",
        "stop_sequence": None,
        "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens},
    }
    return json.dumps(message).encode()
76
+ ).encode()