tracefork 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracefork-0.1.0/.editorconfig +17 -0
- tracefork-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +33 -0
- tracefork-0.1.0/.github/ISSUE_TEMPLATE/config.yml +5 -0
- tracefork-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +26 -0
- tracefork-0.1.0/.github/dependabot.yml +10 -0
- tracefork-0.1.0/.github/pull_request_template.md +16 -0
- tracefork-0.1.0/.github/workflows/ci.yml +36 -0
- tracefork-0.1.0/.github/workflows/release.yml +31 -0
- tracefork-0.1.0/.gitignore +31 -0
- tracefork-0.1.0/.pre-commit-config.yaml +13 -0
- tracefork-0.1.0/CHANGELOG.md +45 -0
- tracefork-0.1.0/CLAUDE.md +112 -0
- tracefork-0.1.0/CODE_OF_CONDUCT.md +132 -0
- tracefork-0.1.0/CONTRIBUTING.md +79 -0
- tracefork-0.1.0/LICENSE +21 -0
- tracefork-0.1.0/PKG-INFO +235 -0
- tracefork-0.1.0/README.md +198 -0
- tracefork-0.1.0/SECURITY.md +48 -0
- tracefork-0.1.0/SPIKE0.md +103 -0
- tracefork-0.1.0/docs/demo.png +0 -0
- tracefork-0.1.0/examples/demo_report.py +81 -0
- tracefork-0.1.0/experiments/validation_report_committed.json +14 -0
- tracefork-0.1.0/pyproject.toml +94 -0
- tracefork-0.1.0/src/tracefork/__init__.py +6 -0
- tracefork-0.1.0/src/tracefork/blame.py +296 -0
- tracefork-0.1.0/src/tracefork/cli.py +367 -0
- tracefork-0.1.0/src/tracefork/constants.py +24 -0
- tracefork-0.1.0/src/tracefork/faults.py +129 -0
- tracefork-0.1.0/src/tracefork/fork.py +173 -0
- tracefork-0.1.0/src/tracefork/nondet.py +96 -0
- tracefork-0.1.0/src/tracefork/py.typed +0 -0
- tracefork-0.1.0/src/tracefork/recorder.py +140 -0
- tracefork-0.1.0/src/tracefork/replay.py +119 -0
- tracefork-0.1.0/src/tracefork/report.py +131 -0
- tracefork-0.1.0/src/tracefork/server.py +73 -0
- tracefork-0.1.0/src/tracefork/store.py +123 -0
- tracefork-0.1.0/src/tracefork/synthetic.py +104 -0
- tracefork-0.1.0/src/tracefork/tape.py +135 -0
- tracefork-0.1.0/src/tracefork/transport.py +137 -0
- tracefork-0.1.0/src/tracefork/validate.py +177 -0
- tracefork-0.1.0/src/tracefork/wire.py +76 -0
- tracefork-0.1.0/src/tracefork_spike/__init__.py +7 -0
- tracefork-0.1.0/src/tracefork_spike/__main__.py +3 -0
- tracefork-0.1.0/src/tracefork_spike/agent.py +91 -0
- tracefork-0.1.0/src/tracefork_spike/fake_llm.py +106 -0
- tracefork-0.1.0/src/tracefork_spike/nondet.py +97 -0
- tracefork-0.1.0/src/tracefork_spike/spike.py +125 -0
- tracefork-0.1.0/src/tracefork_spike/tape.py +79 -0
- tracefork-0.1.0/src/tracefork_spike/transport.py +68 -0
- tracefork-0.1.0/tests/__init__.py +0 -0
- tracefork-0.1.0/tests/fakes.py +26 -0
- tracefork-0.1.0/tests/test_blame.py +175 -0
- tracefork-0.1.0/tests/test_cli.py +81 -0
- tracefork-0.1.0/tests/test_faults.py +143 -0
- tracefork-0.1.0/tests/test_fork.py +118 -0
- tracefork-0.1.0/tests/test_recorder.py +125 -0
- tracefork-0.1.0/tests/test_replay.py +124 -0
- tracefork-0.1.0/tests/test_report.py +116 -0
- tracefork-0.1.0/tests/test_spike0.py +112 -0
- tracefork-0.1.0/tests/test_tape.py +93 -0
- tracefork-0.1.0/tests/test_transport.py +115 -0
- tracefork-0.1.0/uv.lock +874 -0
- tracefork-0.1.0/web/report.html +209 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
root = true
|
|
2
|
+
|
|
3
|
+
[*]
|
|
4
|
+
charset = utf-8
|
|
5
|
+
end_of_line = lf
|
|
6
|
+
insert_final_newline = true
|
|
7
|
+
trim_trailing_whitespace = true
|
|
8
|
+
indent_style = space
|
|
9
|
+
|
|
10
|
+
[*.py]
|
|
11
|
+
indent_size = 4
|
|
12
|
+
|
|
13
|
+
[*.{yml,yaml,toml,json,md}]
|
|
14
|
+
indent_size = 2
|
|
15
|
+
|
|
16
|
+
[*.md]
|
|
17
|
+
trim_trailing_whitespace = false
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Report a problem
|
|
4
|
+
labels: bug
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Description
|
|
8
|
+
|
|
9
|
+
A clear, concise description of the bug.
|
|
10
|
+
|
|
11
|
+
## Reproduction
|
|
12
|
+
|
|
13
|
+
- Command(s) run (e.g. `tracefork replay ...`, `tracefork fork ...`, `tracefork blame ...`):
|
|
14
|
+
- Tape involved (attach or describe how it was recorded, if applicable):
|
|
15
|
+
- Minimal steps to reproduce:
|
|
16
|
+
|
|
17
|
+
## Expected vs actual
|
|
18
|
+
|
|
19
|
+
**Expected:**
|
|
20
|
+
|
|
21
|
+
**Actual:**
|
|
22
|
+
|
|
23
|
+
## Environment
|
|
24
|
+
|
|
25
|
+
- `tracefork --version`:
|
|
26
|
+
- Python version:
|
|
27
|
+
- OS:
|
|
28
|
+
|
|
29
|
+
## Determinism-boundary note
|
|
30
|
+
|
|
31
|
+
Did the agent under trace read all time/id nondeterminism only via `NondetSource`
|
|
32
|
+
(no direct `datetime.now()`, `uuid`, or `random` calls)? If unsure, say so — this
|
|
33
|
+
affects whether replay/fork/blame can be expected to be bit-exact.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature request
|
|
3
|
+
about: Suggest an idea
|
|
4
|
+
labels: enhancement
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Problem
|
|
8
|
+
|
|
9
|
+
What problem are you trying to solve? What's missing or painful today?
|
|
10
|
+
|
|
11
|
+
## Proposed solution
|
|
12
|
+
|
|
13
|
+
Describe the feature or change you'd like to see.
|
|
14
|
+
|
|
15
|
+
## Alternatives
|
|
16
|
+
|
|
17
|
+
What alternatives or workarounds have you considered?
|
|
18
|
+
|
|
19
|
+
## Scope/invariants impact
|
|
20
|
+
|
|
21
|
+
Does this affect any of tracefork's core invariants? Please note if it touches:
|
|
22
|
+
|
|
23
|
+
- Offline / $0 / no-key operation for tests, spike, `validate`, or the demo
|
|
24
|
+
- The determinism boundary (agent reads time/ids only via `NondetSource`)
|
|
25
|
+
- The verifier's hash-check proof (vs. assertion) or the drift negative control
|
|
26
|
+
- Packaging/PyPI metadata or the public CLI surface
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
## What & why
|
|
2
|
+
|
|
3
|
+
<!-- Summarize the change and the motivation behind it. -->
|
|
4
|
+
|
|
5
|
+
## Checklist
|
|
6
|
+
|
|
7
|
+
- [ ] `uv run pytest -q` is green (65 offline tests, $0, no key)
|
|
8
|
+
- [ ] `uv run ruff check .` is green
|
|
9
|
+
- [ ] `uv run ruff format --check .` is green
|
|
10
|
+
- [ ] `uv run mypy src/tracefork` is green
|
|
11
|
+
- [ ] `uv run tracefork validate --check` is green (if this PR touches `blame.py`, `fork.py`, or `faults.py`)
|
|
12
|
+
- [ ] Determinism-boundary invariants respected: the agent reads time/ids only via
|
|
13
|
+
`NondetSource`; no new networked test; the verifier still proves via hash-check
|
|
14
|
+
(not assert); the drift negative control still fails as expected
|
|
15
|
+
- [ ] Commit messages are conventional-commit style, with no `Co-Authored-By: Claude`
|
|
16
|
+
or other AI-authorship trailer
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.12", "3.13"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- name: Install uv
|
|
18
|
+
uses: astral-sh/setup-uv@v3
|
|
19
|
+
with:
|
|
20
|
+
python-version: ${{ matrix.python-version }}
|
|
21
|
+
- name: Install dependencies
|
|
22
|
+
run: uv sync --extra dev
|
|
23
|
+
- name: Ruff lint
|
|
24
|
+
run: uv run ruff check .
|
|
25
|
+
- name: Ruff format check
|
|
26
|
+
run: uv run ruff format --check .
|
|
27
|
+
- name: Type check
|
|
28
|
+
run: uv run mypy src/tracefork
|
|
29
|
+
- name: Tests + coverage
|
|
30
|
+
run: uv run pytest -q --cov --cov-report=term-missing
|
|
31
|
+
- name: Self-validation regression gate
|
|
32
|
+
run: uv run tracefork validate --check
|
|
33
|
+
- name: Build distribution
|
|
34
|
+
run: uv build
|
|
35
|
+
- name: Twine check
|
|
36
|
+
run: uv run --with twine twine check dist/*
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
# Publishes tracefork to PyPI via Trusted Publishing (OIDC) — no API token stored.
|
|
4
|
+
# Fires when a GitHub Release is published, or on manual dispatch (e.g. to publish an
|
|
5
|
+
# already-tagged version). Requires a matching trusted publisher configured on PyPI:
|
|
6
|
+
# PyPI project: tracefork · owner: pratik916 · repo: tracefork
|
|
7
|
+
# workflow: release.yml · environment: pypi
|
|
8
|
+
|
|
9
|
+
on:
|
|
10
|
+
release:
|
|
11
|
+
types: [published]
|
|
12
|
+
workflow_dispatch:
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
publish:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
environment:
|
|
18
|
+
name: pypi
|
|
19
|
+
url: https://pypi.org/p/tracefork
|
|
20
|
+
permissions:
|
|
21
|
+
id-token: write # OIDC token for PyPI Trusted Publishing
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
- name: Install uv
|
|
25
|
+
uses: astral-sh/setup-uv@v3
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.12"
|
|
28
|
+
- name: Build sdist and wheel
|
|
29
|
+
run: uv build
|
|
30
|
+
- name: Publish to PyPI
|
|
31
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
.venv/
|
|
5
|
+
*.egg-info/
|
|
6
|
+
.pytest_cache/
|
|
7
|
+
|
|
8
|
+
# Spike tape artifacts (regenerated by running the spike)
|
|
9
|
+
*.tape.sqlite
|
|
10
|
+
/tmp_tapes/
|
|
11
|
+
|
|
12
|
+
# Runtime artifacts (CLI output, regenerated on demand)
|
|
13
|
+
/store.db
|
|
14
|
+
/report.html
|
|
15
|
+
/blame_*.json
|
|
16
|
+
/validation_report.json
|
|
17
|
+
/examples/demo_report.html
|
|
18
|
+
|
|
19
|
+
# Build, coverage, and tool caches (regenerated on demand)
|
|
20
|
+
/dist/
|
|
21
|
+
.coverage
|
|
22
|
+
.coverage.*
|
|
23
|
+
htmlcov/
|
|
24
|
+
.mypy_cache/
|
|
25
|
+
.ruff_cache/
|
|
26
|
+
|
|
27
|
+
# Local-only planning scaffolding (workspace convention)
|
|
28
|
+
docs/superpowers/
|
|
29
|
+
.beads/
|
|
30
|
+
planning/
|
|
31
|
+
.superpowers/
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.6.9
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
9
|
+
rev: v1.11.2
|
|
10
|
+
hooks:
|
|
11
|
+
- id: mypy
|
|
12
|
+
files: ^src/tracefork/
|
|
13
|
+
additional_dependencies: [anthropic, types-requests]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-07-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Record/replay** at the Anthropic SDK's httpx transport boundary
|
|
15
|
+
(`TraceforkTransport` / `AsyncTraceforkTransport`), streaming-SSE capable, with
|
|
16
|
+
bit-exact replay proven by sha256-checking every replayed request body against the
|
|
17
|
+
recorded tape — and drift detection that fails loudly on divergence rather than
|
|
18
|
+
silently falling back to the network.
|
|
19
|
+
- **Content-addressed tape format** (`Tape`) — sha256 blobs plus an ordered event log,
|
|
20
|
+
JSON + base64 (never pickle), persistable to SQLite, with a hash-chain `digest()`
|
|
21
|
+
fingerprint.
|
|
22
|
+
- **Nondeterminism virtualization** (`NondetSource`) — the only path through which an
|
|
23
|
+
agent reads time and ids, with `RecordingNondet`, `ReplayNondet`, and a `DriftingNondet`
|
|
24
|
+
negative control that proves the divergence detector actually detects divergence.
|
|
25
|
+
- **Three-phase fork engine** (`ForkEngine`, `ForkTransport`) — prefix-replay ($0),
|
|
26
|
+
mutation-injection (swap a response), and tail-record (the recorded counterfactual
|
|
27
|
+
continuation), re-running the same agent that produced the original tape.
|
|
28
|
+
- **Causal blame engine** (`BlameEngine`) — forks each step `k` times, re-runs the agent,
|
|
29
|
+
grades outcomes via an `Oracle`, and ranks steps by flip-rate with Wilson score
|
|
30
|
+
confidence intervals; a `BudgetGovernor` estimates dollar cost from the pricing table
|
|
31
|
+
and refuses to exceed a caller-supplied budget before making any real API calls.
|
|
32
|
+
- **Fault-injection self-validation suite** (`faults.py`, `validate.py`) — five fault
|
|
33
|
+
classes with markers embedded in valid Anthropic JSON, scored end-to-end offline
|
|
34
|
+
against a synthetic fault-aware agent: **1.00 top-1 precision** across all five classes,
|
|
35
|
+
with an enforced negative-control threshold so the proof isn't vacuous.
|
|
36
|
+
- **Single-file web report/UI** (`report.py`, `server.py`, `web/report.html`) — a
|
|
37
|
+
dependency-free, three-panel HTML report (timeline, exchange detail, blame ranking)
|
|
38
|
+
either rendered statically or served live via FastAPI (`serve`, 127.0.0.1, no CORS).
|
|
39
|
+
- **CLI** (`cli.py`, Typer) — `replay`, `verify`, `fork`, `blame`, `report`, `serve`,
|
|
40
|
+
`validate`.
|
|
41
|
+
- `src/tracefork_spike/` — the original Spike 0 that de-risked bit-exact, no-key replay
|
|
42
|
+
within the declared determinism boundary.
|
|
43
|
+
|
|
44
|
+
[Unreleased]: https://github.com/pratik916/tracefork/compare/v0.1.0...HEAD
|
|
45
|
+
[0.1.0]: https://github.com/pratik916/tracefork/releases/tag/v0.1.0
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file guides Claude Code when working in the `tracefork` repository.
|
|
4
|
+
|
|
5
|
+
## What this is
|
|
6
|
+
|
|
7
|
+
`tracefork` is a time-travel debugger for AI agents: record an agent run to a
|
|
8
|
+
content-addressed **tape**, replay it **bit-exact for $0** (hash-verified), fork any
|
|
9
|
+
step, and measure causal blame with confidence intervals — the instrument itself
|
|
10
|
+
validated against runs with injected, known root-cause faults.
|
|
11
|
+
|
|
12
|
+
**Current state: v1 built.** All five product pillars work offline and are tested
|
|
13
|
+
(65 tests, $0): streaming-capable record/replay with drift detection, the three-phase
|
|
14
|
+
fork engine, the causal blame engine with Wilson CIs and a budget governor, the
|
|
15
|
+
single-file web report/UI, and the fault-injection self-validation suite (5 fault
|
|
16
|
+
classes at 1.00 top-1 precision). `src/tracefork_spike/` keeps the original Spike 0 that
|
|
17
|
+
de-risked the load-bearing assumption (bit-exact, no-key replay within a declared
|
|
18
|
+
determinism boundary). Design/feature list (kept outside this repository): `../ideas/2026-06-11-tracefork-features.md`;
|
|
19
|
+
spike finding: `SPIKE0.md`.
|
|
20
|
+
|
|
21
|
+
## Commands
|
|
22
|
+
|
|
23
|
+
Python is **3.12 via uv**. The tests, the spike, `validate`, the demo, and
|
|
24
|
+
record/replay/fork are offline and $0 — **no `ANTHROPIC_API_KEY`, no network**. Only
|
|
25
|
+
`blame` against a *real* run hits the live API (budget-capped). Always prefix commands with `uv run`.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
uv sync --extra dev # install (anthropic, zstandard, typer, fastapi, uvicorn + pytest)
|
|
29
|
+
uv run pytest -q # full offline suite (65 tests)
|
|
30
|
+
uv run pytest tests/test_faults.py::test_validation_runner_fingers_fault_step -q # one test
|
|
31
|
+
uv run tracefork validate # self-validation: blame vs injected, known faults
|
|
32
|
+
uv run tracefork validate --check # regression-gate vs experiments/validation_report_committed.json
|
|
33
|
+
uv run python examples/demo_report.py # write examples/demo_report.html (the README screenshot)
|
|
34
|
+
uv run python -m tracefork_spike # the original Spike 0 bit-exact replay receipt
|
|
35
|
+
uv run tracefork --help # replay, verify, fork, report, serve, blame, validate
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Architecture (the parts that span files)
|
|
39
|
+
|
|
40
|
+
The spine is a **record/replay seam at the Anthropic SDK's httpx boundary**, plus a
|
|
41
|
+
**nondeterminism-virtualization seam** the agent reads time/ids through. Bit-exactness
|
|
42
|
+
is the contract between them.
|
|
43
|
+
|
|
44
|
+
The product lives in `src/tracefork/`:
|
|
45
|
+
|
|
46
|
+
- `nondet.py` — `NondetSource` is the *only* way the agent gets time/ids.
|
|
47
|
+
`RecordingNondet` draws real values and logs them; `ReplayNondet` serves them back in
|
|
48
|
+
order; `DriftingNondet` is the negative control (fresh values → forced divergence).
|
|
49
|
+
`find_divergence()` unwraps a `DivergenceError` from the `APIConnectionError` the SDK
|
|
50
|
+
wraps transport exceptions in — **keep this; without it a real divergence looks like a
|
|
51
|
+
network blip.**
|
|
52
|
+
- `transport.py` — `TraceforkTransport` (sync) + `AsyncTraceforkTransport` (async) are the
|
|
53
|
+
capture seam, streaming-SSE capable (buffer via `.read()`/`.aread()`). Record mode tees
|
|
54
|
+
request+response bytes into the tape; replay mode serves recorded bytes and
|
|
55
|
+
sha256-asserts each request body matches (the divergence detector). A replay transport
|
|
56
|
+
has **no inner transport**, so any unrecorded request is a hard error.
|
|
57
|
+
- `tape.py` — `Tape` is content-addressed (sha256 blobs) + an ordered event log,
|
|
58
|
+
JSON+base64 in memory, persistable to SQLite, with a hash-chain `digest()` fingerprint.
|
|
59
|
+
(`to_bytes`/`from_bytes` are JSON, **not pickle** — no arbitrary-code-execution risk.)
|
|
60
|
+
- `recorder.py` — `Recorder` context manager wraps a real `anthropic.Anthropic` at its
|
|
61
|
+
`_client._transport` seam (via `client.copy(http_client=...)`, so base_url / auth_token /
|
|
62
|
+
default headers are preserved). Patches `uuid.uuid4` globally; **does not** patch
|
|
63
|
+
`datetime.datetime` (immutable C type in 3.12+, and a subclass breaks the SDK's pydantic
|
|
64
|
+
schema builder) — agents needing deterministic clocks read `NondetSource` directly.
|
|
65
|
+
- `fork.py` — `ForkTransport` runs three phases: prefix-replay ($0, request asserted to
|
|
66
|
+
match the parent), mutation-injection (same request, swapped response), tail-record (the
|
|
67
|
+
counterfactual continuation). `Branch` carries `prefix_replayed`/`tail_recorded` counts.
|
|
68
|
+
`ForkEngine.fork()` re-runs the **same** agent that produced the tape.
|
|
69
|
+
- `store.py` — `TapeStore`, SQLite persistence for tapes + the branch DAG.
|
|
70
|
+
- `blame.py` — `BlameEngine.rank()` forks each step `k` times, re-runs the agent, grades
|
|
71
|
+
via an `Oracle`, counts flips vs. the parent outcome; `wilson_ci()` for intervals;
|
|
72
|
+
`BudgetGovernor` estimates tail-call cost from `constants.PRICING_TABLE` before spend and
|
|
73
|
+
`rank()` raises `BudgetExceededError` if the estimate exceeds `budget_usd`.
|
|
74
|
+
- `faults.py` / `validate.py` — 5 fault classes (valid JSON, marker **inside** a content
|
|
75
|
+
field) + the self-validation runner; a synthetic agent echoes each response forward so an
|
|
76
|
+
injected fault propagates to a fault-aware tail. `run_all_fault_classes()` scores top-1.
|
|
77
|
+
**Scope (don't overstate):** the fixture is a positive-vs-inert control on a short tape —
|
|
78
|
+
it proves the engine is genuinely causal (not a fixed-slot artifact), not that it
|
|
79
|
+
discriminates among competing causes on long tapes. See README → Validation scope.
|
|
80
|
+
- `report.py` / `server.py` / `web/report.html` — the single-file, dependency-free
|
|
81
|
+
three-panel UI; `report.py` injects tape JSON (HTML-escaped against `</script>`
|
|
82
|
+
breakout), `server.py` is FastAPI same-origin (no CORS, binds 127.0.0.1).
|
|
83
|
+
- `wire.py` / `synthetic.py` — Anthropic wire-format builders and the offline
|
|
84
|
+
Scripted/FaultAware fake transports, in the **package** so production never imports from
|
|
85
|
+
`tests/`; `tests/fakes.py` re-exports them.
|
|
86
|
+
- `cli.py` — Typer entry point for all seven commands.
|
|
87
|
+
|
|
88
|
+
`src/tracefork_spike/` holds the original Spike 0 (`fake_llm.py`, `agent.py`, `spike.py`):
|
|
89
|
+
record → save → load → replay → verify + negative control, with its own tests.
|
|
90
|
+
|
|
91
|
+
## Invariants / conventions
|
|
92
|
+
|
|
93
|
+
- **Offline and $0 is non-negotiable** for the whole test suite, the spike, `validate`,
|
|
94
|
+
and the demo — no key, no network. The synthetic transports (`synthetic.py`) are the
|
|
95
|
+
seam; add to them rather than reaching for the real API. (`blame` on a real run is the
|
|
96
|
+
one budget-capped exception.)
|
|
97
|
+
- **The agent must read time/ids only through `NondetSource`** — any direct
|
|
98
|
+
`datetime.now()` / `uuid` / `random` breaks the determinism boundary and the
|
|
99
|
+
bit-exactness claim.
|
|
100
|
+
- **The verifier proves, it does not assert** — every request body is hash-checked against the
|
|
101
|
+
tape; the negative control must keep failing (drift detected) or the proof is vacuous.
|
|
102
|
+
- **Declared determinism boundary (v1):** single-process (sync **or** asyncio), clock +
|
|
103
|
+
id nondeterminism captured through `NondetSource`. Threads/subprocess are out of scope;
|
|
104
|
+
fork and blame additionally assume the agent rebuilds its prefix deterministically (the
|
|
105
|
+
property replay proves) — see `SPIKE0.md`.
|
|
106
|
+
- **No `Co-Authored-By: Claude` trailer** on commits in this repo (public portfolio repo,
|
|
107
|
+
sole-author attribution).
|
|
108
|
+
- **Model IDs / pricing / SDK usage:** consult the `claude-api` skill before writing or
|
|
109
|
+
editing any Anthropic integration code rather than relying on memory.
|
|
110
|
+
- `docs/superpowers/`, `.superpowers/`, `.beads/`, `planning/` are gitignored local scaffolding (but
|
|
111
|
+
`docs/demo.png` is committed). Runtime artifacts (`store.db`, `report.html`,
|
|
112
|
+
`blame_*.json`, `validation_report.json`, `examples/demo_report.html`) are gitignored.
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity
|
|
10
|
+
and orientation.
|
|
11
|
+
|
|
12
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
|
13
|
+
diverse, inclusive, and healthy community.
|
|
14
|
+
|
|
15
|
+
## Our Standards
|
|
16
|
+
|
|
17
|
+
Examples of behavior that contributes to a positive environment for our
|
|
18
|
+
community include:
|
|
19
|
+
|
|
20
|
+
* Demonstrating empathy and kindness toward other people
|
|
21
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
|
22
|
+
* Giving and gracefully accepting constructive feedback
|
|
23
|
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
|
24
|
+
and learning from the experience
|
|
25
|
+
* Focusing on what is best not just for us as individuals, but for the
|
|
26
|
+
overall community
|
|
27
|
+
|
|
28
|
+
Examples of unacceptable behavior include:
|
|
29
|
+
|
|
30
|
+
* The use of sexualized language or imagery, and sexual attention or advances
|
|
31
|
+
of any kind
|
|
32
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
|
33
|
+
* Public or private harassment
|
|
34
|
+
* Publishing others' private information, such as a physical or email address,
|
|
35
|
+
without their explicit permission
|
|
36
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
|
37
|
+
professional setting
|
|
38
|
+
|
|
39
|
+
## Enforcement Responsibilities
|
|
40
|
+
|
|
41
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
|
42
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
|
43
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
|
44
|
+
or harmful.
|
|
45
|
+
|
|
46
|
+
Community leaders have the right and responsibility to remove, edit, or reject
|
|
47
|
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
|
48
|
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
|
49
|
+
decisions when appropriate.
|
|
50
|
+
|
|
51
|
+
## Scope
|
|
52
|
+
|
|
53
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
|
54
|
+
an individual is officially representing the community in public spaces.
|
|
55
|
+
Examples of representing our community include using an official e-mail
|
|
56
|
+
address, posting via an official social media account, or acting as an
|
|
57
|
+
appointed representative at an online or offline event.
|
|
58
|
+
|
|
59
|
+
## Enforcement
|
|
60
|
+
|
|
61
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
62
|
+
reported to the community leaders responsible for enforcement at
|
|
63
|
+
**godofcode.pratik@gmail.com**. All complaints will be reviewed and
|
|
64
|
+
investigated promptly and fairly.
|
|
65
|
+
|
|
66
|
+
All community leaders are obligated to respect the privacy and security of the
|
|
67
|
+
reporter of any incident.
|
|
68
|
+
|
|
69
|
+
## Enforcement Guidelines
|
|
70
|
+
|
|
71
|
+
Community leaders will follow these Community Impact Guidelines in determining
|
|
72
|
+
the consequences for any action they deem in violation of this Code of Conduct:
|
|
73
|
+
|
|
74
|
+
### 1. Correction
|
|
75
|
+
|
|
76
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
|
77
|
+
unprofessional or unwelcome in the community.
|
|
78
|
+
|
|
79
|
+
**Consequence**: A private, written warning from community leaders, providing
|
|
80
|
+
clarity around the nature of the violation and an explanation of why the
|
|
81
|
+
behavior was inappropriate. A public apology may be requested.
|
|
82
|
+
|
|
83
|
+
### 2. Warning
|
|
84
|
+
|
|
85
|
+
**Community Impact**: A violation through a single incident or series
|
|
86
|
+
of actions.
|
|
87
|
+
|
|
88
|
+
**Consequence**: A warning with consequences for continued behavior. No
|
|
89
|
+
interaction with the people involved, including unsolicited interaction with
|
|
90
|
+
those enforcing the Code of Conduct, for a specified period of time. This
|
|
91
|
+
includes avoiding interactions in community spaces as well as external channels
|
|
92
|
+
like social media. Violating these terms may lead to a temporary or
|
|
93
|
+
permanent ban.
|
|
94
|
+
|
|
95
|
+
### 3. Temporary Ban
|
|
96
|
+
|
|
97
|
+
**Community Impact**: A serious violation of community standards, including
|
|
98
|
+
sustained inappropriate behavior.
|
|
99
|
+
|
|
100
|
+
**Consequence**: A temporary ban from any sort of interaction or public
|
|
101
|
+
communication with the community for a specified period of time. No public or
|
|
102
|
+
private interaction with the people involved, including unsolicited
|
|
103
|
+
interaction with those enforcing the Code of Conduct, is allowed during this
|
|
104
|
+
period. Violating these terms may lead to a permanent ban.
|
|
105
|
+
|
|
106
|
+
### 4. Permanent Ban
|
|
107
|
+
|
|
108
|
+
**Community Impact**: Demonstrating a pattern of violation of community
|
|
109
|
+
standards, including sustained inappropriate behavior, harassment of an
|
|
110
|
+
individual, or aggression toward or disparagement of classes of individuals.
|
|
111
|
+
|
|
112
|
+
**Consequence**: A permanent ban from any sort of public interaction within
|
|
113
|
+
the community.
|
|
114
|
+
|
|
115
|
+
## Attribution
|
|
116
|
+
|
|
117
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
|
118
|
+
version 2.1, available at
|
|
119
|
+
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
|
120
|
+
|
|
121
|
+
Community Impact Guidelines were inspired by
|
|
122
|
+
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
|
123
|
+
|
|
124
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
|
125
|
+
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
|
126
|
+
[https://www.contributor-covenant.org/translations][translations].
|
|
127
|
+
|
|
128
|
+
[homepage]: https://www.contributor-covenant.org
|
|
129
|
+
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
|
130
|
+
[Mozilla CoC]: https://github.com/mozilla/diversity
|
|
131
|
+
[FAQ]: https://www.contributor-covenant.org/faq
|
|
132
|
+
[translations]: https://www.contributor-covenant.org/translations
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Contributing to tracefork
|
|
2
|
+
|
|
3
|
+
Thanks for considering a contribution. tracefork is a small, offline-first project —
|
|
4
|
+
please keep changes in that spirit.
|
|
5
|
+
|
|
6
|
+
## Dev setup
|
|
7
|
+
|
|
8
|
+
Python **3.12** (CI also tests 3.13) via [uv](https://docs.astral.sh/uv/).
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
uv sync --extra dev
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
This installs the runtime deps (`anthropic`, `zstandard`, `typer`, `fastapi`, `uvicorn`)
|
|
15
|
+
plus the dev toolchain (`pytest`, `pytest-asyncio`, `pytest-cov`, `ruff`, `mypy`).
|
|
16
|
+
|
|
17
|
+
## Running everything locally
|
|
18
|
+
|
|
19
|
+
All of the following are offline and **$0** — no `ANTHROPIC_API_KEY`, no network:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uv run pytest -q # full offline suite (65 tests, $0, no key)
|
|
23
|
+
uv run tracefork validate # self-validation: blame vs injected, known faults
|
|
24
|
+
uv run tracefork validate --check # regression-gate vs experiments/validation_report_committed.json
|
|
25
|
+
uv run ruff check . # lint
|
|
26
|
+
uv run ruff format --check . # format check
|
|
27
|
+
uv run mypy src/tracefork # type check
|
|
28
|
+
uv run python examples/demo_report.py # generate the demo report (examples/demo_report.html)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Run the full local gate before opening a PR:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
uv run ruff check . && uv run ruff format --check . && uv run mypy src/tracefork && uv run pytest -q
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
CI runs this same gate (plus `tracefork validate --check` and a package build) on every
|
|
38
|
+
pull request and on every push to `main`.
|
|
39
|
+
|
|
40
|
+
## Invariants a PR must respect
|
|
41
|
+
|
|
42
|
+
These are load-bearing for the project's claims and are enforced by review, not just CI:
|
|
43
|
+
|
|
44
|
+
- **Offline and $0 stays non-negotiable** for the whole test suite, the spike, `validate`,
|
|
45
|
+
and the demo. If your change needs a new kind of model response or behavior, add it to
|
|
46
|
+
the offline fakes in `src/tracefork/synthetic.py` rather than hitting the real API. The
|
|
47
|
+
only intentionally networked path is `blame` against a real run, and that is
|
|
48
|
+
budget-capped by `BudgetGovernor`.
|
|
49
|
+
- **The agent reads time/ids only through `NondetSource`.** Any direct
|
|
50
|
+
`datetime.now()` / `uuid` / `random` call in agent code breaks the determinism boundary
|
|
51
|
+
and invalidates the bit-exactness claim that replay, fork, and blame all depend on.
|
|
52
|
+
- **The verifier proves, it does not assert.** Every replayed request body is
|
|
53
|
+
sha256-checked against the tape (`transport.py`); don't weaken this to a soft
|
|
54
|
+
comparison. The drift negative control (`DriftingNondet`) must keep failing — if a
|
|
55
|
+
change makes it pass, that's a regression in the divergence detector, not a fix.
|
|
56
|
+
|
|
57
|
+
If you're touching `src/tracefork/recorder.py`, `transport.py`, `fork.py`, or `blame.py`,
|
|
58
|
+
re-read the relevant section of `CLAUDE.md` first — it documents the seams these files
|
|
59
|
+
depend on.
|
|
60
|
+
|
|
61
|
+
## Commit style
|
|
62
|
+
|
|
63
|
+
- [Conventional commits](https://www.conventionalcommits.org/) (`feat:`, `fix:`, `chore:`,
|
|
64
|
+
`docs:`, `ci:`, `test:`, `refactor:`).
|
|
65
|
+
- No `Co-Authored-By: Claude` trailer or any other AI-authorship marker on commits or PR
|
|
66
|
+
descriptions — attribute commits to yourself only.
|
|
67
|
+
|
|
68
|
+
## PR flow
|
|
69
|
+
|
|
70
|
+
1. Branch off `main`.
|
|
71
|
+
2. Make your change; keep it additive where possible (see `CLAUDE.md` for the project's
|
|
72
|
+
architecture and invariants).
|
|
73
|
+
3. Make sure the full local gate above is green.
|
|
74
|
+
4. Open a PR against `main`. CI must pass before merge.
|
|
75
|
+
|
|
76
|
+
## Questions
|
|
77
|
+
|
|
78
|
+
Open an issue using the templates in `.github/ISSUE_TEMPLATE/`, or see `SECURITY.md` if
|
|
79
|
+
you're reporting a vulnerability rather than a bug.
|
tracefork-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Pratik Soni
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|