warrantos 0.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. warrantos-0.9.2/LICENSE +21 -0
  2. warrantos-0.9.2/PKG-INFO +307 -0
  3. warrantos-0.9.2/README.md +249 -0
  4. warrantos-0.9.2/pyproject.toml +90 -0
  5. warrantos-0.9.2/setup.cfg +4 -0
  6. warrantos-0.9.2/tests/test_append_only.py +418 -0
  7. warrantos-0.9.2/tests/test_attest_cli.py +120 -0
  8. warrantos-0.9.2/tests/test_attestation.py +107 -0
  9. warrantos-0.9.2/tests/test_boundary.py +79 -0
  10. warrantos-0.9.2/tests/test_cbom.py +255 -0
  11. warrantos-0.9.2/tests/test_classification.py +173 -0
  12. warrantos-0.9.2/tests/test_classifier_corpus.py +95 -0
  13. warrantos-0.9.2/tests/test_claude_code_verify_hook.py +168 -0
  14. warrantos-0.9.2/tests/test_clean_room.py +226 -0
  15. warrantos-0.9.2/tests/test_cli.py +485 -0
  16. warrantos-0.9.2/tests/test_context_admissibility.py +171 -0
  17. warrantos-0.9.2/tests/test_context_cli.py +94 -0
  18. warrantos-0.9.2/tests/test_eval.py +1142 -0
  19. warrantos-0.9.2/tests/test_footer.py +263 -0
  20. warrantos-0.9.2/tests/test_gates.py +424 -0
  21. warrantos-0.9.2/tests/test_grade.py +482 -0
  22. warrantos-0.9.2/tests/test_ledger.py +747 -0
  23. warrantos-0.9.2/tests/test_ledger_write.py +177 -0
  24. warrantos-0.9.2/tests/test_local_llm_grader.py +265 -0
  25. warrantos-0.9.2/tests/test_mcp_server.py +330 -0
  26. warrantos-0.9.2/tests/test_merkle.py +150 -0
  27. warrantos-0.9.2/tests/test_metrics.py +322 -0
  28. warrantos-0.9.2/tests/test_overrides.py +281 -0
  29. warrantos-0.9.2/tests/test_pathguard.py +414 -0
  30. warrantos-0.9.2/tests/test_provenance.py +261 -0
  31. warrantos-0.9.2/tests/test_retention.py +191 -0
  32. warrantos-0.9.2/tests/test_review.py +113 -0
  33. warrantos-0.9.2/tests/test_review_roles.py +257 -0
  34. warrantos-0.9.2/tests/test_roles.py +107 -0
  35. warrantos-0.9.2/tests/test_salience.py +211 -0
  36. warrantos-0.9.2/tests/test_shadow_observe.py +146 -0
  37. warrantos-0.9.2/tests/test_status.py +127 -0
  38. warrantos-0.9.2/tests/test_verify.py +267 -0
  39. warrantos-0.9.2/tests/test_verify_ssrf.py +238 -0
  40. warrantos-0.9.2/tests/test_warrant_bundle.py +126 -0
  41. warrantos-0.9.2/tests/test_warrantos_cli.py +787 -0
  42. warrantos-0.9.2/tests/test_web_verify.py +170 -0
  43. warrantos-0.9.2/tests/test_web_verify_behaviour.py +194 -0
  44. warrantos-0.9.2/tests/test_writer_pack.py +168 -0
  45. warrantos-0.9.2/warrantos/__init__.py +13 -0
  46. warrantos-0.9.2/warrantos/cli/__init__.py +1 -0
  47. warrantos-0.9.2/warrantos/cli/provenance_cli.py +628 -0
  48. warrantos-0.9.2/warrantos/cli/warrantos_cli.py +1719 -0
  49. warrantos-0.9.2/warrantos/hooks/__init__.py +1 -0
  50. warrantos-0.9.2/warrantos/hooks/claude_code_verify_hook.py +188 -0
  51. warrantos-0.9.2/warrantos/hooks/provenance_check.py +317 -0
  52. warrantos-0.9.2/warrantos/provenance/__init__.py +59 -0
  53. warrantos-0.9.2/warrantos/provenance/attestation.py +143 -0
  54. warrantos-0.9.2/warrantos/provenance/boundary.py +22 -0
  55. warrantos-0.9.2/warrantos/provenance/cbom.py +270 -0
  56. warrantos-0.9.2/warrantos/provenance/classification.py +353 -0
  57. warrantos-0.9.2/warrantos/provenance/clean_room.py +297 -0
  58. warrantos-0.9.2/warrantos/provenance/context.py +30 -0
  59. warrantos-0.9.2/warrantos/provenance/context_admissibility.py +646 -0
  60. warrantos-0.9.2/warrantos/provenance/extract.py +75 -0
  61. warrantos-0.9.2/warrantos/provenance/footer.py +93 -0
  62. warrantos-0.9.2/warrantos/provenance/gates.py +537 -0
  63. warrantos-0.9.2/warrantos/provenance/grade.py +890 -0
  64. warrantos-0.9.2/warrantos/provenance/ledger.py +425 -0
  65. warrantos-0.9.2/warrantos/provenance/ledger_write.py +453 -0
  66. warrantos-0.9.2/warrantos/provenance/mcp_server.py +534 -0
  67. warrantos-0.9.2/warrantos/provenance/merkle.py +183 -0
  68. warrantos-0.9.2/warrantos/provenance/metrics.py +366 -0
  69. warrantos-0.9.2/warrantos/provenance/overrides.py +352 -0
  70. warrantos-0.9.2/warrantos/provenance/pathguard.py +94 -0
  71. warrantos-0.9.2/warrantos/provenance/retention.py +385 -0
  72. warrantos-0.9.2/warrantos/provenance/review.py +147 -0
  73. warrantos-0.9.2/warrantos/provenance/review_roles.py +95 -0
  74. warrantos-0.9.2/warrantos/provenance/roles.py +244 -0
  75. warrantos-0.9.2/warrantos/provenance/salience.py +223 -0
  76. warrantos-0.9.2/warrantos/provenance/status.py +618 -0
  77. warrantos-0.9.2/warrantos/provenance/verify.py +345 -0
  78. warrantos-0.9.2/warrantos/provenance/warrant_bundle.py +138 -0
  79. warrantos-0.9.2/warrantos/provenance/writer_pack.py +220 -0
  80. warrantos-0.9.2/warrantos.egg-info/PKG-INFO +307 -0
  81. warrantos-0.9.2/warrantos.egg-info/SOURCES.txt +83 -0
  82. warrantos-0.9.2/warrantos.egg-info/dependency_links.txt +1 -0
  83. warrantos-0.9.2/warrantos.egg-info/entry_points.txt +5 -0
  84. warrantos-0.9.2/warrantos.egg-info/requires.txt +8 -0
  85. warrantos-0.9.2/warrantos.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Juan Vega / Prometheus Policy Lab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,307 @@
1
+ Metadata-Version: 2.4
2
+ Name: warrantos
3
+ Version: 0.9.2
4
+ Summary: WarrantOS: a governance harness for AI-assisted writing that ships clean prose and a separate auditable provenance ledger
5
+ Author: Juan Vega
6
+ Maintainer: Juan Vega
7
+ License: MIT License
8
+
9
+ Copyright (c) 2026 Juan Vega / Prometheus Policy Lab
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+
29
+ Project-URL: Homepage, https://github.com/jvega017/claude-provenance
30
+ Project-URL: Repository, https://github.com/jvega017/claude-provenance
31
+ Project-URL: Issues, https://github.com/jvega017/claude-provenance/issues
32
+ Project-URL: Changelog, https://github.com/jvega017/claude-provenance/blob/main/CHANGELOG.md
33
+ Project-URL: Documentation, https://github.com/jvega017/claude-provenance/blob/main/docs/OVERVIEW.md
34
+ Keywords: claude,provenance,governance,ai-safety,cbom,warrantos,mcp
35
+ Classifier: Development Status :: 4 - Beta
36
+ Classifier: Intended Audience :: Developers
37
+ Classifier: Intended Audience :: Information Technology
38
+ Classifier: License :: OSI Approved :: MIT License
39
+ Classifier: Operating System :: OS Independent
40
+ Classifier: Programming Language :: Python :: 3
41
+ Classifier: Programming Language :: Python :: 3.8
42
+ Classifier: Programming Language :: Python :: 3.9
43
+ Classifier: Programming Language :: Python :: 3.10
44
+ Classifier: Programming Language :: Python :: 3.11
45
+ Classifier: Programming Language :: Python :: 3.12
46
+ Classifier: Programming Language :: Python :: 3.13
47
+ Classifier: Topic :: Software Development :: Quality Assurance
48
+ Classifier: Topic :: Text Processing :: Markup
49
+ Requires-Python: >=3.11
50
+ Description-Content-Type: text/markdown
51
+ License-File: LICENSE
52
+ Provides-Extra: mcp
53
+ Requires-Dist: mcp>=1.0.0; extra == "mcp"
54
+ Provides-Extra: attestation
55
+ Requires-Dist: cryptography>=41.0; extra == "attestation"
56
+ Provides-Extra: dev
57
+ Dynamic: license-file
58
+
59
+ # claude-provenance
60
+
61
+ [![ci](https://github.com/jvega017/claude-provenance/actions/workflows/ci.yml/badge.svg)](https://github.com/jvega017/claude-provenance/actions/workflows/ci.yml)
62
+ [![layers: 20B / 0P](https://img.shields.io/badge/layers-20B%20%2F%200P-brightgreen)](docs/STATUS.md)
63
+ ![version: 0.9.2](https://img.shields.io/badge/version-0.9.2-orange)
64
+ ![python: 3.11--3.13](https://img.shields.io/badge/python-3.11--3.13-blue)
65
+ ![deps: stdlib only](https://img.shields.io/badge/deps-stdlib%20only-green)
66
+
67
+ ## No claim ships without a warrant.
68
+
69
+ WarrantOS does not detect truth, and it does not try to. It enforces that every claim in an AI-assisted document carries a warrant: a source, an explicit `[CITE NEEDED]`, or a `BLOCK` on the record. In `enforce` mode, a four-state verdict (`PASS` / `HOLD` / `BLOCK` / `NOT_ASSESSABLE`) gates the output before it ships (the default `report` mode logs every miss without blocking), and every miss is written to an append-only ledger, tamper-evident against a previously distributed checkpoint, that you can hand an auditor.
70
+
71
+ It also catches the other way an AI document betrays itself: **internal scaffold and conversational residue that bleeds from the chat into the final artefact**. "Certainly! Here's the revised version", "As an AI language model, I cannot verify", "based on the information provided", "I hope this helps, let me know if you would like me to expand", a stray `[TODO: ...]` placeholder. A clean artefact carries its evidence and none of the machinery that produced it. WarrantOS blocks the machinery from shipping.
72
+
73
+ It governs the artefact, not the model. It runs at the writer's desk, on one document, before it ships, with zero infrastructure: stdlib-only, MIT, no API, no account. Governance platforms watch the system after the fact; WarrantOS gates the output before the fact.
74
+
75
+ Built in a personal capacity by an independent policy researcher for the people who publish AI-assisted writing under their own name and carry the reputational liability for a fabricated citation: research-integrity, policy, and academic-governance practitioners. It is a personal open-source project, not associated with, funded by, or endorsed by any employer or government. It is informed by the working paper *From Citation to Epistemic Governance* (Prometheus Policy Lab, in preparation): it operationalises that paper's problem framing, the gap between citation as attribution and citation as evidence, rather than its formal model.
76
+
77
+ **The honest demo.** I ran WarrantOS over the first draft of my own daily policy brief, before remediation. It returned `BLOCK`: 14 claims, 0 supported, 7 boundary violations (illustrative figures from one unremediated draft, not a fixed benchmark). That is the gate working as designed on an unremediated draft: it names the epistemic debt so it can be paid down before the artefact ships, instead of going out silently. A governance tool worth trusting is one that holds its own author to that standard.
78
+
79
+ Under the hood, `claude-provenance` wraps AI-assisted writing in an eight-layer pipeline so the final artefact ships clean prose, while a separate audit ledger carries the sources, the feedback, the review history, the transformations, and the structured overrides that produced it. The per-layer status dashboard tells you exactly what is built and what is not.
80
+
81
+ > **v0.9.2.** Build state: **20 BUILT / 0 PARTIAL** (was 13 BUILT / 3 PARTIAL / 2 STARTER / 2 NOT_BUILT at v0.9.1). All five output-integrity gates (G1-G5) and all eight foundation rows are BUILT. The final three rows closed in v0.9.2: **F-policy** (the normative spec `docs/SPEC.md` and a machine-readable six-role registry are now committed), **F-compliance** (a self-assessment control mapping to ISO/IEC 42001 and the NIST AI RMF in `docs/COMPLIANCE.md` — a documented mapping, explicitly **not** certified conformance), and **F-metrics** (shadow-log aggregation via the `warrantos metrics` command). Adopter-specific configuration (sensitivity tiers, retention windows) and an automated SPEC-ID conformance check remain future work, stated plainly in those docs. See [`docs/STATUS.md`](docs/STATUS.md) before evaluating scope.
82
+
83
+ ## Quickstart
84
+
85
+ Install from PyPI:
86
+
87
+ ```bash
88
+ pip install warrantos # MCP server extra: pip install "warrantos[mcp]"
89
+ ```
90
+
91
+ To run the bundled demo below, use a source checkout (it ships the examples):
92
+
93
+ ```bash
94
+ git clone https://github.com/jvega017/claude-provenance.git
95
+ cd claude-provenance
96
+ python -m pip install -e ".[mcp]"
97
+
98
+ # Run the bundled demo: writes per-run artefacts under .warrant/runs/
99
+ warrantos check examples/quickstart-demo/draft.md \
100
+ --context examples/quickstart-demo/context.json \
101
+ --actor-identity examples/quickstart-demo/actor.json \
102
+ --profile final-prose
103
+ ```
104
+
105
+ Expected verdict: `HOLD` with one unsupported load-bearing claim. The bundled command exercises Layer 1 classification, Layer 4 admissibility, Layer 7 G1 (boundary), Layer 7 G2 detection, CBOM assembly, and the four-state verdict consolidator; add `--verify` to run the G2 verifier and `--writer-model`/`--verifier-model` to run G3. G4 (safety and contamination) and G5 (evaluation and calibration) are BUILT but are not exercised by this minimal demo.
106
+
107
+ | Where to go next | Doc |
108
+ |---|---|
109
+ | Five-minute tour with explanation of each output line | [`docs/QUICKSTART.md`](docs/QUICKSTART.md) |
110
+ | Per-layer conformance dashboard (BUILT / PARTIAL) | [`docs/STATUS.md`](docs/STATUS.md) |
111
+ | Whole-repository tour | [`docs/OVERVIEW.md`](docs/OVERVIEW.md) |
112
+ | Connect to Claude Code or Claude Desktop as MCP tools | [`docs/MCP-CONFIG.md`](docs/MCP-CONFIG.md) |
113
+ | Verify without an Anthropic API key (local LLM, Stop hook) | [`docs/NO-API-KEY.md`](docs/NO-API-KEY.md) |
114
+ | Cost model and spend control | [`docs/COST.md`](docs/COST.md) |
115
+ | Architecture and layer map | [`docs/STACK.md`](docs/STACK.md) |
116
+
117
+ ## Tooling map
118
+
119
+ | Entry point | What it does | When to use |
120
+ |---|---|---|
121
+ | `warrantos` | Full pipeline (classify > admissibility > gates > verdict > CBOM) | Default. This is the one. |
122
+ | `warrantos-mcp` | Stdio MCP server exposing four tools to Claude Code / Claude Desktop | When you want Claude to call the pipeline as tools |
123
+ | `warrantos-verify-hook` | Claude Code Stop-hook entry point for in-session verification | When you want the loop closed without a separate API key |
124
+ | `warrantos attest` | Bundle a checked run into a portable, signed `.warrant` artefact | When an artefact needs to travel with a verifiable audit proof |
125
+ | `warrantos verify-external` | Verify a `.warrant` offline; exits non-zero on failure | In CI, or for any third party with only the file |
126
+ | `web/verify.html` | Zero-backend browser verifier for a `.warrant` | When a reader has no install and only the file |
127
+ | `provenance` | Legacy v0.3 citation-only CLI | Kept for v0.3 users; new users should use `warrantos` |
128
+
129
+ ## The four-verdict model
130
+
131
+ | Verdict | Trigger | Action |
132
+ |---|---|---|
133
+ | `PASS` | No boundary violation, no unsupported load-bearing claim, no contradicted verifier verdict, actor identity present for final-prose | Ship the artefact |
134
+ | `HOLD` | Unsupported or unverifiable load-bearing claim, or a same-actor writer/reviewer override on a final-artefact profile (separation of duties) | Add a citation, downgrade the claim, or obtain an independent review |
135
+ | `BLOCK` | Boundary violation in final-prose, a contradicted verifier verdict, or a same-actor override on the strict `audit` profile | Rewrite the offending text, or obtain an independent reviewer |
136
+ | `NOT_ASSESSABLE` | Final-prose without `--actor-identity` | Supply actor identity or use a non-final-prose profile |
137
+
138
+ `NOT_ASSESSABLE` is deliberate. Most tools binary-ise into pass/fail. The fourth state names the case where the artefact is missing the metadata required to certify, instead of certifying on incomplete information.
139
+
140
+ The four verdicts are exercised end-to-end in the [`examples/`](examples/) gallery: one runnable case per verdict, plus a `tools/run_gallery.py` thesis demo that runs all four and asserts each example produces its documented verdict. CI runs the same demo on every push.
141
+
142
+ ## The honest pitch
143
+
144
+ `claude-provenance` does not guarantee that AI-assisted writing is correct. No tool can. It guarantees five operational properties instead:
145
+
146
+ 1. **Unsourced claims are expensive, not invisible.** The detector logs every unsupported factual sentence; the ledger keeps the count over time.
147
+ 2. **Process material cannot leak into final prose silently.** The Layer 7 G1 boundary gate blocks "based on your feedback" and the rest of the lexical-residue pattern set under the `final-prose` profile.
148
+ 3. **Overrides cannot reach the public artefact without a structured rationale.** Empty `risk_accepted` or `compensating_control` blocks the write; SQLite `BEFORE UPDATE` triggers (INV-004) prevent silent post-hoc edits.
149
+ 4. **Separation of duties is a verdict-layer property.** When an override records the writer and reviewer as the same actor, `consolidate_verdict()` acts on it: a final-artefact profile (`final-prose`, `paper-full`, `methodology`, `consultation_report`, `audit`) is downgraded to `HOLD`, except the strict `audit` profile, which goes to `BLOCK`. An independent reviewer is required to certify `PASS`. Enforced on both the CLI and MCP paths; the helper `enforce_single_actor_rule` and the reader-facing footer surface the flag for a human reader (SPEC-L8-S003).
150
+ 5. **The four-state verdict refuses to certify on incomplete information.** `NOT_ASSESSABLE` fires when the metadata required to certify is missing, instead of `PASS` masking the gap.
151
+
152
+ What this does **not** guarantee: that the underlying model produced correct text, or that a cited source is the strongest available source. Data Classification and Retention/Tombstones are now BUILT, but they ship with default tiers and windows: adopters must still configure their own sensitivity taxonomy and retention policy for their domain.
153
+
154
+ ## What landed in v0.9.0b1
155
+
156
+ User-outcome language; SPEC IDs in [`CHANGELOG.md`](CHANGELOG.md).
157
+
158
+ - One CLI runs the full pipeline end-to-end (`warrantos check`).
159
+ - Human overrides cannot be recorded without a written risk-acceptance rationale and a compensating-control note. The check is at the write path, so the row does not exist if the rationale is missing. SQLite `BEFORE UPDATE` and `BEFORE DELETE` triggers on every ledger table mean recorded rows cannot be silently edited or deleted later (storage-level append-only, installed by default, not application-level discipline). This covers the SQLite ledger the hook writes misses to; the per-run JSON artefacts under `.warrant/runs/` are working output, not the append-only ledger.
160
+ - Separation-of-duties helper (`provenance/overrides.py::enforce_single_actor_rule`) detects a reviewer-equals-writer pair when an override is recorded and surfaces it in the reader-facing footer. The same check is wired into `consolidate_verdict()` on the CLI and MCP paths: a final-artefact profile is downgraded to `HOLD`, the strict `audit` profile to `BLOCK`.
161
+ - MCP server exposes four tools (`warrant_check`, `warrant_classify`, `warrant_record_override`, `warrant_get_run`) callable from any MCP host.
162
+ - Shadow-mode observer runs over an already-published artefact in read-only mode. Never blocks. Never modifies production scripts.
163
+ - `warrantos status` reports a per-layer build state, and `docs/STATUS.md` carries the rendered table.
164
+ - Empirical calibration: the prose-boundary gate ships a `prompt-template` profile after a 10-brief calibration pass produced unactionable false positives under the `brief-light` profile.
165
+
166
+ ## How it works: the Provenance Loop
167
+
168
+ The Provenance Loop is the original v0.3 mental model: **Extract** the claim, **Bind** a source to it, **Verify** the source supports the claim, **Adjudicate** the verdict, **Ledger** the result. In v0.9 the loop is one component of the eight-layer WarrantOS pipeline, specifically Layer 2 (Ledger) and Layer 7-G2 (Source and Warrant Check). For the full architecture see [`docs/OVERVIEW.md`](docs/OVERVIEW.md); for the loop itself see [`docs/PROVENANCE-LOOP.md`](docs/PROVENANCE-LOOP.md).
169
+
170
+ ## Offline-verifiable warrants
171
+
172
+ A verdict you have to trust is weaker than one you can recompute. WarrantOS turns a checked run into a portable, tamper-evident `.warrant` bundle that a third party verifies offline, with no access to your ledger and no network call.
173
+
174
+ - **Tamper-evident ledger.** A deterministic, RFC 6962-style Merkle tree (`provenance.merkle`, pure stdlib) over the audit entries. One root digest fixes the entire ledger state: any insert, edit, delete, or reorder changes it.
175
+ - **Signed checkpoint and portable bundle.** `create_warrant()` packages the prose digest, the CBOM, the relevant ledger entries, and an Ed25519-signed checkpoint into one `.warrant` file. Signing uses the optional `[attestation]` extra; the integrity check needs nothing beyond the standard library.
176
+ - **Fail-closed verification.** `warrantos verify-external` recomputes the Merkle root and matches the checkpoint. An unsigned or signature-unavailable bundle is overall `INVALID` unless `--allow-unsigned` is passed explicitly. A client-side browser verifier (`web/verify.html`) is validated against the Python verifier by a differential test over the supported value domain, and renders all untrusted fields as inert text under a strict CSP.
177
+
178
+ ```bash
179
+ warrantos attest final.md --run-dir .warrant/runs/<id> --out final.warrant
180
+ warrantos verify-external final.warrant --prose final.md # exits non-zero on any failure
181
+ ```
182
+
183
+ Full detail in [`docs/VERIFICATION.md`](docs/VERIFICATION.md). The envelope is project-defined, with a DSSE/COSE migration under consideration.
184
+
185
+ ## Why this exists
186
+
187
+ This plugin is an operational companion to a working paper, *From Citation to
188
+ Epistemic Governance* (Prometheus Policy Lab, in preparation). It takes the
189
+ paper's problem framing and burden-of-proof stance, not its formal apparatus:
190
+ the provenance tuple, the five-valued confidence scale, and the warrant-decay
191
+ model are the paper's contribution, not this tool's. The argument is that the
192
+ AI failures that matter most in high-stakes work are often not model-capability
193
+ failures but epistemic ones: the model states something with confidence and no
194
+ traceable source, a human under time pressure ships it, and the error was
195
+ never about model size. The fix is a loop that refuses to let an unsourced or
196
+ unverified claim pass silently.
197
+
198
+ ## Two axes: detection and verification
199
+
200
+ `claude-provenance` separates two questions that most tools conflate.
201
+
202
+ **Axis 1, detection (in-session, stdlib only, zero network).** The hook reads
203
+ what the model wrote and classifies each factual sentence as **supported** (a
204
+ source is present in its own sentence or the line directly below it),
205
+ **tagged** (an explicit `[CITE NEEDED]`, treated as honest), or
206
+ **unsupported** (nothing). A source two or more sentences away does not rescue
207
+ a claim: that bleed was the v0 false negative and is closed by design. This
208
+ axis stays a fast tripwire that never does network I/O and never breaks the
209
+ session.
210
+
211
+ **Axis 2, verification (out of band).** The verifier takes a detected claim,
212
+ fetches the cited URL, and assigns one of: **verified**, **contradicted**,
213
+ **not_addressed**, **unverifiable** (a citation exists but cannot be
214
+ machine-checked, for example an `(Author, Year)` with no URL), **skipped**, or
215
+ **error**. By default this uses an offline token-overlap heuristic. If
216
+ `ANTHROPIC_API_KEY` is set it uses an LLM grader, and on any failure it falls
217
+ back to the heuristic. The verifier is never called from the blocking hook.
218
+
219
+ The detector catches the cheap, common failure. The verifier targets the
220
+ expensive one: a claim that is confidently cited and wrong. Detecting an
221
+ outright contradiction, as opposed to mere non-support, needs a configured
222
+ LLM grader; the offline default flags `unsupported` and `unverifiable` but
223
+ never emits `contradicted`.
224
+
225
+ ## Install as a Claude Code plugin (legacy v0.3 hook)
226
+
227
+ The Claude Code plugin currently wires the **legacy v0.3** in-session Stop hook (`hooks/provenance_check.py`), not the WarrantOS surfaces. It remains a fast, stdlib-only citation tripwire for live Claude Code sessions. For the v0.9 WarrantOS pipeline (CLI + MCP server + per-layer dashboard + four-state verdict) use the source install above; the WarrantOS plugin wiring is a v0.10 design item.
228
+
229
+ ```
230
+ /plugin marketplace add /path/to/claude-provenance
231
+ /plugin install claude-provenance
232
+ ```
233
+
234
+ The plugin install gives you the in-session Stop hook and slash commands (`/provenance-report`, `/provenance-verify`). Requires Python 3.11+ on `PATH`. No third-party packages for the core; the `[mcp]` extra adds the `mcp` SDK.
235
+
236
+ ## Configuration
237
+
238
+ Environment variables. See [`docs/COST.md`](docs/COST.md) for spend-control flags and [`docs/NO-API-KEY.md`](docs/NO-API-KEY.md) for local-LLM and Stop-hook configuration.
239
+
240
+ | Variable | Values | Default |
241
+ |-------------------------------------|----------------------------|-------------------------------|
242
+ | `PROVENANCE_MODE` | `report`, `enforce`, `off` | `report` |
243
+ | `PROVENANCE_DB` | path to SQLite file | `./.provenance/provenance.db` |
244
+ | `WARRANTOS_DB` | path to SQLite file | `./.warrant/provenance.db` |
245
+ | `ANTHROPIC_API_KEY` | API key | unset (verifier stays offline)|
246
+ | `PROVENANCE_GRADER_MODEL` | model id | `claude-haiku-4-5-20251001` |
247
+ | `PROVENANCE_LOCAL_GRADER_URL` | URL | unset (use heuristic) |
248
+ | `PROVENANCE_LOCAL_GRADER_MODEL` | model name | `llama3.2` |
249
+
250
+ `PROVENANCE_MODE` controls the legacy Stop hook: **report** logs every run and prints a summary, non-blocking; **enforce** blocks the end of a turn or a file write when an unsupported factual claim is present; **off** disables the hook. The Stop hook is loop-safe and never blocks the same turn twice. With no API key the verifier degrades to the offline heuristic with no error.
251
+
252
+ ## Legacy v0.3 CLI
253
+
254
+ The `provenance` entry point is kept for users on the v0.3 mental model (citations only). New users should use `warrantos` instead, which wraps detection, verification, admissibility, gates, and the override ledger as one pipeline. The legacy CLI runs the detection-and-verification loop over a file, a directory, or stdin, outside a live session:
255
+
256
+ ```
257
+ python cli/provenance_cli.py path/to/draft.md # offline detection
258
+ python cli/provenance_cli.py --verify path/to/draft.md # fetch and grade
259
+ python cli/provenance_cli.py --ci docs/ # exit 1 on a miss
260
+ python cli/provenance_cli.py --cbom --context context.json final.md
261
+ ```
262
+
263
+ `--ci` exits 1 if any claim is `contradicted` or `unsupported`. `--json` emits machine-readable output. CBOM mode (`--cbom`) classifies context material and scans final prose for process leakage such as "based on your feedback".
264
+
265
+ In a Claude session, `/provenance-report` summarises the ledger and `/provenance-verify` runs the verification stage.
266
+
267
+ ## Governance: epistemic debt
268
+
269
+ The ledger is the point, not a side effect. `provenance/ledger.py` computes an
270
+ **epistemic-debt** metric (load-bearing unsupported claims, normalised, with a
271
+ direction over the last runs) and exports an evidence matrix to Markdown or
272
+ CSV. Load-bearing is scored by `provenance/salience.py`: a statutory reference
273
+ inside a recommendation is weighted above a date in passing. The governance
274
+ question is not "is this sentence cited" but "is our AI-assisted output getting
275
+ more or less sourced over time", and the ledger answers it.
276
+
277
+ ## Evaluation
278
+
279
+ `eval/run_eval.py` runs the detector against the seed corpus and prints precision, recall and F1 at run time. The harness also runs a grader-precision-recall evaluation against a 60-item labelled corpus (`eval/corpus/grader.jsonl`) and reports per-class metrics, a five-by-six confusion matrix and a governance-framed caveat block. An evaluation-only cross-model backend (`python eval/run_eval.py --grader codex`) drives a local Codex CLI for a same-task different-model probe; it is never auto-selected and never run in CI. The numbers are corpus-dependent and not a claim of general accuracy. See [`eval/README.md`](eval/README.md), which states the limits, the analytic nature of the contradicted-class zero for the offline heuristic, and the relevant prior art (McCoy, Pavlick and Linzen 2019; Gao et al. 2023) plainly. The corpora are regression and illustration seeds, not validated benchmarks.
280
+
281
+ ## Tests
282
+
283
+ Stdlib only, no test dependencies. From the repo root:
284
+
285
+ ```
286
+ python -m unittest discover -s tests -v
287
+ ```
288
+
289
+ The suite covers detection (every trigger, inline and adjacent sourcing, the closed v0 false negative); the loop-safety guard; enforce-mode blocking; the verifier (mocked network, LLM-failure fallback, no-key path); the CBOM v0.2 schema (`actor_identity`, `classification_overrides`, `override_ledger_refs`); the four-state verdict including `NOT_ASSESSABLE`; the override ledger (SPEC-L8-S004 write-path validation, SPEC-L8-S003 separation-of-duties, INV-004 append-only triggers); the writer pack and clean-room generation; the five output-integrity gates G1-G5; the MCP server dispatch and the in-process API; the Claude Code Stop hook with loop-safety sentinel; the local LLM grader path; the per-layer status dashboard; and the grader-eval path (`sys.modules`-isolated under a unique spec name). The CI matrix runs the full suite on Python 3.11 through 3.13. See the CI badge above for the live count and pass status.
290
+
291
+ The rule that an internal error must never break the session is enforced throughout.
292
+
293
+ ## Release status
294
+
295
+ Current increment: **v0.9.2**. It builds on v0.9.1 (verdict-layer separation of duties, the cryptographic-verifiability wave: Merkle ledger, Ed25519 attestation, portable `.warrant`, offline and browser verifiers, AI scaffold-residue detection, append-only triggers installed by default, and pre-launch security hardening) and adds: claim detection expanded from 5 to 11 triggers (decision, causal, comparative, superlative, named-body), per-profile unsupported-claim HOLD thresholds, improved verdict transparency, a `ClaudeCliGrader` that verifies through a Claude subscription with no API spend, the four formerly STARTER/NOT_BUILT rows moved to BUILT (G4, G5, Data Classification, Retention/Tombstones), a Python floor lifted to 3.11, and a CI smoke-test fix. The three formerly PARTIAL rows (F-policy, F-compliance, F-metrics) are also closed in this release, now that the normative SPEC document is committed, and the package is published on PyPI. See [`CHANGELOG.md`](CHANGELOG.md) and [`docs/STATUS.md`](docs/STATUS.md). Next: adopter-specific configuration (sensitivity tiers, retention windows) and an automated SPEC-ID conformance check.
296
+
297
+ ## Limits, stated plainly
298
+
299
+ The detector is a heuristic and will produce false positives and false
300
+ negatives. Offline verification only checks token overlap, not meaning. A
301
+ correctly sourced claim can still be misleading or selectively cited. This
302
+ tool makes an unsourced or unchecked claim expensive instead of invisible. It
303
+ does not replace human review and does not claim to.
304
+
305
+ ## Licence
306
+
307
+ MIT. Built by Juan Vega, Prometheus Policy Lab.
@@ -0,0 +1,249 @@
1
+ # claude-provenance
2
+
3
+ [![ci](https://github.com/jvega017/claude-provenance/actions/workflows/ci.yml/badge.svg)](https://github.com/jvega017/claude-provenance/actions/workflows/ci.yml)
4
+ [![layers: 20B / 0P](https://img.shields.io/badge/layers-20B%20%2F%200P-brightgreen)](docs/STATUS.md)
5
+ ![version: 0.9.2](https://img.shields.io/badge/version-0.9.2-orange)
6
+ ![python: 3.11--3.13](https://img.shields.io/badge/python-3.11--3.13-blue)
7
+ ![deps: stdlib only](https://img.shields.io/badge/deps-stdlib%20only-green)
8
+
9
+ ## No claim ships without a warrant.
10
+
11
+ WarrantOS does not detect truth, and it does not try to. It enforces that every claim in an AI-assisted document carries a warrant: a source, an explicit `[CITE NEEDED]`, or a `BLOCK` on the record. A four-state verdict (`PASS` / `HOLD` / `BLOCK` / `NOT_ASSESSABLE`) gates the output before it ships in `enforce` mode (the default `report` mode logs every miss without blocking), and every miss is written to an append-only ledger, tamper-evident against a previously distributed checkpoint, that you can hand an auditor.
12
+
13
+ It also catches the other way an AI document betrays itself: **internal scaffold and conversational residue that bleeds from the chat into the final artefact**. "Certainly! Here's the revised version", "As an AI language model, I cannot verify", "based on the information provided", "I hope this helps, let me know if you would like me to expand", a stray `[TODO: ...]` placeholder. A clean artefact carries its evidence and none of the machinery that produced it. WarrantOS blocks the machinery from shipping.
14
+
15
+ It governs the artefact, not the model. It runs at the writer's desk, on one document, before it ships, with zero infrastructure: stdlib-only, MIT, no API, no account. Governance platforms watch the system after the fact; WarrantOS gates the output before the fact.
16
+
17
+ Built in a personal capacity by an independent policy researcher for the people who publish AI-assisted writing under their own name and carry the reputational liability for a fabricated citation: research-integrity, policy, and academic-governance practitioners. It is a personal open-source project, not associated with, funded by, or endorsed by any employer or government. It is informed by the working paper *From Citation to Epistemic Governance* (Prometheus Policy Lab, in preparation): it operationalises that paper's problem framing, the gap between citation as attribution and citation as evidence, rather than its formal model.
18
+
19
+ **The honest demo.** I ran WarrantOS over the first draft of my own daily policy brief, before remediation. It returned `BLOCK`: 14 claims, 0 supported, 7 boundary violations (illustrative figures from one unremediated draft, not a fixed benchmark). That is the gate working as designed on an unremediated draft: it names the epistemic debt so it can be paid down before the artefact ships, instead of going out silently. A governance tool worth trusting is one that holds its own author to that standard.
20
+
21
+ Under the hood, `claude-provenance` wraps AI-assisted writing in an eight-layer pipeline so the final artefact ships clean prose, while a separate audit ledger carries the sources, the feedback, the review history, the transformations, and the structured overrides that produced it. The per-layer status dashboard tells you exactly what is built and what is not.
22
+
23
+ > **v0.9.2.** Build state: **20 BUILT / 0 PARTIAL** (was 13 BUILT / 3 PARTIAL / 2 STARTER / 2 NOT_BUILT at v0.9.1). All five output-integrity gates (G1-G5) and all eight foundation rows are BUILT. The final three rows closed in v0.9.2: **F-policy** (the normative spec `docs/SPEC.md` and a machine-readable six-role registry are now committed), **F-compliance** (a self-assessment control mapping to ISO/IEC 42001 and the NIST AI RMF in `docs/COMPLIANCE.md` — a documented mapping, explicitly **not** certified conformance), and **F-metrics** (shadow-log aggregation via the `warrantos metrics` command). Adopter-specific configuration (sensitivity tiers, retention windows) and an automated SPEC-ID conformance check remain future work, stated plainly in those docs. See [`docs/STATUS.md`](docs/STATUS.md) before evaluating scope.
24
+
25
+ ## Quickstart
26
+
27
+ Install from PyPI:
28
+
29
+ ```bash
30
+ pip install warrantos # MCP server extra: pip install "warrantos[mcp]"
31
+ ```
32
+
33
+ To run the bundled demo below, use a source checkout (it ships the examples):
34
+
35
+ ```bash
36
+ git clone https://github.com/jvega017/claude-provenance.git
37
+ cd claude-provenance
38
+ python -m pip install -e ".[mcp]"
39
+
40
+ # Run the bundled demo: writes per-run artefacts under .warrant/runs/
41
+ warrantos check examples/quickstart-demo/draft.md \
42
+ --context examples/quickstart-demo/context.json \
43
+ --actor-identity examples/quickstart-demo/actor.json \
44
+ --profile final-prose
45
+ ```
46
+
47
+ Expected verdict: `HOLD` with one unsupported load-bearing claim. The bundled command exercises Layer 1 classification, Layer 4 admissibility, Layer 7 G1 (boundary), Layer 7 G2 detection, CBOM assembly, and the four-state verdict consolidator; add `--verify` to run the G2 verifier and `--writer-model`/`--verifier-model` to run G3. G4 (safety and contamination) and G5 (evaluation and calibration) are BUILT but are not exercised by this minimal demo.
48
+
49
+ | Where to go next | Doc |
50
+ |---|---|
51
+ | Five-minute tour with explanation of each output line | [`docs/QUICKSTART.md`](docs/QUICKSTART.md) |
52
+ | Per-layer conformance dashboard (BUILT / PARTIAL) | [`docs/STATUS.md`](docs/STATUS.md) |
53
+ | Whole-repository tour | [`docs/OVERVIEW.md`](docs/OVERVIEW.md) |
54
+ | Connect to Claude Code or Claude Desktop as MCP tools | [`docs/MCP-CONFIG.md`](docs/MCP-CONFIG.md) |
55
+ | Verify without an Anthropic API key (local LLM, Stop hook) | [`docs/NO-API-KEY.md`](docs/NO-API-KEY.md) |
56
+ | Cost model and spend control | [`docs/COST.md`](docs/COST.md) |
57
+ | Architecture and layer map | [`docs/STACK.md`](docs/STACK.md) |
58
+
59
+ ## Tooling map
60
+
61
+ | Entry point | What it does | When to use |
62
+ |---|---|---|
63
+ | `warrantos` | Full pipeline (classify > admissibility > gates > verdict > CBOM) | Default. This is the one. |
64
+ | `warrantos-mcp` | Stdio MCP server exposing four tools to Claude Code / Claude Desktop | When you want Claude to call the pipeline as tools |
65
+ | `warrantos-verify-hook` | Claude Code Stop-hook entry point for in-session verification | When you want the loop closed without a separate API key |
66
+ | `warrantos attest` | Bundle a checked run into a portable, signed `.warrant` artefact | When an artefact needs to travel with a verifiable audit proof |
67
+ | `warrantos verify-external` | Verify a `.warrant` offline; exits non-zero on failure | In CI, or for any third party with only the file |
68
+ | `web/verify.html` | Zero-backend browser verifier for a `.warrant` | When a reader has no install and only the file |
69
+ | `provenance` | Legacy v0.3 citation-only CLI | Kept for v0.3 users; new users should use `warrantos` |
70
+
71
+ ## The four-verdict model
72
+
73
+ | Verdict | Trigger | Action |
74
+ |---|---|---|
75
+ | `PASS` | No boundary violation, no unsupported load-bearing claim, no contradicted verifier verdict, actor identity present for final-prose | Ship the artefact |
76
+ | `HOLD` | Unsupported or unverifiable load-bearing claim, or a same-actor writer/reviewer override on a final-artefact profile (separation of duties) | Add a citation, downgrade the claim, or obtain an independent review |
77
+ | `BLOCK` | Boundary violation in final-prose, a contradicted verifier verdict, or a same-actor override on the strict `audit` profile | Rewrite the offending text, or obtain an independent reviewer |
78
+ | `NOT_ASSESSABLE` | Final-prose without `--actor-identity` | Supply actor identity or use a non-final-prose profile |
79
+
80
+ `NOT_ASSESSABLE` is deliberate. Most tools binary-ise into pass/fail. The fourth state names the case where the artefact is missing the metadata required to certify, instead of certifying on incomplete information.
81
+
82
+ The four verdicts are exercised end-to-end in the [`examples/`](examples/) gallery: one runnable case per verdict, plus a `tools/run_gallery.py` thesis demo that runs all four and asserts each example produces its documented verdict. CI runs the same demo on every push.
83
+
84
+ ## The honest pitch
85
+
86
+ `claude-provenance` does not guarantee that AI-assisted writing is correct. No tool can. It guarantees five operational properties instead:
87
+
88
+ 1. **Unsourced claims are expensive, not invisible.** The detector logs every unsupported factual sentence; the ledger keeps the count over time.
89
+ 2. **Process material cannot leak into final prose silently.** The Layer 7 G1 boundary gate blocks "based on your feedback" and the rest of the lexical-residue pattern set under the `final-prose` profile.
90
+ 3. **Overrides cannot reach the public artefact without a structured rationale.** Empty `risk_accepted` or `compensating_control` blocks the write; SQLite `BEFORE UPDATE` triggers (INV-004) prevent silent post-hoc edits.
91
+ 4. **Separation of duties is a verdict-layer property.** When an override records the writer and reviewer as the same actor, `consolidate_verdict()` acts on it: a final-artefact profile (`final-prose`, `paper-full`, `methodology`, `consultation_report`, `audit`) is downgraded to `HOLD`, except the strict `audit` profile, which goes to `BLOCK` instead. An independent reviewer is required to certify `PASS`. Enforced on both the CLI and MCP paths; the helper `enforce_single_actor_rule` and the reader-facing footer surface the flag for a human reader (SPEC-L8-S003).
92
+ 5. **The four-state verdict refuses to certify on incomplete information.** `NOT_ASSESSABLE` fires when the metadata required to certify is missing, instead of `PASS` masking the gap.
93
+
94
+ What this does **not** guarantee: that the underlying model produced correct text, or that a cited source is the strongest available source. Data Classification and Retention/Tombstones are now BUILT, but they ship with default tiers and windows: adopters must still configure their own sensitivity taxonomy and retention policy for their domain.
95
+
96
+ ## What landed in v0.9.0b1
97
+
98
+ The changes below are described in user-outcome language; the corresponding SPEC IDs are in [`CHANGELOG.md`](CHANGELOG.md).
99
+
100
+ - One CLI runs the full pipeline end-to-end (`warrantos check`).
101
+ - Human overrides cannot be recorded without a written risk-acceptance rationale and a compensating-control note. The check is at the write path, so the row does not exist if the rationale is missing. SQLite `BEFORE UPDATE` and `BEFORE DELETE` triggers on every ledger table mean recorded rows cannot be silently edited or deleted later (storage-level append-only, installed by default, not application-level discipline). This covers the SQLite ledger the hook writes misses to; the per-run JSON artefacts under `.warrant/runs/` are working output, not the append-only ledger.
102
+ - Separation-of-duties helper (`provenance/overrides.py::enforce_single_actor_rule`) detects a reviewer-equals-writer pair when an override is recorded and surfaces it in the reader-facing footer. The same check is wired into `consolidate_verdict()` on the CLI and MCP paths: a final-artefact profile is downgraded to `HOLD`, the strict `audit` profile to `BLOCK`.
103
+ - MCP server exposes four tools (`warrant_check`, `warrant_classify`, `warrant_record_override`, `warrant_get_run`) callable from any MCP host.
104
+ - Shadow-mode observer runs over an already-published artefact in read-only mode. Never blocks. Never modifies production scripts.
105
+ - `warrantos status` reports a per-layer build state, and `docs/STATUS.md` carries the rendered table.
106
+ - Empirical calibration: the prose-boundary gate ships a `prompt-template` profile after a 10-brief calibration pass produced unactionable false positives under the `brief-light` profile.
107
+
108
+ ## How it works: the Provenance Loop
109
+
110
+ The Provenance Loop is the original v0.3 mental model: **Extract** the claim, **Bind** a source to it, **Verify** the source supports the claim, **Adjudicate** the verdict, **Ledger** the result. In v0.9 the loop is one component of the eight-layer WarrantOS pipeline, specifically Layer 2 (Ledger) and Layer 7-G2 (Source and Warrant Check). For the full architecture see [`docs/OVERVIEW.md`](docs/OVERVIEW.md); for the loop itself see [`docs/PROVENANCE-LOOP.md`](docs/PROVENANCE-LOOP.md).
111
+
112
+ ## Offline-verifiable warrants
113
+
114
+ A verdict you have to trust is weaker than one you can recompute. WarrantOS turns a checked run into a portable, tamper-evident `.warrant` bundle that a third party verifies offline, with no access to your ledger and no network call.
115
+
116
+ - **Tamper-evident ledger.** A deterministic, RFC 6962 style Merkle tree (`provenance.merkle`, pure stdlib) over the audit entries. One root digest fixes the entire ledger state: any insert, edit, delete, or reorder changes it.
117
+ - **Signed checkpoint and portable bundle.** `create_warrant()` packages the prose digest, the CBOM, the relevant ledger entries, and an Ed25519-signed checkpoint into one `.warrant` file. Signing uses the optional `[attestation]` extra; the integrity check needs nothing beyond the standard library.
118
+ - **Fail-closed verification.** `warrantos verify-external` recomputes the Merkle root and matches the checkpoint. An unsigned or signature-unavailable bundle is overall `INVALID` unless `--allow-unsigned` is passed explicitly. A client-side browser verifier (`web/verify.html`) is validated against the Python verifier by a differential test over the supported value domain, and renders all untrusted fields as inert text under a strict CSP.
119
+
120
+ ```bash
121
+ warrantos attest final.md --run-dir .warrant/runs/<id> --out final.warrant
122
+ warrantos verify-external final.warrant --prose final.md # exits non-zero on any failure
123
+ ```
124
+
125
+ Full detail in [`docs/VERIFICATION.md`](docs/VERIFICATION.md). The envelope is project-defined, with a DSSE/COSE migration under consideration.
126
+
127
+ ## Why this exists
128
+
129
+ This plugin is an operational companion to a working paper, *From Citation to
130
+ Epistemic Governance* (Prometheus Policy Lab, in preparation). It takes the
131
+ paper's problem framing and burden-of-proof stance, not its formal apparatus:
132
+ the provenance tuple, the five-valued confidence scale, and the warrant-decay
133
+ model are the paper's contribution, not this tool's. The argument is that the
134
+ AI failures that matter most in high-stakes work are often not model-capability
135
+ failures but epistemic ones: the model states something with confidence and no
136
+ traceable source, a human under time pressure ships it, and the error was
137
+ never about model size. The fix is a loop that refuses to let an unsourced or
138
+ unverified claim pass silently.
139
+
140
+ ## Two axes: detection and verification
141
+
142
+ `claude-provenance` separates two questions that most tools conflate.
143
+
144
+ **Axis 1, detection (in-session, stdlib only, zero network).** The hook reads
145
+ what the model wrote and classifies each factual sentence as **supported** (a
146
+ source is present in its own sentence or the line directly below it),
147
+ **tagged** (an explicit `[CITE NEEDED]`, treated as honest), or
148
+ **unsupported** (nothing). A source two or more sentences away does not rescue
149
+ a claim: that bleed was the v0 false negative and is closed by design. This
150
+ axis stays a fast tripwire that never does network I/O and never breaks the
151
+ session.
152
+
153
+ **Axis 2, verification (out of band).** The verifier takes a detected claim,
154
+ fetches the cited URL, and assigns one of: **verified**, **contradicted**,
155
+ **not_addressed**, **unverifiable** (a citation exists but cannot be
156
+ machine-checked, for example an `(Author, Year)` with no URL), **skipped**, or
157
+ **error**. By default this uses an offline token-overlap heuristic. If
158
+ `ANTHROPIC_API_KEY` is set it uses an LLM grader, and on any failure it falls
159
+ back to the heuristic. The verifier is never called from the blocking hook.
160
+
161
+ The detector catches the cheap, common failure. The verifier targets the
162
+ expensive one: a claim that is confidently cited and wrong. Detecting an
163
+ outright contradiction, as opposed to mere non-support, needs a configured
164
+ LLM grader; the offline default flags `unsupported` and `unverifiable` but
165
+ never emits `contradicted`.
166
+
167
+ ## Install as a Claude Code plugin (legacy v0.3 hook)
168
+
169
+ The Claude Code plugin currently wires the **legacy v0.3** in-session Stop hook (`hooks/provenance_check.py`), not the WarrantOS surfaces. It remains a fast, stdlib-only citation tripwire for live Claude Code sessions. For the v0.9 WarrantOS pipeline (CLI + MCP server + per-layer dashboard + four-state verdict) use the source install above; the WarrantOS plugin wiring is a v0.10 design item.
170
+
171
+ ```
172
+ /plugin marketplace add /path/to/claude-provenance
173
+ /plugin install claude-provenance
174
+ ```
175
+
176
+ The plugin install gives you the in-session Stop hook and slash commands (`/provenance-report`, `/provenance-verify`). Requires Python 3.11+ on `PATH`. No third-party packages for the core; the `[mcp]` extra adds the `mcp` SDK.
177
+
178
+ ## Configuration
179
+
180
+ Environment variables. See [`docs/COST.md`](docs/COST.md) for spend-control flags and [`docs/NO-API-KEY.md`](docs/NO-API-KEY.md) for local-LLM and Stop-hook configuration.
181
+
182
+ | Variable | Values | Default |
183
+ |-------------------------------------|----------------------------|-------------------------------|
184
+ | `PROVENANCE_MODE` | `report`, `enforce`, `off` | `report` |
185
+ | `PROVENANCE_DB` | path to SQLite file | `./.provenance/provenance.db` |
186
+ | `WARRANTOS_DB` | path to SQLite file | `./.warrant/provenance.db` |
187
+ | `ANTHROPIC_API_KEY` | API key | unset (verifier stays offline)|
188
+ | `PROVENANCE_GRADER_MODEL` | model id | `claude-haiku-4-5-20251001` |
189
+ | `PROVENANCE_LOCAL_GRADER_URL` | URL | unset (use heuristic) |
190
+ | `PROVENANCE_LOCAL_GRADER_MODEL` | model name | `llama3.2` |
191
+
192
+ `PROVENANCE_MODE` controls the legacy Stop hook: **report** logs every run and prints a summary, non-blocking; **enforce** blocks the end of a turn or a file write when an unsupported factual claim is present; **off** disables the hook. The Stop hook is loop-safe and never blocks the same turn twice. With no API key the verifier degrades to the offline heuristic with no error.
193
+
194
+ ## Legacy v0.3 CLI
195
+
196
+ The `provenance` entry point is kept for users on the v0.3 mental model (citations only). New users should use `warrantos` instead, which wraps detection, verification, admissibility, gates, and the override ledger as one pipeline. The legacy CLI runs the detection-and-verification loop over a file, a directory, or stdin, outside a live session:
197
+
198
+ ```
199
+ python cli/provenance_cli.py path/to/draft.md # offline detection
200
+ python cli/provenance_cli.py --verify path/to/draft.md # fetch and grade
201
+ python cli/provenance_cli.py --ci docs/ # exit 1 on a miss
202
+ python cli/provenance_cli.py --cbom --context context.json final.md
203
+ ```
204
+
205
+ `--ci` exits 1 if any claim is `contradicted` or `unsupported`. `--json` emits machine-readable output. CBOM mode (`--cbom`) classifies context material and scans final prose for process leakage such as "based on your feedback".
206
+
207
+ In a Claude session, `/provenance-report` summarises the ledger and `/provenance-verify` runs the verification stage.
208
+
209
+ ## Governance: epistemic debt
210
+
211
+ The ledger is the point, not a side effect. `provenance/ledger.py` computes an
212
+ **epistemic-debt** metric (load-bearing unsupported claims, normalised, with a
213
+ direction over the last runs) and exports an evidence matrix to Markdown or
214
+ CSV. Load-bearing is scored by `provenance/salience.py`: a statutory reference
215
+ inside a recommendation is weighted above a date in passing. The governance
216
+ question is not "is this sentence cited" but "is our AI-assisted output getting
217
+ more or less sourced over time", and the ledger answers it.
218
+
219
+ ## Evaluation
220
+
221
+ `eval/run_eval.py` runs the detector against the seed corpus and prints precision, recall and F1 at run time. The harness also runs a grader-precision-recall evaluation against a 60-item labelled corpus (`eval/corpus/grader.jsonl`) and reports per-class metrics, a five-by-six confusion matrix and a governance-framed caveat block. An evaluation-only cross-model backend (`python eval/run_eval.py --grader codex`) drives a local Codex CLI for a same-task different-model probe; it is never auto-selected and never run in CI. The numbers are corpus-dependent and not a claim of general accuracy. See [`eval/README.md`](eval/README.md), which states the limits, the analytic nature of the contradicted-class zero for the offline heuristic, and the relevant prior art (McCoy, Pavlick and Linzen 2019; Gao et al. 2023) plainly. The corpora are regression and illustration seeds, not validated benchmarks.
222
+
223
+ ## Tests
224
+
225
+ Stdlib only, no test dependencies. From the repo root:
226
+
227
+ ```
228
+ python -m unittest discover -s tests -v
229
+ ```
230
+
231
+ The suite covers detection (every trigger, inline and adjacent sourcing, the closed v0 false negative); the loop-safety guard; enforce-mode blocking; the verifier (mocked network, LLM-failure fallback, no-key path); the CBOM v0.2 schema (`actor_identity`, `classification_overrides`, `override_ledger_refs`); the four-state verdict including `NOT_ASSESSABLE`; the override ledger (SPEC-L8-S004 write-path validation, SPEC-L8-S003 separation-of-duties, INV-004 append-only triggers); the writer pack and clean-room generation; the five output-integrity gates G1-G5; the MCP server dispatch and the in-process API; the Claude Code Stop hook with loop-safety sentinel; the local LLM grader path; the per-layer status dashboard; and the grader-eval path (`sys.modules`-isolated under a unique spec name). The CI matrix runs the full suite on Python 3.11 through 3.13. See the CI badge above for the live count and pass status.
232
+
233
+ The rule that an internal error must never break the session is enforced throughout.
234
+
235
+ ## Release status
236
+
237
+ Current increment: **v0.9.2**. It builds on v0.9.1 (verdict-layer separation of duties, the cryptographic-verifiability wave: Merkle ledger, Ed25519 attestation, portable `.warrant`, offline and browser verifiers, AI scaffold-residue detection, append-only triggers installed by default, and pre-launch security hardening) and adds: claim detection expanded from 5 to 11 triggers (decision, causal, comparative, superlative, named-body), per-profile unsupported-claim HOLD thresholds, improved verdict transparency, a `ClaudeCliGrader` that verifies through a Claude subscription with no API spend, the four formerly STARTER/NOT_BUILT rows moved to BUILT (G4, G5, Data Classification, Retention/Tombstones), the three formerly PARTIAL rows closed (F-policy, F-compliance, F-metrics), a Python floor lifted to 3.11, and a CI smoke-test fix. The build state is now 20 BUILT / 0 PARTIAL. See [`CHANGELOG.md`](CHANGELOG.md) and [`docs/STATUS.md`](docs/STATUS.md). Still future work: adopter-specific configuration (sensitivity tiers, retention windows) and an automated SPEC-ID conformance check.
238
+
239
+ ## Limits, stated plainly
240
+
241
+ The detector is a heuristic and will produce false positives and false
242
+ negatives. Offline verification only checks token overlap, not meaning. A
243
+ correctly sourced claim can still be misleading or selectively cited. This
244
+ tool makes an unsourced or unchecked claim expensive instead of invisible. It
245
+ does not replace human review and does not claim to.
246
+
247
+ ## Licence
248
+
249
+ MIT. Built by Juan Vega, Prometheus Policy Lab.