sum-engine 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. sum_engine-0.5.0/PKG-INFO +313 -0
  2. sum_engine-0.5.0/README.md +252 -0
  3. sum_engine-0.5.0/pyproject.toml +171 -0
  4. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_cli/main.py +798 -24
  5. sum_engine-0.5.0/sum_engine.egg-info/PKG-INFO +313 -0
  6. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine.egg-info/SOURCES.txt +24 -1
  7. sum_engine-0.5.0/sum_engine.egg-info/entry_points.txt +3 -0
  8. sum_engine-0.5.0/sum_engine.egg-info/requires.txt +45 -0
  9. sum_engine-0.5.0/sum_engine_internal/adapters/__init__.py +3 -0
  10. sum_engine-0.5.0/sum_engine_internal/adapters/format_pivot.py +235 -0
  11. sum_engine-0.5.0/sum_engine_internal/algorithms/chunked_corpus.py +226 -0
  12. sum_engine-0.5.0/sum_engine_internal/algorithms/minhash.py +157 -0
  13. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/algorithms/semantic_arithmetic.py +122 -5
  14. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/algorithms/syntactic_sieve.py +118 -3
  15. sum_engine-0.5.0/sum_engine_internal/ensemble/data/__init__.py +11 -0
  16. sum_engine-0.5.0/sum_engine_internal/ensemble/data/common_english_2000.txt +2000 -0
  17. sum_engine-0.5.0/sum_engine_internal/ensemble/data/common_english_5000.txt +5000 -0
  18. sum_engine-0.5.0/sum_engine_internal/ensemble/live_llm_adapter.py +463 -0
  19. sum_engine-0.5.0/sum_engine_internal/ensemble/llm_dispatch.py +341 -0
  20. sum_engine-0.5.0/sum_engine_internal/ensemble/s25_interventions.py +258 -0
  21. sum_engine-0.5.0/sum_engine_internal/ensemble/slider_renderer.py +953 -0
  22. sum_engine-0.5.0/sum_engine_internal/ensemble/tome_sliders.py +276 -0
  23. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/jcs.py +56 -11
  24. sum_engine-0.5.0/sum_engine_internal/infrastructure/jose_envelope.py +406 -0
  25. sum_engine-0.5.0/sum_engine_internal/mcp_server/__init__.py +32 -0
  26. sum_engine-0.5.0/sum_engine_internal/mcp_server/__main__.py +11 -0
  27. sum_engine-0.5.0/sum_engine_internal/mcp_server/errors.py +120 -0
  28. sum_engine-0.5.0/sum_engine_internal/mcp_server/server.py +781 -0
  29. sum_engine-0.5.0/sum_engine_internal/merkle_sidecar/__init__.py +37 -0
  30. sum_engine-0.5.0/sum_engine_internal/merkle_sidecar/tree.py +247 -0
  31. sum_engine-0.5.0/sum_engine_internal/render_receipt/__init__.py +41 -0
  32. sum_engine-0.5.0/sum_engine_internal/render_receipt/verifier.py +176 -0
  33. sum_engine-0.5.0/sum_engine_internal/research/__init__.py +21 -0
  34. sum_engine-0.5.0/sum_engine_internal/research/sheaf_laplacian.py +231 -0
  35. sum_engine-0.5.0/sum_engine_internal/trust_root/__init__.py +42 -0
  36. sum_engine-0.5.0/sum_engine_internal/trust_root/verifier.py +83 -0
  37. sum_engine-0.3.0/PKG-INFO +0 -620
  38. sum_engine-0.3.0/README.md +0 -576
  39. sum_engine-0.3.0/pyproject.toml +0 -99
  40. sum_engine-0.3.0/sum_engine.egg-info/PKG-INFO +0 -620
  41. sum_engine-0.3.0/sum_engine.egg-info/entry_points.txt +0 -2
  42. sum_engine-0.3.0/sum_engine.egg-info/requires.txt +0 -23
  43. sum_engine-0.3.0/sum_engine_internal/ensemble/live_llm_adapter.py +0 -183
  44. sum_engine-0.3.0/sum_engine_internal/ensemble/tome_sliders.py +0 -104
  45. {sum_engine-0.3.0 → sum_engine-0.5.0}/LICENSE +0 -0
  46. {sum_engine-0.3.0 → sum_engine-0.5.0}/setup.cfg +0 -0
  47. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_cli/__init__.py +0 -0
  48. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine.egg-info/dependency_links.txt +0 -0
  49. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine.egg-info/top_level.txt +0 -0
  50. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/__init__.py +0 -0
  51. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/algorithms/__init__.py +0 -0
  52. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/algorithms/causal_discovery.py +0 -0
  53. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/algorithms/predicate_canon.py +0 -0
  54. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/algorithms/zk_semantics.py +0 -0
  55. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/__init__.py +0 -0
  56. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/automated_scientist.py +0 -0
  57. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/autonomous_agent.py +0 -0
  58. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/causal_triggers.py +0 -0
  59. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/confidence_calibrator.py +0 -0
  60. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/epistemic_arbiter.py +0 -0
  61. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/epistemic_loop.py +0 -0
  62. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/extraction_validator.py +0 -0
  63. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/gauge_orchestrator.py +0 -0
  64. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/llm_entailment.py +0 -0
  65. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/mass_semantic_engine.py +0 -0
  66. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/ouroboros.py +0 -0
  67. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/semantic_dedup.py +0 -0
  68. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/tome_generator.py +0 -0
  69. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/vector_bridge.py +0 -0
  70. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/ensemble/venn_abers.py +0 -0
  71. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/__init__.py +0 -0
  72. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/akashic_ledger.py +0 -0
  73. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/canonical_codec.py +0 -0
  74. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/key_manager.py +0 -0
  75. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/p2p_mesh.py +0 -0
  76. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/prov_o.py +0 -0
  77. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/provenance.py +0 -0
  78. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/rate_limiter.py +0 -0
  79. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/resource_guards.py +0 -0
  80. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/scheme_registry.py +0 -0
  81. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/state_encoding.py +0 -0
  82. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/telemetry.py +0 -0
  83. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/tome_parser.py +0 -0
  84. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/verifiable_credential.py +0 -0
  85. {sum_engine-0.3.0 → sum_engine-0.5.0}/sum_engine_internal/infrastructure/zig_bridge.py +0 -0
@@ -0,0 +1,313 @@
1
+ Metadata-Version: 2.4
2
+ Name: sum-engine
3
+ Version: 0.5.0
4
+ Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
5
+ Author: ototao
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/OtotaO/SUM
8
+ Project-URL: Repository, https://github.com/OtotaO/SUM
9
+ Project-URL: Proof Boundary, https://github.com/OtotaO/SUM/blob/main/docs/PROOF_BOUNDARY.md
10
+ Project-URL: Feature Catalog, https://github.com/OtotaO/SUM/blob/main/docs/FEATURE_CATALOG.md
11
+ Keywords: knowledge-graph,verifiable-credentials,attestation,godel-encoding,semantic-web,agent-cli
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
19
+ Classifier: Topic :: Security :: Cryptography
20
+ Classifier: Environment :: Console
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: cryptography>=41.0.0
25
+ Requires-Dist: sympy>=1.12
26
+ Provides-Extra: sieve
27
+ Requires-Dist: spacy>=3.7.0; extra == "sieve"
28
+ Provides-Extra: llm
29
+ Requires-Dist: openai<3.0.0,>=1.40.0; extra == "llm"
30
+ Requires-Dist: pydantic>=2.0.0; extra == "llm"
31
+ Provides-Extra: anthropic
32
+ Requires-Dist: anthropic>=0.97.0; extra == "anthropic"
33
+ Requires-Dist: pydantic>=2.0.0; extra == "anthropic"
34
+ Provides-Extra: receipt-verify
35
+ Requires-Dist: joserfc>=1.0.0; extra == "receipt-verify"
36
+ Provides-Extra: mcp
37
+ Requires-Dist: mcp>=1.0.0; extra == "mcp"
38
+ Provides-Extra: research
39
+ Requires-Dist: numpy>=1.24.0; extra == "research"
40
+ Requires-Dist: scipy>=1.10.0; extra == "research"
41
+ Provides-Extra: omni-format
42
+ Requires-Dist: markitdown==0.1.5; extra == "omni-format"
43
+ Provides-Extra: dev
44
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
45
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
46
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
47
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
48
+ Requires-Dist: types-setuptools; extra == "dev"
49
+ Requires-Dist: PyJWT>=2.8.0; extra == "dev"
50
+ Requires-Dist: build>=1.0.0; extra == "dev"
51
+ Requires-Dist: hypothesis>=6.0.0; extra == "dev"
52
+ Provides-Extra: all
53
+ Requires-Dist: sum-engine[sieve]; extra == "all"
54
+ Requires-Dist: sum-engine[llm]; extra == "all"
55
+ Requires-Dist: sum-engine[anthropic]; extra == "all"
56
+ Requires-Dist: sum-engine[receipt-verify]; extra == "all"
57
+ Requires-Dist: sum-engine[mcp]; extra == "all"
58
+ Requires-Dist: sum-engine[omni-format]; extra == "all"
59
+ Requires-Dist: sum-engine[dev]; extra == "all"
60
+ Dynamic: license-file
61
+
62
+ # SUM — verifiable bidirectional knowledge distillation
63
+
64
+ [![CI](https://github.com/OtotaO/SUM/actions/workflows/quantum-ci.yml/badge.svg)](https://github.com/OtotaO/SUM/actions/workflows/quantum-ci.yml)
65
+ [![PyPI — sum-engine](https://img.shields.io/pypi/v/sum-engine.svg?label=PyPI%20sum-engine)](https://pypi.org/project/sum-engine/)
66
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
67
+ [![Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
68
+
69
+ > **A cross-runtime trust surface for LLM-rendered text.** Three runtimes (Python, Node, modern browsers) produce byte-identical Ed25519 signatures over the same JCS-canonical bytes. Every render through the hosted Worker carries a detached-JWS receipt (`sum.render_receipt.v1`) that any third party can verify offline against `/.well-known/jwks.json`. Live at https://sum-demo.ototao.workers.dev.
70
+
71
+ That is the load-bearing claim and what makes SUM different from a generic summarisation tool. The cryptographic side is **mechanically proven** — three independent verifier implementations agreeing byte-for-byte on every signed bundle, locked in CI on every PR. The semantic side (extraction quality, slider fact preservation) is **empirically measured** with explicit per-corpus numbers and explicit per-corpus boundaries; SUM does not blur the line between the two. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) is the arbiter.
72
+
73
+ Headline supporting numbers (each links to its source of truth):
74
+
75
+ | Claim | Status | Source |
76
+ |---|---|---|
77
+ | Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
78
+ | Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
79
+ | Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
80
+ | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
81
+ | Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
82
+
83
+ A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
84
+
85
+ ---
86
+
87
+ ## Verify it yourself in 60 seconds
88
+
89
+ The trust loop: hit the live Worker, get back a tome plus a detached Ed25519 JWS over the JCS-canonicalised receipt payload, fetch the issuer JWKS, verify.
90
+
91
+ ```bash
92
+ # 1. JWKS — single Ed25519 OKP JWK, application/jwk-set+json
93
+ curl -sS https://sum-demo.ototao.workers.dev/.well-known/jwks.json | jq .
94
+ # → {"keys":[{"crv":"Ed25519","kty":"OKP","x":"...","alg":"EdDSA","use":"sig","kid":"sum-render-2026-04-27-1"}]}
95
+
96
+ # 2. Render — tome + render_receipt (signed JWS over JCS payload)
97
+ curl -sS -X POST https://sum-demo.ototao.workers.dev/api/render \
98
+ -H 'content-type: application/json' \
99
+ -d '{"triples":[["alice","graduated","2012"],["alice","born","1990"]],
100
+ "slider_position":{"density":1.0,"length":0.5,"formality":0.7,"audience":0.5,"perspective":0.5}}' \
101
+ | jq '.render_receipt | {schema, kid, payload, jws_segments: (.jws | split(".") | length)}'
102
+ ```
103
+
104
+ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §A.5; the same format is reachable from Python (`joserfc` + `jcs`), Go, and Rust per §3.
105
+
106
+ ---
107
+
108
+ ## What ships today
109
+
110
+ | Surface | Status | Verifies |
111
+ |---|---|---|
112
+ | `pip install 'sum-engine[sieve]'` — `sum attest` / `sum verify` / `sum render` / `sum resolve` / `sum ledger` / `sum inspect` / `sum schema` | shipped on PyPI ≥ 0.4.1 | structural reconstruction; HMAC-SHA256 + Ed25519 signatures (W3C VC 2.0 `eddsa-jcs-2022`); bidirectional `sum attest` ↔ `sum render` symmetry from the shell |
113
+ | Cloudflare Worker at `sum-demo.ototao.workers.dev` | shipped | `/api/render` → tome + `render_receipt`; `/.well-known/jwks.json` → JWKS; `/api/qid` → Wikidata resolver |
114
+ | Single-file browser demo (`single_file_demo/index.html`) | shipped | paste prose → in-browser attest → CanonicalBundle JSON; same bytes verify under `node standalone_verifier/verify.js` (Chrome / Firefox / Safari with WebCrypto Ed25519 support) |
115
+ | Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
116
+ | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned via the Worker (Anthropic, Cloudflare AI Gateway optional) | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable |
117
+ | MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
118
+
119
+ The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
120
+
121
+ ### LLM narrative round-trip — closed across measured corpora (2026-04-28)
122
+
123
+ The hardest measurement in `PROOF_BOUNDARY.md` is the full LLM narrative round-trip (`text → LLM-extract → axioms → LLM-generate → prose' → LLM-extract → axioms'`). The unprompted-pipeline baseline on `seed_v1` was **drift = 107.75% / exact-match recall = 0.12** — facts preserved, keys not.
124
+
125
+ A two-layer generator-side intervention (canonical-first generator prompt + constrained-decoding extractor with vocab-pinned `Literal` enums + lemma-exclusion of source-predicate lemmas from the canonical-padding set) now closes this across every measured corpus shape:
126
+
127
+ | Corpus | n_docs | axioms / doc | combined recall | drift_pct | full recall |
128
+ |---|---:|---:|---:|---:|---:|
129
+ | seed_v1 (single-fact SVO) | 50 | 1 | **1.0000** | 0.00 | 50 / 50 |
130
+ | seed_v2 (7 difficulty parse patterns + multi-fact) | 20 | 1–2 | **0.9750** | 5.00 | 19 / 20 |
131
+ | seed_long_paragraphs (16-topic multi-paragraph) | 16 | 11–28 | **0.9972** | 0.57 | 15 / 16 |
132
+
133
+ The combined intervention lands **≥ 0.97 recall and ≤ 5 % drift on every measured corpus shape** — single-fact short-form, multi-fact difficulty-pattern, and multi-paragraph dense-prose. The §2.5 closure therefore holds across all three measured corpus shapes; this is an empirical result, not a proof of corpus independence. The remaining gap on `seed_v2` and `seed_long_paragraphs` traces to upstream LLM source-extraction artifacts (corrupted axioms on seed_v2 doc_015, semantically-duplicate predicates on seed_long solar_system), not to the intervention pattern.
134
+
135
+ Receipt artifacts:
136
+ - [`fixtures/bench_receipts/s25_generator_side_2026-04-28.json`](fixtures/bench_receipts/s25_generator_side_2026-04-28.json) — full ablation matrix on seed_v1.
137
+ - [`fixtures/bench_receipts/s25_residual_closure_2026-04-28.json`](fixtures/bench_receipts/s25_residual_closure_2026-04-28.json) — combined + lemma-exclusion on seed_v1.
138
+ - [`fixtures/bench_receipts/s25_generator_side_seed_v2_2026-04-28.json`](fixtures/bench_receipts/s25_generator_side_seed_v2_2026-04-28.json) — all three ablations on seed_v2.
139
+ - [`fixtures/bench_receipts/s25_generator_side_seed_long_combined_2026-04-28.json`](fixtures/bench_receipts/s25_generator_side_seed_long_combined_2026-04-28.json) — combined ablation on seed_long_paragraphs.
140
+
141
+ Reproducible: `python -m scripts.bench.runners.s25_generator_side --ablation combined --corpus <path> --out <path>` (~$0.07–$0.20 OpenAI per corpus, ~3–8 min wall clock). Full attribution + per-ablation breakdowns + per-doc failure analysis in [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.5.
142
+
143
+ The deterministic canonical round-trip (the one `sum attest | sum verify` exercises) is **mechanically proven** (§1.1, 0.00% drift). The LLM round-trip is **not**, and this section is here to keep that distinction above the fold.
144
+
145
+ ---
146
+
147
+ ## CLI quick start
148
+
149
+ ```bash
150
+ pip install 'sum-engine[sieve]'
151
+
152
+ echo "Alice likes cats. Bob owns a dog." \
153
+ | sum attest --extractor=sieve > bundle.json
154
+
155
+ sum verify --input bundle.json
156
+ # → sum: ✓ verified 2 axiom(s), state integer matches (hmac=absent, ed25519=absent)
157
+
158
+ sum render < bundle.json > tome.md
159
+ # → bundle's axioms re-emitted as canonical prose; round-trips to the same state integer
160
+ ```
161
+
162
+ The reverse direction also runs under explicit slider control. The local path actions only the density slider; non-neutral length / formality / audience / perspective require the LLM extrapolator and route through the hosted Worker:
163
+
164
+ ```bash
165
+ sum render --density 0.5 < bundle.json
166
+ # → keeps the lex-prefix half of the axioms; @sliders header records what was requested
167
+
168
+ sum render --length 0.9 --use-worker https://sum.ototao.com --json < bundle.json
169
+ # → LLM-conditioned tome + signed render_receipt (sum.render_receipt.v1) on stdout
170
+ ```
171
+
172
+ Add cryptographic attestation with one flag:
173
+
174
+ ```bash
175
+ # Ed25519 / W3C VC 2.0 (eddsa-jcs-2022)
176
+ python -m scripts.generate_did_web --domain your.example --private-key-out keys/issuer.pem
177
+ sum attest --ed25519-key keys/issuer.pem < prose.txt | sum verify --strict
178
+ # → hmac=absent, ed25519=verified
179
+ ```
180
+
181
+ The same bundle bytes verify under `sum verify` (Python), `node standalone_verifier/verify.js` (WebCrypto), and the in-browser demo (SubtleCrypto). [`docs/DID_SETUP.md`](docs/DID_SETUP.md) walks the did:key / did:web issuer setup. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1 documents what the cross-runtime Ed25519 contract proves.
182
+
183
+ ### Calling SUM from MCP-aware LLM clients
184
+
185
+ ```bash
186
+ pip install 'sum-engine[mcp,sieve]'
187
+ # Claude Desktop / Claude Code / Cursor / Continue: add to MCP config:
188
+ # { "mcpServers": { "sum": { "command": "sum-mcp" } } }
189
+ ```
190
+
191
+ `sum-mcp` exposes `extract`, `attest`, `verify`, `inspect`, `schema` as MCP tools. Bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers — same canonical codec. See [`docs/MCP_INTEGRATION.md`](docs/MCP_INTEGRATION.md) for the full client wiring.
192
+
193
+ ### Calling SUM over HTTP
194
+
195
+ The hosted Worker at `https://sum.ototao.com` exposes `/api/render`, `/api/complete`, `/api/qid`, and the `/.well-known/{jwks,revoked-kids}.json` verification surfaces. [`docs/API_REFERENCE.md`](docs/API_REFERENCE.md) is the wire spec — request/response shapes, error codes, the six-step receipt-verification flow, working Node + Python examples. Use this when the caller is a web app, mobile app, or server-side service; use the MCP server when the caller is a local LLM client.
196
+
197
+ ---
198
+
199
+ ## How the trust loop fits together
200
+
201
+ ```
202
+ prose ─► /api/render ─► tome
203
+ + render_receipt {kid, payload, jws}
204
+ │
205
+ ▼
206
+ /.well-known/jwks.json
207
+ (Ed25519 OKP JWK by kid)
208
+ │
209
+ ▼
210
+ jose.flattenedVerify(JCS(payload))
211
+ │
212
+ ▼
213
+ render attested ✓ — issuer signed
214
+ (this tome, these triples, this slider
215
+ position, this model, at this time)
216
+ ```
217
+
218
+ The receipt is a *render attestation*, not a truth oracle. Fact preservation is verified by the bench (NLI audit on weak cells). The receipt is what a downstream system keeps as durable proof; the tome is what a reader consumes. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5.
219
+
220
+ ---
221
+
222
+ ## Underlying substrate
223
+
224
+ Below the slider sits the substrate that earlier phases shipped and verified. Pointers, not paraphrase — every claim links to its source-of-truth doc.
225
+
226
+ - **Canonical round-trip conservation (provable).** `reconstruct(parse(canonical_tome(S))) == S` for every Gödel state `S`. 0.00% drift on `seed_tiny_v1` / `seed_v1` / `seed_v2`. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1.
227
+ - **Cross-runtime state equivalence (provable).** Python (`sympy`), Node (BigInt + Miller-Rabin), in-browser JS produce byte-identical state integers. Locked by 4 harnesses (`make xruntime` + `make xruntime-adversarial`). [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2.
228
+ - **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
229
+ - **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
230
+ - **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
231
+ - **103 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
232
+
233
+ ---
234
+
235
+ ## Reproduce the bench
236
+
237
+ ```bash
238
+ # Short corpus (n=8, 4–12 triples/doc, ~$0.30, ~2 min with NLI)
239
+ bash scripts/bench/run_paragraphs.sh
240
+
241
+ # Long corpus (n=16, 9–24 triples/doc, ~$1.50, ~10 min with NLI)
242
+ bash scripts/bench/run_long_paragraphs.sh
243
+ ```
244
+
245
+ Both runners require `OPENAI_API_KEY` (NLI audit + extraction). Pinned model snapshots are mandatory; the harness raises `SystemExit` on unpinned identifiers (see [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.6). Output is NDJSON `sum.slider_drift_bench.v1`, with per-cell strict / normalized / semantic / NLI fact-preservation columns.
246
+
247
+ ---
248
+
249
+ ## Future developments
250
+
251
+ This roadmap names only unshipped work. Items that have already landed are recorded in [`CHANGELOG.md`](CHANGELOG.md) under `[Unreleased]`. Detailed sequencing lives in [`docs/NEXT_SESSION_PLAYBOOK.md`](docs/NEXT_SESSION_PLAYBOOK.md).
252
+
253
+ **Closing the LLM round-trip drift.** This is the headline open problem. In the unprompted baseline, the full LLM round-trip (`text → LLM-extract → axioms → LLM-generate → prose' → LLM-extract → axioms'`) produces 107.75 % drift and 0.12 exact-match recall on `seed_v1` — facts preserved, keys not. Closing this gap is a canonicalisation problem (entity resolution, predicate normalisation, pinned-vocabulary extraction). The generator-side intervention reported under "LLM narrative round-trip" above closes it on the three measured corpora; general entity-resolution and predicate-normalisation passes are not shipped yet. See [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.5 for the full attribution and per-document failure modes.
254
+
255
+ **Hardening backlog**
256
+
257
+ - `sha256_128_v2` default-activation — Python ↔ Node byte-identity now locked (12-key K1-v2 + 6-state K2-v2 gate runs on every PR; `scripts/verify_godel_v2_cross_runtime.py`). The default scheme stays `sha256_64_v1`; flipping the default is a separate operator decision that requires a `bundle_version` minor bump per `docs/COMPATIBILITY_POLICY.md`. The migration path is now empirically open.
258
+ - `/api/qid` accuracy floor — **measured 2026-04-28** on a 30-term hand-curated corpus across people, places, concepts, and common nouns: **hit-rate 100% (30/30), label-substring-match 100% (24/24, excluding 6 common-noun rows)**. Receipt at [`fixtures/bench_receipts/qid_accuracy_2026-04-28.json`](fixtures/bench_receipts/qid_accuracy_2026-04-28.json) under schema `sum.qid_resolution_accuracy.v1`. **Boundary:** label-substring match accepted `relativity` → `Q201607 (Relativity Records)` — a music-label entity, not the physics theory. The two-tier metric is robust to the quirks of `wbsearchentities` but does not measure semantic accuracy against canonical Q-IDs; that is a follow-on requiring hand-verified ground-truth pairs. The current resolver is a thin layer over `wbsearchentities`; SPARQL-driven disambiguation that prefers the most-linked-to entity for ambiguous terms remains an unshipped enhancement.
259
+ - Threat-model validation — every documented defence in [`docs/THREAT_MODEL.md`](docs/THREAT_MODEL.md) gets an executable test.
260
+ - Delta-bundle composition semantics — specifies what `bundle.is_delta` means cross-runtime.
261
+ - Sigstore / cosign signing of release artifacts.
262
+ - LLM-extraction honesty guardrails — `extraction.verifiable: true | false` so signed ≠ true is visible at the consumer interface.
263
+ - Calibration-set authoring for the Venn-Abers conformal-interval implementation that already ships.
264
+ - Remaining sieve recall work on `seed_v2` (apposition / relative-clause / compound-conjunct) — gated on the §2.5 work, see [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §6.
265
+
266
+ **Platform surface (post-hardening)**
267
+
268
+ Source anchoring in the bundle schema, bundle explorer / viewer, `sum verify --explain`, `sum tutorial` onboarding, shareable bundle URLs `/b/{hash}`, PWA-installable demo, `sum attest <url>` fetch mode. Each item names its dependency in [`docs/NEXT_SESSION_PLAYBOOK.md`](docs/NEXT_SESSION_PLAYBOOK.md).
269
+
270
+ ---
271
+
272
+ ## Verification surface
273
+
274
+ `make help` lists every dev command. Common targets:
275
+
276
+ ```bash
277
+ make install # editable install with sieve + dev extras
278
+ make test # full pytest run (1000+ tests)
279
+ make xruntime # cross-runtime K1/K1-mw/K2/K3/K4 (Python ↔ Node)
280
+ make xruntime-adversarial # rejection-matrix A1–A6
281
+ make fortress # 21-check pure-math invariants
282
+ make smoke # fresh-venv install + attest|verify round-trip
283
+ make demo # open the single-file browser demo
284
+ ```
285
+
286
+ CI runs the full suite on every push (`.github/workflows/quantum-ci.yml`); the `cross-runtime-harness` job runs K1–K4 + A1–A6 on Node 22; `pypi-install-smoke` builds the wheel and runs `echo prose | sum attest | sum verify` in a throwaway venv.
287
+
288
+ ---
289
+
290
+ ## Truthfulness contract
291
+
292
+ Every claim in this repo carries an explicit epistemic status — `provable`, `certified`, `empirical-benchmark`, or `expert-opinion`. The arbiter is [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md). A summary surface that quotes an empirical-benchmark number alongside language like "mathematically guaranteed" is a policy violation per §5 and must be corrected.
293
+
294
+ Performance language (`fast`, `efficient`, `low-latency`, `scalable`) requires a benchmark in the same commit. Adversarial input agreement (the A-matrix) is a separate proof from valid-input agreement (the K-matrix); both run in CI.
295
+
296
+ If a number in this README disagrees with [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) or [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md), the docs are canonical and this README is wrong.
297
+
298
+ ---
299
+
300
+ ## Contributing
301
+
302
+ 1. Fork and branch.
303
+ 2. `make install && make test && make xruntime`.
304
+ 3. Read [`docs/NEXT_SESSION_PLAYBOOK.md`](docs/NEXT_SESSION_PLAYBOOK.md) for principles, stop-the-line triggers, and the work-ordering rule.
305
+ 4. Open a PR. Every claim added to docs or commit messages must trace to a test, a measurement, or an explicit `designed, not proved` label.
306
+
307
+ [`CONTRIBUTING.md`](CONTRIBUTING.md) has the test-gate matrix and the verification-gate runbook.
308
+
309
+ ---
310
+
311
+ ## License
312
+
313
+ Apache 2.0. See [`LICENSE`](LICENSE).
@@ -0,0 +1,252 @@
1
+ # SUM — verifiable bidirectional knowledge distillation
2
+
3
+ [![CI](https://github.com/OtotaO/SUM/actions/workflows/quantum-ci.yml/badge.svg)](https://github.com/OtotaO/SUM/actions/workflows/quantum-ci.yml)
4
+ [![PyPI — sum-engine](https://img.shields.io/pypi/v/sum-engine.svg?label=PyPI%20sum-engine)](https://pypi.org/project/sum-engine/)
5
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
6
+ [![Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
7
+
8
+ > **A cross-runtime trust surface for LLM-rendered text.** Three runtimes (Python, Node, modern browsers) produce byte-identical Ed25519 signatures over the same JCS-canonical bytes. Every render through the hosted Worker carries a detached-JWS receipt (`sum.render_receipt.v1`) that any third party can verify offline against `/.well-known/jwks.json`. Live at https://sum-demo.ototao.workers.dev.
9
+
10
+ That is the load-bearing claim and what makes SUM different from a generic summarisation tool. The cryptographic side is **mechanically proven** — three independent verifier implementations agreeing byte-for-byte on every signed bundle, locked in CI on every PR. The semantic side (extraction quality, slider fact preservation) is **empirically measured** with explicit per-corpus numbers and explicit per-corpus boundaries; SUM does not blur the line between the two. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) is the arbiter.
11
+
12
+ Headline supporting numbers (each links to its source of truth):
13
+
14
+ | Claim | Status | Source |
15
+ |---|---|---|
16
+ | Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
17
+ | Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
18
+ | Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
19
+ | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
20
+ | Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
21
+
22
+ A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
23
+
24
+ ---
25
+
26
+ ## Verify it yourself in 60 seconds
27
+
28
+ The trust loop: hit the live Worker, get back a tome plus a detached Ed25519 JWS over the JCS-canonicalised receipt payload, fetch the issuer JWKS, verify.
29
+
30
+ ```bash
31
+ # 1. JWKS — single Ed25519 OKP JWK, application/jwk-set+json
32
+ curl -sS https://sum-demo.ototao.workers.dev/.well-known/jwks.json | jq .
33
+ # → {"keys":[{"crv":"Ed25519","kty":"OKP","x":"...","alg":"EdDSA","use":"sig","kid":"sum-render-2026-04-27-1"}]}
34
+
35
+ # 2. Render — tome + render_receipt (signed JWS over JCS payload)
36
+ curl -sS -X POST https://sum-demo.ototao.workers.dev/api/render \
37
+ -H 'content-type: application/json' \
38
+ -d '{"triples":[["alice","graduated","2012"],["alice","born","1990"]],
39
+ "slider_position":{"density":1.0,"length":0.5,"formality":0.7,"audience":0.5,"perspective":0.5}}' \
40
+ | jq '.render_receipt | {schema, kid, payload, jws_segments: (.jws | split(".") | length)}'
41
+ ```
42
+
43
+ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §A.5; the same format is reachable from Python (`joserfc` + `jcs`), Go, and Rust per §3.
44
+
45
+ ---
46
+
47
+ ## What ships today
48
+
49
+ | Surface | Status | Verifies |
50
+ |---|---|---|
51
+ | `pip install 'sum-engine[sieve]'` — `sum attest` / `sum verify` / `sum render` / `sum resolve` / `sum ledger` / `sum inspect` / `sum schema` | shipped on PyPI ≥ 0.4.1 | structural reconstruction; HMAC-SHA256 + Ed25519 signatures (W3C VC 2.0 `eddsa-jcs-2022`); bidirectional `sum attest` ↔ `sum render` symmetry from the shell |
52
+ | Cloudflare Worker at `sum-demo.ototao.workers.dev` | shipped | `/api/render` → tome + `render_receipt`; `/.well-known/jwks.json` → JWKS; `/api/qid` → Wikidata resolver |
53
+ | Single-file browser demo (`single_file_demo/index.html`) | shipped | paste prose → in-browser attest → CanonicalBundle JSON; same bytes verify under `node standalone_verifier/verify.js` (Chrome / Firefox / Safari with WebCrypto Ed25519 support) |
54
+ | Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
55
+ | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned via the Worker (Anthropic, Cloudflare AI Gateway optional) | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable |
56
+ | MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
57
+
58
+ The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
59
+
60
+ ### LLM narrative round-trip — closed across measured corpora (2026-04-28)
61
+
62
+ The hardest measurement in `PROOF_BOUNDARY.md` is the full LLM narrative round-trip (`text → LLM-extract → axioms → LLM-generate → prose' → LLM-extract → axioms'`). The unprompted-pipeline baseline on `seed_v1` was **drift = 107.75% / exact-match recall = 0.12** — facts preserved, keys not.
63
+
64
+ A two-layer generator-side intervention (canonical-first generator prompt + constrained-decoding extractor with vocab-pinned `Literal` enums + lemma-exclusion of source-predicate lemmas from the canonical-padding set) now closes this across every measured corpus shape:
65
+
66
+ | Corpus | n_docs | axioms / doc | combined recall | drift_pct | full recall |
67
+ |---|---:|---:|---:|---:|---:|
68
+ | seed_v1 (single-fact SVO) | 50 | 1 | **1.0000** | 0.00 | 50 / 50 |
69
+ | seed_v2 (7 difficulty parse patterns + multi-fact) | 20 | 1–2 | **0.9750** | 5.00 | 19 / 20 |
70
+ | seed_long_paragraphs (16-topic multi-paragraph) | 16 | 11–28 | **0.9972** | 0.57 | 15 / 16 |
71
+
72
+ The combined intervention lands **≥ 0.97 recall and ≤ 5% drift on every measured corpus shape** — single-fact short-form, multi-fact difficulty-pattern, and multi-paragraph dense-prose. Within those three shapes, the §2.5 closure does not depend on the corpus. The remaining gap on `seed_v2` and `seed_long_paragraphs` (`seed_v1` has none) traces to upstream LLM source-extraction artifacts (corrupted axioms on seed_v2 doc_015, semantically-duplicate predicates on seed_long solar_system), not to the intervention pattern.
73
+
74
+ Receipt artifacts:
75
+ - [`fixtures/bench_receipts/s25_generator_side_2026-04-28.json`](fixtures/bench_receipts/s25_generator_side_2026-04-28.json) — full ablation matrix on seed_v1.
76
+ - [`fixtures/bench_receipts/s25_residual_closure_2026-04-28.json`](fixtures/bench_receipts/s25_residual_closure_2026-04-28.json) — combined + lemma-exclusion on seed_v1.
77
+ - [`fixtures/bench_receipts/s25_generator_side_seed_v2_2026-04-28.json`](fixtures/bench_receipts/s25_generator_side_seed_v2_2026-04-28.json) — all three ablations on seed_v2.
78
+ - [`fixtures/bench_receipts/s25_generator_side_seed_long_combined_2026-04-28.json`](fixtures/bench_receipts/s25_generator_side_seed_long_combined_2026-04-28.json) — combined ablation on seed_long_paragraphs.
79
+
80
+ Reproducible: `python -m scripts.bench.runners.s25_generator_side --ablation combined --corpus <path> --out <path>` (~$0.07–$0.20 OpenAI per corpus, ~3–8 min wall clock). Full attribution + per-ablation breakdowns + per-doc failure analysis in [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.5.
81
+
82
+ The deterministic canonical round-trip (the one `sum attest | sum verify` exercises) is **mechanically proven** (§1.1, 0.00% drift). The LLM round-trip is **not**, and this section is here to keep that distinction above the fold.
83
+
84
+ ---
85
+
86
+ ## CLI quick start
87
+
88
+ ```bash
89
+ pip install 'sum-engine[sieve]'
90
+
91
+ echo "Alice likes cats. Bob owns a dog." \
92
+ | sum attest --extractor=sieve > bundle.json
93
+
94
+ sum verify --input bundle.json
95
+ # → sum: ✓ verified 2 axiom(s), state integer matches (hmac=absent, ed25519=absent)
96
+
97
+ sum render < bundle.json > tome.md
98
+ # → bundle's axioms re-emitted as canonical prose; round-trips to the same state integer
99
+ ```
100
+
101
+ The reverse direction also runs under explicit slider control. The local path actions only the density slider; non-neutral length / formality / audience / perspective require the LLM extrapolator and route through the hosted Worker:
102
+
103
+ ```bash
104
+ sum render --density 0.5 < bundle.json
105
+ # → keeps the lex-prefix half of the axioms; @sliders header records what was requested
106
+
107
+ sum render --length 0.9 --use-worker https://sum.ototao.com --json < bundle.json
108
+ # → LLM-conditioned tome + signed render_receipt (sum.render_receipt.v1) on stdout
109
+ ```
110
+
111
+ Add cryptographic attestation with one flag:
112
+
113
+ ```bash
114
+ # Ed25519 / W3C VC 2.0 (eddsa-jcs-2022)
115
+ python -m scripts.generate_did_web --domain your.example --private-key-out keys/issuer.pem
116
+ sum attest --ed25519-key keys/issuer.pem < prose.txt | sum verify --strict
117
+ # → hmac=absent, ed25519=verified
118
+ ```
119
+
120
+ The same bundle bytes verify under `sum verify` (Python), `node standalone_verifier/verify.js` (WebCrypto), and the in-browser demo (SubtleCrypto). [`docs/DID_SETUP.md`](docs/DID_SETUP.md) walks the did:key / did:web issuer setup. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1 documents what the cross-runtime Ed25519 contract proves.
121
+
122
+ ### Calling SUM from MCP-aware LLM clients
123
+
124
+ ```bash
125
+ pip install 'sum-engine[mcp,sieve]'
126
+ # Claude Desktop / Claude Code / Cursor / Continue: add to MCP config:
127
+ # { "mcpServers": { "sum": { "command": "sum-mcp" } } }
128
+ ```
129
+
130
+ `sum-mcp` exposes `extract`, `attest`, `verify`, `inspect`, `schema` as MCP tools. Bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers — same canonical codec. See [`docs/MCP_INTEGRATION.md`](docs/MCP_INTEGRATION.md) for the full client wiring.
131
+
132
+ ### Calling SUM over HTTP
133
+
134
+ The hosted Worker at `https://sum.ototao.com` exposes `/api/render`, `/api/complete`, `/api/qid`, and the `/.well-known/{jwks,revoked-kids}.json` verification surfaces. [`docs/API_REFERENCE.md`](docs/API_REFERENCE.md) is the wire spec — request/response shapes, error codes, the six-step receipt-verification flow, working Node + Python examples. Use this when the caller is a web app, mobile app, or server-side service; use the MCP server when the caller is a local LLM client.
135
+
136
+ ---
137
+
138
+ ## How the trust loop fits together
139
+
140
+ ```
141
+ prose ─► /api/render ─► tome
142
+ + render_receipt {kid, payload, jws}
143
+ │
144
+ ▼
145
+ /.well-known/jwks.json
146
+ (Ed25519 OKP JWK by kid)
147
+ │
148
+ ▼
149
+ jose.flattenedVerify(JCS(payload))
150
+ │
151
+ ▼
152
+ render attested ✓ — issuer signed
153
+ (this tome, these triples, this slider
154
+ position, this model, at this time)
155
+ ```
156
+
157
+ The receipt is a *render attestation*, not a truth oracle. Fact preservation is verified by the bench (NLI audit on weak cells). The receipt is what a downstream system keeps as durable proof; the tome is what a reader consumes. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5.
158
+
159
+ ---
160
+
161
+ ## Underlying substrate
162
+
163
+ Below the slider sits the substrate that earlier phases shipped and verified. Pointers, not paraphrase — every claim links to its source-of-truth doc.
164
+
165
+ - **Canonical round-trip conservation (provable).** `reconstruct(parse(canonical_tome(S))) == S` for every Gödel state `S`. 0.00% drift on `seed_tiny_v1` / `seed_v1` / `seed_v2`. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1.
166
+ - **Cross-runtime state equivalence (provable).** Python (`sympy`), Node (BigInt + Miller-Rabin), in-browser JS produce byte-identical state integers. Locked by 4 harnesses (`make xruntime` + `make xruntime-adversarial`). [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2.
167
+ - **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
168
+ - **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
169
+ - **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
170
+ - **103 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
171
+
172
+ ---
173
+
174
+ ## Reproduce the bench
175
+
176
+ ```bash
177
+ # Short corpus (n=8, 4–12 triples/doc, ~$0.30, ~2 min with NLI)
178
+ bash scripts/bench/run_paragraphs.sh
179
+
180
+ # Long corpus (n=16, 9–24 triples/doc, ~$1.50, ~10 min with NLI)
181
+ bash scripts/bench/run_long_paragraphs.sh
182
+ ```
183
+
184
+ Both runners require `OPENAI_API_KEY` (NLI audit + extraction). Pinned model snapshots are mandatory; the harness raises `SystemExit` on unpinned identifiers (see [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.6). Output is NDJSON `sum.slider_drift_bench.v1`, with per-cell strict / normalized / semantic / NLI fact-preservation columns.
185
+
186
+ ---
187
+
188
+ ## Future developments
189
+
190
+ This roadmap names only unshipped work. Items already landed live in [`CHANGELOG.md`](CHANGELOG.md) `[Unreleased]`. Detailed sequencing lives in [`docs/NEXT_SESSION_PLAYBOOK.md`](docs/NEXT_SESSION_PLAYBOOK.md).
191
+
192
+ **Closing the LLM round-trip drift.** This is the headline problem. Without intervention, the full LLM round-trip (`text → LLM-extract → axioms → LLM-generate → prose' → LLM-extract → axioms'`) produces 107.75% drift and 0.12 exact-match recall on `seed_v1` — facts preserved, keys not. Closing this gap is a canonicalisation problem (entity resolution, predicate normalisation, pinned-vocabulary extraction). The intervention reported in "LLM narrative round-trip" above closes it on the three measured corpora only; generalising beyond them is still open. See [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.5 for the full attribution and per-document failure modes.
193
+
194
+ **Hardening backlog**
195
+
196
+ - `sha256_128_v2` default-activation — Python ↔ Node byte-identity now locked (12-key K1-v2 + 6-state K2-v2 gate runs on every PR; `scripts/verify_godel_v2_cross_runtime.py`). The default scheme stays `sha256_64_v1`; flipping the default is a separate operator decision that requires a `bundle_version` minor bump per `docs/COMPATIBILITY_POLICY.md`. The migration path is now empirically open.
197
+ - `/api/qid` accuracy floor — **measured 2026-04-28** on a 30-term hand-curated corpus across people, places, concepts, and common nouns: **hit-rate 100% (30/30), label-substring-match 100% (24/24, excluding 6 common-noun rows)**. Receipt at [`fixtures/bench_receipts/qid_accuracy_2026-04-28.json`](fixtures/bench_receipts/qid_accuracy_2026-04-28.json) under schema `sum.qid_resolution_accuracy.v1`. **Boundary:** label-substring match accepted `relativity` → `Q201607 (Relativity Records)` — a music-label entity, not the physics theory. The two-tier metric is robust to the quirks of `wbsearchentities` but does not measure semantic accuracy against canonical Q-IDs; that's a follow-on with hand-verified ground-truth pairs. The current resolver is a thin layer over `wbsearchentities`; SPARQL-driven disambiguation that prefers the most-linked-to entity for ambiguous terms remains an unshipped enhancement.
198
+ - Threat-model validation — every documented defence in [`docs/THREAT_MODEL.md`](docs/THREAT_MODEL.md) gets an executable test.
199
+ - Delta-bundle composition semantics — specifies what `bundle.is_delta` means cross-runtime.
200
+ - Sigstore / cosign signing of release artifacts.
201
+ - LLM-extraction honesty guardrails — `extraction.verifiable: true | false` so signed ≠ true is visible at the consumer interface.
202
+ - Calibration-set authoring for the Venn-Abers conformal-interval implementation that already ships.
203
+ - Remaining sieve recall work on `seed_v2` (apposition / relative-clause / compound-conjunct) — gated on the §2.5 work, see [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §6.
204
+
205
+ **Platform surface (post-hardening)**
206
+
207
+ Source anchoring in the bundle schema, bundle explorer / viewer, `sum verify --explain`, `sum tutorial` onboarding, shareable bundle URLs `/b/{hash}`, PWA-installable demo, `sum attest <url>` fetch mode. Each item names its dependency in [`docs/NEXT_SESSION_PLAYBOOK.md`](docs/NEXT_SESSION_PLAYBOOK.md).
208
+
209
+ ---
210
+
211
+ ## Verification surface
212
+
213
+ `make help` lists every dev command. Common targets:
214
+
215
+ ```bash
216
+ make install # editable install with sieve + dev extras
217
+ make test # full pytest run (1000+ tests)
218
+ make xruntime # cross-runtime K1/K1-mw/K2/K3/K4 (Python ↔ Node)
219
+ make xruntime-adversarial # rejection-matrix A1–A6
220
+ make fortress # 21-check pure-math invariants
221
+ make smoke # fresh-venv install + attest|verify round-trip
222
+ make demo # open the single-file browser demo
223
+ ```
224
+
225
+ CI runs the full suite on every push (`.github/workflows/quantum-ci.yml`); the `cross-runtime-harness` job runs K1–K4 + A1–A6 on Node 22; `pypi-install-smoke` builds the wheel and runs `echo prose | sum attest | sum verify` in a throwaway venv.
226
+
227
+ ---
228
+
229
+ ## Truthfulness contract
230
+
231
+ Every claim in this repo carries an explicit epistemic status — `provable`, `certified`, `empirical-benchmark`, or `expert-opinion`. The arbiter is [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md). A summary surface that quotes an empirical-benchmark number alongside language like "mathematically guaranteed" is a policy violation per §5 and must be corrected.
232
+
233
+ Performance language (`fast`, `efficient`, `low-latency`, `scalable`) requires a benchmark in the same commit. Adversarial input agreement (the A-matrix) is a separate proof from valid-input agreement (the K-matrix); both run in CI.
234
+
235
+ If a number in this README disagrees with [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) or [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md), the docs are canonical and this README is wrong.
236
+
237
+ ---
238
+
239
+ ## Contributing
240
+
241
+ 1. Fork and branch.
242
+ 2. `make install && make test && make xruntime`.
243
+ 3. Read [`docs/NEXT_SESSION_PLAYBOOK.md`](docs/NEXT_SESSION_PLAYBOOK.md) for principles, stop-the-line triggers, and the work-ordering rule.
244
+ 4. Open a PR. Every claim added to docs or commit messages must trace to a test, a measurement, or an explicit `designed, not proved` label.
245
+
246
+ [`CONTRIBUTING.md`](CONTRIBUTING.md) has the test-gate matrix and the verification-gate runbook.
247
+
248
+ ---
249
+
250
+ ## License
251
+
252
+ Apache 2.0. See [`LICENSE`](LICENSE).