sum-engine 0.6.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. {sum_engine-0.6.0/sum_engine.egg-info → sum_engine-0.7.1}/PKG-INFO +59 -11
  2. {sum_engine-0.6.0 → sum_engine-0.7.1}/README.md +55 -8
  3. {sum_engine-0.6.0 → sum_engine-0.7.1}/pyproject.toml +26 -3
  4. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_cli/main.py +528 -2
  5. {sum_engine-0.6.0 → sum_engine-0.7.1/sum_engine.egg-info}/PKG-INFO +59 -11
  6. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine.egg-info/SOURCES.txt +11 -0
  7. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine.egg-info/requires.txt +3 -2
  8. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/live_llm_adapter.py +217 -8
  9. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/llm_dispatch.py +20 -0
  10. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/akashic_ledger.py +40 -4
  11. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/jose_envelope.py +101 -0
  12. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/render_receipt/verifier.py +11 -1
  13. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/conformal/__init__.py +12 -0
  14. sum_engine-0.7.1/sum_engine_internal/research/conformal/risk_control.py +207 -0
  15. sum_engine-0.7.1/sum_engine_internal/transform_receipt/__init__.py +55 -0
  16. sum_engine-0.7.1/sum_engine_internal/transform_receipt/format.py +215 -0
  17. sum_engine-0.7.1/sum_engine_internal/transform_receipt/sign.py +50 -0
  18. sum_engine-0.7.1/sum_engine_internal/transform_receipt/verifier.py +132 -0
  19. sum_engine-0.7.1/sum_engine_internal/transforms/__init__.py +104 -0
  20. sum_engine-0.7.1/sum_engine_internal/transforms/_base.py +203 -0
  21. sum_engine-0.7.1/sum_engine_internal/transforms/compose.py +304 -0
  22. sum_engine-0.7.1/sum_engine_internal/transforms/extract.py +406 -0
  23. sum_engine-0.7.1/sum_engine_internal/transforms/share.py +231 -0
  24. sum_engine-0.7.1/sum_engine_internal/transforms/slider.py +380 -0
  25. {sum_engine-0.6.0 → sum_engine-0.7.1}/LICENSE +0 -0
  26. {sum_engine-0.6.0 → sum_engine-0.7.1}/setup.cfg +0 -0
  27. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_cli/__init__.py +0 -0
  28. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_cli/audit_log.py +0 -0
  29. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine.egg-info/dependency_links.txt +0 -0
  30. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine.egg-info/entry_points.txt +0 -0
  31. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine.egg-info/top_level.txt +0 -0
  32. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/__init__.py +0 -0
  33. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/adapters/__init__.py +0 -0
  34. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/adapters/format_pivot.py +0 -0
  35. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/agent_surface/__init__.py +0 -0
  36. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/agent_surface/bind.py +0 -0
  37. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/agent_surface/mcp_bind.py +0 -0
  38. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/__init__.py +0 -0
  39. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/causal_discovery.py +0 -0
  40. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/chunked_corpus.py +0 -0
  41. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/minhash.py +0 -0
  42. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/predicate_canon.py +0 -0
  43. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/semantic_arithmetic.py +0 -0
  44. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/syntactic_sieve.py +0 -0
  45. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/zk_semantics.py +0 -0
  46. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/__init__.py +0 -0
  47. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/_predicates.py +0 -0
  48. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/eu_ai_act_article_12.py +0 -0
  49. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/gdpr_article_30.py +0 -0
  50. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/hipaa_164_312_b.py +0 -0
  51. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/iso_27001_8_15.py +0 -0
  52. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/pci_dss_4_req_10.py +0 -0
  53. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/report.py +0 -0
  54. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/soc_2_cc_7_2.py +0 -0
  55. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/__init__.py +0 -0
  56. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/automated_scientist.py +0 -0
  57. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/autonomous_agent.py +0 -0
  58. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/causal_triggers.py +0 -0
  59. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/confidence_calibrator.py +0 -0
  60. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/data/__init__.py +0 -0
  61. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/data/common_english_2000.txt +0 -0
  62. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/data/common_english_5000.txt +0 -0
  63. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/epistemic_arbiter.py +0 -0
  64. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/epistemic_loop.py +0 -0
  65. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/extraction_validator.py +0 -0
  66. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/gauge_orchestrator.py +0 -0
  67. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/llm_entailment.py +0 -0
  68. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/mass_semantic_engine.py +0 -0
  69. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/ouroboros.py +0 -0
  70. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/s25_interventions.py +0 -0
  71. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/semantic_dedup.py +0 -0
  72. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/slider_renderer.py +0 -0
  73. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/tome_generator.py +0 -0
  74. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/tome_sliders.py +0 -0
  75. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/vector_bridge.py +0 -0
  76. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/venn_abers.py +0 -0
  77. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/evidence/__init__.py +0 -0
  78. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/evidence/chain.py +0 -0
  79. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/__init__.py +0 -0
  80. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/base.py +0 -0
  81. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/egglog_store.py +0 -0
  82. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/unionfind_store.py +0 -0
  83. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/__init__.py +0 -0
  84. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/canonical_codec.py +0 -0
  85. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/jcs.py +0 -0
  86. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/key_manager.py +0 -0
  87. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/p2p_mesh.py +0 -0
  88. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/prov_o.py +0 -0
  89. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/provenance.py +0 -0
  90. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/rate_limiter.py +0 -0
  91. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/resource_guards.py +0 -0
  92. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/scheme_registry.py +0 -0
  93. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/state_encoding.py +0 -0
  94. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/telemetry.py +0 -0
  95. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/tome_parser.py +0 -0
  96. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/verifiable_credential.py +0 -0
  97. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/zig_bridge.py +0 -0
  98. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/__init__.py +0 -0
  99. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/__main__.py +0 -0
  100. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/errors.py +0 -0
  101. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/server.py +0 -0
  102. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/merkle_sidecar/__init__.py +0 -0
  103. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/merkle_sidecar/tree.py +0 -0
  104. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/render_receipt/__init__.py +0 -0
  105. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/__init__.py +0 -0
  106. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/bootstrap/__init__.py +0 -0
  107. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/bootstrap/multiplier_bootstrap.py +0 -0
  108. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/conformal/entropy_baseline.py +0 -0
  109. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/conformal/split_conformal.py +0 -0
  110. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/lsh/__init__.py +0 -0
  111. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/lsh/bundle_index.py +0 -0
  112. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/mmd/__init__.py +0 -0
  113. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/mmd/baseline.py +0 -0
  114. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/mmd/mmd.py +0 -0
  115. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/robust_pca/__init__.py +0 -0
  116. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/robust_pca/axiom_embedding.py +0 -0
  117. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/robust_pca/pcp.py +0 -0
  118. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/sequential/__init__.py +0 -0
  119. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/sequential/sprt.py +0 -0
  120. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian.py +0 -0
  121. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian_v2.py +0 -0
  122. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian_v3.py +0 -0
  123. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian_v32.py +0 -0
  124. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/smt_consistency/__init__.py +0 -0
  125. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/smt_consistency/consistency.py +0 -0
  126. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/smt_consistency/predicate_library.py +0 -0
  127. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/spectral_entropy/__init__.py +0 -0
  128. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/research/spectral_entropy/vn_entropy.py +0 -0
  129. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/trust_root/__init__.py +0 -0
  130. {sum_engine-0.6.0 → sum_engine-0.7.1}/sum_engine_internal/trust_root/verifier.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sum-engine
3
- Version: 0.6.0
3
+ Version: 0.7.1
4
4
  Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
5
5
  Author: ototao
6
6
  License: Apache-2.0
@@ -24,7 +24,8 @@ License-File: LICENSE
24
24
  Requires-Dist: cryptography>=41.0.0
25
25
  Requires-Dist: sympy>=1.12
26
26
  Provides-Extra: sieve
27
- Requires-Dist: spacy>=3.7.0; extra == "sieve"
27
+ Requires-Dist: spacy>=3.8.0; extra == "sieve"
28
+ Requires-Dist: click>=8.0; extra == "sieve"
28
29
  Provides-Extra: openai
29
30
  Requires-Dist: openai<3.0.0,>=1.40.0; extra == "openai"
30
31
  Requires-Dist: pydantic>=2.0.0; extra == "openai"
@@ -34,7 +35,7 @@ Provides-Extra: anthropic
34
35
  Requires-Dist: anthropic>=0.97.0; extra == "anthropic"
35
36
  Requires-Dist: pydantic>=2.0.0; extra == "anthropic"
36
37
  Provides-Extra: receipt-verify
37
- Requires-Dist: joserfc>=1.0.0; extra == "receipt-verify"
38
+ Requires-Dist: joserfc<2.0.0,>=1.0.0; extra == "receipt-verify"
38
39
  Provides-Extra: mcp
39
40
  Requires-Dist: mcp>=1.0.0; extra == "mcp"
40
41
  Provides-Extra: research
@@ -68,9 +69,15 @@ Dynamic: license-file
68
69
  [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
69
70
  [![Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
70
71
 
71
- > **A cross-runtime trust surface for LLM-rendered text.** Three runtimes (Python, Node, modern browsers) produce byte-identical Ed25519 signatures over the same JCS-canonical bytes. Every render through the hosted Worker carries a detached-JWS receipt (`sum.render_receipt.v1`) that any third party can verify offline against `/.well-known/jwks.json`. Live at https://sum-demo.ototao.workers.dev.
72
+ > **SUM lets people and agents transform knowledge without losing the ability to verify what changed, what stayed the same, who signed it, and what remains unproven.**
72
73
 
73
- That is the load-bearing claim and what makes SUM different from a generic summarisation tool. The cryptographic side is **mechanically proven** — three independent verifier implementations agreeing byte-for-byte on every signed bundle, locked in CI on every PR. The semantic side (extraction quality, slider fact preservation) is **empirically measured** with explicit per-corpus numbers and explicit per-corpus boundaries; SUM does not blur the line between the two. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) is the arbiter.
74
+ Every transformation — extract triples from prose, render a tome at a controlled slider position, compose bundles across documents, share a render — emits a cryptographically-signed receipt that any third party can verify offline. The receipt attests *that the transformation happened and what its inputs were*. Separate per-axis benchmarks attest *how much the transformation preserved meaning*. Both are kept honest by separate proof discipline — and the project never blurs the line between them.
75
+
76
+ *Live trust loop:* https://sum-demo.ototao.workers.dev — three runtimes (Python, Node, modern browsers) produce byte-identical Ed25519 signatures over the same JCS-canonical bytes; verify offline against `/.well-known/jwks.json`. Mechanically proven; locked in CI on every PR.
77
+
78
+ **Built for:** journalists working under deepfake-era citation requirements, academic survey writers who need provenance back to source PDFs, agentic-AI builders who need their agents to pass verifiable evidence and not just messages, and regulated-domain content (EU AI Act Article 12, FTC AI disclosure, HIPAA, SOC 2, PCI DSS) where "we say it's true" isn't enough.
79
+
80
+ The cryptographic side is **mechanically proven** — three independent verifier implementations agreeing byte-for-byte on every signed bundle, locked in CI on every PR. The semantic side (extraction quality, slider fact preservation) is **empirically measured** with explicit per-corpus numbers and explicit per-corpus boundaries. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) is the arbiter.
74
81
 
75
82
  Headline supporting numbers (each links to its source of truth):
76
83
 
@@ -79,7 +86,7 @@ Headline supporting numbers (each links to its source of truth):
79
86
  | Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
80
87
  | Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
81
88
  | Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
82
- | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
89
+ | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark — measured; same-commit replay receipt still pending (bench-hardening T2/T3) | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
83
90
  | Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
84
91
 
85
92
  A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
@@ -112,13 +119,28 @@ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIP
112
119
  | Surface | Status | Verifies |
113
120
  |---|---|---|
114
121
  | `pip install 'sum-engine[sieve]'` — `sum attest` / `sum verify` / `sum render` / `sum resolve` / `sum ledger` / `sum inspect` / `sum schema` | shipped on PyPI ≥ 0.4.1 | structural reconstruction; HMAC-SHA256 + Ed25519 signatures (W3C VC 2.0 `eddsa-jcs-2022`); bidirectional `sum attest` ↔ `sum render` symmetry from the shell |
115
- | Cloudflare Worker at `sum-demo.ototao.workers.dev` | shipped | `/api/render` → tome + `render_receipt`; `/.well-known/jwks.json` → JWKS; `/api/qid` → Wikidata resolver |
122
+ | Cloudflare Worker at `sum-demo.ototao.workers.dev` | shipped | `/api/render` → tome + `render_receipt`; `/api/transform` → generic transform-registry dispatch + `sum.transform_receipt.v1`; `/api/complete` → LLM proxy; `/api/qid` → Wikidata resolver; `/.well-known/jwks.json` + `/.well-known/revoked-kids.json` → trust-loop endpoints. Public LLM-axis routes are rate-limited per IP — see [`docs/PUBLIC_API_RATE_LIMITS.md`](docs/PUBLIC_API_RATE_LIMITS.md) (5/day operator-keyed demo; 100/hr with BYO key via `X-Render-LLM-Key-Anthropic` / `-OpenAI`). |
116
123
  | Single-file browser demo (`single_file_demo/index.html`) | shipped | paste prose → in-browser attest → CanonicalBundle JSON; same bytes verify under `node standalone_verifier/verify.js` (Chrome / Firefox / Safari with WebCrypto Ed25519 support) |
117
124
  | Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
118
- | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned via the Worker (Anthropic, Cloudflare AI Gateway optional) | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable |
125
+ | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned. Two dispatch paths: Worker `/api/render` (Anthropic + Cloudflare AI Gateway optional) producing `sum.render_receipt.v1`, OR Python `sum transform apply slider` (OpenAI via `OPENAI_API_KEY`) producing `sum.transform_receipt.v1` | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable. Tightening worktrail at [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) adds iteration-stability + DKW worst-case bounds + capability-region headlines |
119
126
  | MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
127
+ | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped on PyPI ≥ 0.7.0 | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
128
+ | Replay-defense window (`signed_at_out_of_window`) | shipped | opt-in `max_age_seconds` parameter across all four verifier surfaces (Python render / Python transform / JS render / JS transform). Default-off preserves archival use; receivers opt in per use-case (agent-swarm 60s, real-time 600s, newsletter 1d, legal-discovery no window). |
129
+ | `sum verify --explain` layered output | shipped | Per-dimension report (`sum.verify_explained.v1`): cryptographic integrity / canonical reconstruction / axiom consistency / extraction provenance / source evidence coverage / semantic preservation / truth of content. Each carries `epistemic_status` (`provable` / `certified` / `empirical-benchmark` / `not-asserted`). Truth of content is ALWAYS `not_asserted` — locked by test. |
130
+ | Negative-control corpus (T5 of bench-hardening) | shipped | 20 hand-authored documents across 5 failure modes (ambiguous coref / predicate-alias / contradictions / entity-resolution-adversarial / non-extractable). Runner exits 1 if observed failures don't match annotations. Baseline at [`fixtures/bench_receipts/negative_control_2026-05-17.json`](fixtures/bench_receipts/negative_control_2026-05-17.json). |
131
+ | Compliance validators (six regimes) | shipped | `sum compliance check --regime <id> --audit-log <path>` — EU AI Act Article 12, GDPR Article 30, HIPAA § 164.312(b), ISO/IEC 27001 A.8.15, SOC 2 CC 7.2, PCI DSS v4.0 Req 10. All six produce the same `sum.compliance_report.v1` schema; per-regime docs at `docs/COMPLIANCE_*.md`. |
132
+
133
+ The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md). In keeping with the "what remains unproven" half of the promise above: these headline numbers are **measured observations**, not yet same-commit-replayable — the bench harness (`Tests/benchmarks/slider_drift_bench.py`) is scaffold-state and no `sum.slider_drift_bench.v1` receipt is committed. Closing that to a replayable receipt is bench-hardening tasks T2 / T3 ([`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md)); see the reproducibility-status note in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
120
134
 
121
- The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
135
+ ## Strategic context
136
+
137
+ The operational compass — read in this order if you want the project's intent + how it operates + where it's going:
138
+
139
+ - [`docs/CHARTER_2026-05-17.md`](docs/CHARTER_2026-05-17.md) — intent, the Why, strategy, objectives, success criteria, constraints, and the operational loop. The compass every other doc resolves to.
140
+ - [`docs/PRODUCT_DELIBERATION_2026-05-14.md`](docs/PRODUCT_DELIBERATION_2026-05-14.md) — three-option strategic analysis + grant-outcome decision tree.
141
+ - [`docs/ZENITH_FRAMING_2026-05-16.md`](docs/ZENITH_FRAMING_2026-05-16.md) — destination framing (SUM as chain-of-custody for AI-transformed knowledge) plus three new concepts (Perspective Receipts, Trust Profiles, Epistemic Nutrition Label) on the design queue.
142
+ - [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) — five-task empirical-benchmark hardening plan (T1–T5; T5 shipped, T1–T4 queued).
143
+ - [`docs/DOGFOOD_QUICKSTART.md`](docs/DOGFOOD_QUICKSTART.md) — five-minute guide to running SUM on your own writing.
122
144
 
123
145
  ### LLM narrative round-trip — closed across measured corpora (2026-04-28)
124
146
 
@@ -230,7 +252,33 @@ Below the slider sits the substrate that earlier phases shipped and verified. Po
230
252
  - **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
231
253
  - **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
232
254
  - **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
233
- - **103 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
255
+ - **170 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
256
+
257
+ ### Research substrate (under `sum_engine_internal/research/`)
258
+
259
+ Less-surfaced but shipped:
260
+
261
+ - **MinHash-LSH bundle similarity index** (`research/lsh/`) — near-duplicate bundle detection at scale.
262
+ - **Robust PCA corruption score** (`research/robust_pca/`) — `corruption_score` field in bundle metadata; flags adversarially-perturbed bundles.
263
+ - **Sequential & conformal-prediction** (`research/sequential/`, `research/conformal/`) — bench-side confidence bounds with documented coverage guarantees.
264
+ - **MMD distribution distance** (`research/mmd/`) — `axiom_distribution_mmd` field on bundles; surfaces when an attested bundle is structurally unlike its baseline corpus.
265
+ - **Spectral entropy** (`research/spectral_entropy/`) — axiom-graph entropy on every bundle, with confidence interval.
266
+ - **Bootstrap multiplier spike detection** (`research/bootstrap/`) — see [`docs/MULTIPLIER_BOOTSTRAP_SPIKE_FINDINGS.md`](docs/MULTIPLIER_BOOTSTRAP_SPIKE_FINDINGS.md).
267
+ - **SMT consistency checking** (`research/smt_consistency/`) — z3-backed `axiom_consistency_check` on every bundle.
268
+ - **Sheaf-Laplacian hallucination detection** — see [`docs/SHEAF_HALLUCINATION_DETECTOR.md`](docs/SHEAF_HALLUCINATION_DETECTOR.md) (research direction).
269
+
270
+ ### Other substrate-adjacent surfaces
271
+
272
+ - **Trust-root manifest** (`sum_engine_internal/trust_root/`) — operator-issued signed manifest binding kid lifecycle, revocation policy, and verifier expectations.
273
+ - **Merkle sidecar format** (`sum_engine_internal/merkle_sidecar/`) — see [`docs/MERKLE_SIDECAR_FORMAT.md`](docs/MERKLE_SIDECAR_FORMAT.md).
274
+ - **Evidence-chain layer** (`sum_engine_internal/evidence/`) — substrate behind `source_chain_hash` (T4).
275
+ - **Algorithm registry** — see [`docs/ALGORITHM_REGISTRY.md`](docs/ALGORITHM_REGISTRY.md) (the in-tree list of permitted signing algs; crypto-agility gate).
276
+ - **Audit log format** — every CLI operation can emit `sum.audit_log.v1` events; see [`docs/AUDIT_LOG_FORMAT.md`](docs/AUDIT_LOG_FORMAT.md).
277
+ - **Agent surface** (`sum_engine_internal/agent_surface/`) — see [`docs/AGENT_SURFACE_FINDINGS.md`](docs/AGENT_SURFACE_FINDINGS.md).
278
+
279
+ ### Internal research surfaces (NOT shipped, present in repo)
280
+
281
+ - **`api/quantum_router.py` + `quantum_main.py`** — FastAPI surface with 26+ endpoints (branchable knowledge graph, ZK semantic proofs, federated KG sync, JWT-tenant knowledge OS). 1,684 LOC; 58/58 tests pass; runs locally via `uvicorn quantum_main:app`. **NOT in the PyPI wheel** (`pyproject.toml` excludes `api*`), **NOT in the live Worker**, **NOT in the dogfood quickstart**. The substrate it composes is load-bearing for the shipping surfaces above; only the FastAPI HTTP layer is internal-research. Promote to a shipping `[api]` extra only if a named buyer or grant deliverable explicitly references one of the endpoint clusters. See top-of-file banner in `api/quantum_router.py` for the full triage rationale.
234
282
 
235
283
  ---
236
284
 
@@ -277,7 +325,7 @@ Source anchoring in the bundle schema, bundle explorer / viewer, `sum verify --e
277
325
 
278
326
  ```bash
279
327
  make install # editable install with sieve + dev extras
280
- make test # full pytest run (1000+ tests)
328
+ make test # full pytest run (2000+ tests)
281
329
  make xruntime # cross-runtime K1/K1-mw/K2/K3/K4 (Python ↔ Node)
282
330
  make xruntime-adversarial # rejection-matrix A1–A6
283
331
  make fortress # 21-check pure-math invariants
@@ -5,9 +5,15 @@
5
5
  [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
6
6
  [![Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
7
7
 
8
- > **A cross-runtime trust surface for LLM-rendered text.** Three runtimes (Python, Node, modern browsers) produce byte-identical Ed25519 signatures over the same JCS-canonical bytes. Every render through the hosted Worker carries a detached-JWS receipt (`sum.render_receipt.v1`) that any third party can verify offline against `/.well-known/jwks.json`. Live at https://sum-demo.ototao.workers.dev.
8
+ > **SUM lets people and agents transform knowledge without losing the ability to verify what changed, what stayed the same, who signed it, and what remains unproven.**
9
9
 
10
- That is the load-bearing claim and what makes SUM different from a generic summarisation tool. The cryptographic side is **mechanically proven** — three independent verifier implementations agreeing byte-for-byte on every signed bundle, locked in CI on every PR. The semantic side (extraction quality, slider fact preservation) is **empirically measured** with explicit per-corpus numbers and explicit per-corpus boundaries; SUM does not blur the line between the two. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) is the arbiter.
10
+ Every transformation — extract triples from prose, render a tome at a controlled slider position, compose bundles across documents, share a render — emits a cryptographically signed receipt that any third party can verify offline. The receipt attests *that the transformation happened and what its inputs were*. Separate per-axis benchmarks attest *how much the transformation preserved meaning*. Both are kept honest by separate proof discipline — and the project never blurs the line between them.
11
+
12
+ *Live trust loop:* https://sum-demo.ototao.workers.dev — three runtimes (Python, Node, modern browsers) produce byte-identical Ed25519 signatures over the same JCS-canonical bytes; verify offline against `/.well-known/jwks.json`. Mechanically proven; locked in CI on every PR.
13
+
14
+ **Built for:** journalists working under deepfake-era citation requirements, academic survey writers who need provenance back to source PDFs, agentic-AI builders who need their agents to pass verifiable evidence and not just messages, and teams publishing regulated-domain content (EU AI Act Article 12, FTC AI disclosure, HIPAA, SOC 2, PCI DSS) where "we say it's true" isn't enough.
15
+
16
+ The cryptographic side is **mechanically proven** — three independent verifier implementations agreeing byte-for-byte on every signed bundle, locked in CI on every PR. The semantic side (extraction quality, slider fact preservation) is **empirically measured** with explicit per-corpus numbers and explicit per-corpus boundaries. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) is the arbiter.
11
17
 
12
18
  Headline supporting numbers (each links to its source of truth):
13
19
 
@@ -16,7 +22,7 @@ Headline supporting numbers (each links to its source of truth):
16
22
  | Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
17
23
  | Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
18
24
  | Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
19
- | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
25
+ | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark — measured; same-commit replay receipt still pending (bench-hardening T2/T3) | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
20
26
  | Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
21
27
 
22
28
  A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
@@ -49,13 +55,28 @@ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIP
49
55
  | Surface | Status | Verifies |
50
56
  |---|---|---|
51
57
  | `pip install 'sum-engine[sieve]'` — `sum attest` / `sum verify` / `sum render` / `sum resolve` / `sum ledger` / `sum inspect` / `sum schema` | shipped on PyPI ≥ 0.4.1 | structural reconstruction; HMAC-SHA256 + Ed25519 signatures (W3C VC 2.0 `eddsa-jcs-2022`); bidirectional `sum attest` ↔ `sum render` symmetry from the shell |
52
- | Cloudflare Worker at `sum-demo.ototao.workers.dev` | shipped | `/api/render` → tome + `render_receipt`; `/.well-known/jwks.json` → JWKS; `/api/qid` → Wikidata resolver |
58
+ | Cloudflare Worker at `sum-demo.ototao.workers.dev` | shipped | `/api/render` → tome + `render_receipt`; `/api/transform` → generic transform-registry dispatch + `sum.transform_receipt.v1`; `/api/complete` → LLM proxy; `/api/qid` → Wikidata resolver; `/.well-known/jwks.json` + `/.well-known/revoked-kids.json` → trust-loop endpoints. Public LLM-axis routes are rate-limited per IP — see [`docs/PUBLIC_API_RATE_LIMITS.md`](docs/PUBLIC_API_RATE_LIMITS.md) (5/day operator-keyed demo; 100/hr with BYO key via `X-Render-LLM-Key-Anthropic` / `-OpenAI`). |
53
59
  | Single-file browser demo (`single_file_demo/index.html`) | shipped | paste prose → in-browser attest → CanonicalBundle JSON; same bytes verify under `node standalone_verifier/verify.js` (Chrome / Firefox / Safari with WebCrypto Ed25519 support) |
54
60
  | Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
55
- | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned via the Worker (Anthropic, Cloudflare AI Gateway optional) | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable |
61
+ | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned. Two dispatch paths: Worker `/api/render` (Anthropic + Cloudflare AI Gateway optional) producing `sum.render_receipt.v1`, OR Python `sum transform apply slider` (OpenAI via `OPENAI_API_KEY`) producing `sum.transform_receipt.v1` | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable. Tightening worktrail at [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) adds iteration-stability + DKW worst-case bounds + capability-region headlines |
56
62
  | MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
63
+ | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped on PyPI ≥ 0.7.0 | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
64
+ | Replay-defense window (`signed_at_out_of_window`) | shipped | opt-in `max_age_seconds` parameter across all four verifier surfaces (Python render / Python transform / JS render / JS transform). Default-off preserves archival use; receivers opt in per use-case (agent-swarm 60s, real-time 600s, newsletter 1d, legal-discovery no window). |
65
+ | `sum verify --explain` layered output | shipped | Per-dimension report (`sum.verify_explained.v1`): cryptographic integrity / canonical reconstruction / axiom consistency / extraction provenance / source evidence coverage / semantic preservation / truth of content. Each carries `epistemic_status` (`provable` / `certified` / `empirical-benchmark` / `not-asserted`). Truth of content is ALWAYS `not_asserted` — locked by test. |
66
+ | Negative-control corpus (T5 of bench-hardening) | shipped | 20 hand-authored documents across 5 failure modes (ambiguous coref / predicate-alias / contradictions / entity-resolution-adversarial / non-extractable). Runner exits 1 if observed failures don't match annotations. Baseline at [`fixtures/bench_receipts/negative_control_2026-05-17.json`](fixtures/bench_receipts/negative_control_2026-05-17.json). |
67
+ | Compliance validators (six regimes) | shipped | `sum compliance check --regime <id> --audit-log <path>` — EU AI Act Article 12, GDPR Article 30, HIPAA § 164.312(b), ISO/IEC 27001 A.8.15, SOC 2 CC 7.2, PCI DSS v4.0 Req 10. All six produce the same `sum.compliance_report.v1` schema; per-regime docs at `docs/COMPLIANCE_*.md`. |
68
+
69
+ The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md). In keeping with the "what remains unproven" half of the promise above: these headline numbers are **measured observations**, not yet same-commit-replayable — the bench harness (`Tests/benchmarks/slider_drift_bench.py`) is scaffold-state and no `sum.slider_drift_bench.v1` receipt is committed. Closing that to a replayable receipt is bench-hardening tasks T2 / T3 ([`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md)); see the reproducibility-status note in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
57
70
 
58
- The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
71
+ ## Strategic context
72
+
73
+ The operational compass — read in this order if you want the project's intent + how it operates + where it's going:
74
+
75
+ - [`docs/CHARTER_2026-05-17.md`](docs/CHARTER_2026-05-17.md) — intent, the Why, strategy, objectives, success criteria, constraints, and the operational loop. The compass every other doc resolves to.
76
+ - [`docs/PRODUCT_DELIBERATION_2026-05-14.md`](docs/PRODUCT_DELIBERATION_2026-05-14.md) — three-option strategic analysis + grant-outcome decision tree.
77
+ - [`docs/ZENITH_FRAMING_2026-05-16.md`](docs/ZENITH_FRAMING_2026-05-16.md) — destination framing (SUM as chain-of-custody for AI-transformed knowledge) plus three new concepts (Perspective Receipts, Trust Profiles, Epistemic Nutrition Label) on the design queue.
78
+ - [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) — five-task empirical-benchmark hardening plan (T1–T5; T5 shipped, T1–T4 queued).
79
+ - [`docs/DOGFOOD_QUICKSTART.md`](docs/DOGFOOD_QUICKSTART.md) — five-minute guide to running SUM on your own writing.
59
80
 
60
81
  ### LLM narrative round-trip — closed across measured corpora (2026-04-28)
61
82
 
@@ -167,7 +188,33 @@ Below the slider sits the substrate that earlier phases shipped and verified. Po
167
188
  - **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
168
189
  - **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
169
190
  - **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
170
- - **103 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
191
+ - **170 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
192
+
193
+ ### Research substrate (under `sum_engine_internal/research/`)
194
+
195
+ Less-surfaced but shipped:
196
+
197
+ - **MinHash-LSH bundle similarity index** (`research/lsh/`) — near-duplicate bundle detection at scale.
198
+ - **Robust PCA corruption score** (`research/robust_pca/`) — `corruption_score` field in bundle metadata; flags adversarially-perturbed bundles.
199
+ - **Sequential & conformal-prediction** (`research/sequential/`, `research/conformal/`) — bench-side confidence bounds with documented coverage guarantees.
200
+ - **MMD distribution distance** (`research/mmd/`) — `axiom_distribution_mmd` field on bundles; surfaces when an attested bundle is structurally unlike its baseline corpus.
201
+ - **Spectral entropy** (`research/spectral_entropy/`) — axiom-graph entropy on every bundle, with confidence interval.
202
+ - **Bootstrap multiplier spike detection** (`research/bootstrap/`) — see [`docs/MULTIPLIER_BOOTSTRAP_SPIKE_FINDINGS.md`](docs/MULTIPLIER_BOOTSTRAP_SPIKE_FINDINGS.md).
203
+ - **SMT consistency checking** (`research/smt_consistency/`) — z3-backed `axiom_consistency_check` on every bundle.
204
+ - **Sheaf-Laplacian hallucination detection** — see [`docs/SHEAF_HALLUCINATION_DETECTOR.md`](docs/SHEAF_HALLUCINATION_DETECTOR.md) (research direction).
205
+
206
+ ### Other substrate-adjacent surfaces
207
+
208
+ - **Trust-root manifest** (`sum_engine_internal/trust_root/`) — operator-issued signed manifest binding kid lifecycle, revocation policy, and verifier expectations.
209
+ - **Merkle sidecar format** (`sum_engine_internal/merkle_sidecar/`) — see [`docs/MERKLE_SIDECAR_FORMAT.md`](docs/MERKLE_SIDECAR_FORMAT.md).
210
+ - **Evidence-chain layer** (`sum_engine_internal/evidence/`) — substrate behind `source_chain_hash` (T4).
211
+ - **Algorithm registry** — see [`docs/ALGORITHM_REGISTRY.md`](docs/ALGORITHM_REGISTRY.md) (the in-tree list of permitted signing algs; crypto-agility gate).
212
+ - **Audit log format** — every CLI operation can emit `sum.audit_log.v1` events; see [`docs/AUDIT_LOG_FORMAT.md`](docs/AUDIT_LOG_FORMAT.md).
213
+ - **Agent surface** (`sum_engine_internal/agent_surface/`) — see [`docs/AGENT_SURFACE_FINDINGS.md`](docs/AGENT_SURFACE_FINDINGS.md).
214
+
215
+ ### Internal research surfaces (NOT shipped, present in repo)
216
+
217
+ - **`api/quantum_router.py` + `quantum_main.py`** — FastAPI surface with 26+ endpoints (branchable knowledge graph, ZK semantic proofs, federated KG sync, JWT-tenant knowledge OS). 1,684 LOC; 58/58 tests pass; runs locally via `uvicorn quantum_main:app`. **NOT in the PyPI wheel** (`pyproject.toml` excludes `api*`), **NOT in the live Worker**, **NOT in the dogfood quickstart**. The substrate it composes is load-bearing for the shipping surfaces above; only the FastAPI HTTP layer is internal-research. Promote to a shipping `[api]` extra only if a named buyer or grant deliverable explicitly references one of the endpoint clusters. See top-of-file banner in `api/quantum_router.py` for the full triage rationale.
171
218
 
172
219
  ---
173
220
 
@@ -214,7 +261,7 @@ Source anchoring in the bundle schema, bundle explorer / viewer, `sum verify --e
214
261
 
215
262
  ```bash
216
263
  make install # editable install with sieve + dev extras
217
- make test # full pytest run (1000+ tests)
264
+ make test # full pytest run (2000+ tests)
218
265
  make xruntime # cross-runtime K1/K1-mw/K2/K3/K4 (Python ↔ Node)
219
266
  make xruntime-adversarial # rejection-matrix A1–A6
220
267
  make fortress # 21-check pure-math invariants
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sum-engine"
7
- version = "0.6.0"
7
+ version = "0.7.1"
8
8
  description = "SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere."
9
9
  readme = "README.md"
10
10
  license = { text = "Apache-2.0" }
@@ -42,7 +42,26 @@ dependencies = [
42
42
  # pip install sum-engine[openai] # OpenAI structured-output path
43
43
  # pip install sum-engine[llm] # alias for [openai] (legacy name)
44
44
  # pip install sum-engine[all] # everything, plus dev tooling
45
- sieve = ["spacy>=3.7.0"]
45
+ sieve = [
46
+ # Floor bumped 3.7.0 → 3.8.0 on 2026-05-29 (F14). At spacy 3.7.0
47
+ # the auto-downloaded en_core_web_sm now resolves to a 3.8-series
48
+ # model the older runtime cannot load, and the fallback download
49
+ # builds a malformed URL (`download/-en_core_web_sm/-…`) because
50
+ # spacy.io's compatibility table no longer serves 3.7-compatible
51
+ # entries. Bumping the floor to the empirically-operable version
52
+ # keeps the declared floor honest. CI: new `pip install sum-engine
53
+ # (floor venv smoke)` job pins to floor and runs the full smoke,
54
+ # so the next time the floor decays we catch it before users do.
55
+ # See `docs/DOGFOOD_FINDINGS_2026-05-29.md` F14.
56
+ "spacy>=3.8.0",
57
+ # spacy ≥ 3.8 imports `from click import NoSuchOption` at module
58
+ # load (spacy/cli/_util.py); typer ≥ 0.13 stopped pulling click
59
+ # transitively. Declare click explicitly so a fresh
60
+ # `pip install sum-engine[sieve]` does not ImportError on first
61
+ # spacy import. CI: `pip install sum-engine (fresh venv smoke)`
62
+ # caught this 2026-05-28. See F13.
63
+ "click>=8.0",
64
+ ]
46
65
  # `[openai]` is the canonical, vendor-named extra; `[llm]` is kept as a
47
66
  # back-compat alias because it predates the multi-provider dispatcher
48
67
  # (Anthropic and OpenAI now have their own named extras). Both install
@@ -62,7 +81,11 @@ anthropic = ["anthropic>=0.97.0", "pydantic>=2.0.0"]
62
81
  # detached-JWS / RFC 7797 b64=false machinery; the existing pure-Python
63
82
  # JCS module at sum_engine_internal/infrastructure/jcs.py handles
64
83
  # canonicalization. Cryptography is already a hard dep above.
65
- receipt-verify = ["joserfc>=1.0.0"]
84
+ # Upper bound: joserfc>=1.x warns that the "EdDSA" JWS alg is deprecated
85
+ # (RFC 9864 favours explicit Ed25519/Ed448 alg identifiers). The whole
86
+ # render-receipt trust loop signs with "EdDSA", so we pin below 2.0.0
87
+ # until we confirm a major release does not drop the "EdDSA" alias.
88
+ receipt-verify = ["joserfc>=1.0.0,<2.0.0"]
66
89
  # MCP (Model Context Protocol) server. Exposes SUM verbs as MCP
67
90
  # tools so any MCP-aware LLM client (Claude Desktop, Claude Code,
68
91
  # Cursor, Continue, custom agents) can call SUM directly. The