sum-engine 0.7.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {sum_engine-0.7.0/sum_engine.egg-info → sum_engine-0.7.1}/PKG-INFO +12 -7
  2. {sum_engine-0.7.0 → sum_engine-0.7.1}/README.md +8 -4
  3. {sum_engine-0.7.0 → sum_engine-0.7.1}/pyproject.toml +26 -3
  4. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_cli/main.py +14 -0
  5. {sum_engine-0.7.0 → sum_engine-0.7.1/sum_engine.egg-info}/PKG-INFO +12 -7
  6. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine.egg-info/SOURCES.txt +1 -0
  7. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine.egg-info/requires.txt +3 -2
  8. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/akashic_ledger.py +40 -4
  9. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/conformal/__init__.py +12 -0
  10. sum_engine-0.7.1/sum_engine_internal/research/conformal/risk_control.py +207 -0
  11. {sum_engine-0.7.0 → sum_engine-0.7.1}/LICENSE +0 -0
  12. {sum_engine-0.7.0 → sum_engine-0.7.1}/setup.cfg +0 -0
  13. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_cli/__init__.py +0 -0
  14. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_cli/audit_log.py +0 -0
  15. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine.egg-info/dependency_links.txt +0 -0
  16. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine.egg-info/entry_points.txt +0 -0
  17. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine.egg-info/top_level.txt +0 -0
  18. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/__init__.py +0 -0
  19. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/adapters/__init__.py +0 -0
  20. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/adapters/format_pivot.py +0 -0
  21. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/agent_surface/__init__.py +0 -0
  22. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/agent_surface/bind.py +0 -0
  23. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/agent_surface/mcp_bind.py +0 -0
  24. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/__init__.py +0 -0
  25. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/causal_discovery.py +0 -0
  26. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/chunked_corpus.py +0 -0
  27. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/minhash.py +0 -0
  28. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/predicate_canon.py +0 -0
  29. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/semantic_arithmetic.py +0 -0
  30. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/syntactic_sieve.py +0 -0
  31. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/algorithms/zk_semantics.py +0 -0
  32. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/__init__.py +0 -0
  33. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/_predicates.py +0 -0
  34. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/eu_ai_act_article_12.py +0 -0
  35. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/gdpr_article_30.py +0 -0
  36. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/hipaa_164_312_b.py +0 -0
  37. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/iso_27001_8_15.py +0 -0
  38. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/pci_dss_4_req_10.py +0 -0
  39. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/report.py +0 -0
  40. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/compliance/soc_2_cc_7_2.py +0 -0
  41. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/__init__.py +0 -0
  42. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/automated_scientist.py +0 -0
  43. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/autonomous_agent.py +0 -0
  44. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/causal_triggers.py +0 -0
  45. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/confidence_calibrator.py +0 -0
  46. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/data/__init__.py +0 -0
  47. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/data/common_english_2000.txt +0 -0
  48. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/data/common_english_5000.txt +0 -0
  49. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/epistemic_arbiter.py +0 -0
  50. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/epistemic_loop.py +0 -0
  51. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/extraction_validator.py +0 -0
  52. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/gauge_orchestrator.py +0 -0
  53. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/live_llm_adapter.py +0 -0
  54. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/llm_dispatch.py +0 -0
  55. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/llm_entailment.py +0 -0
  56. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/mass_semantic_engine.py +0 -0
  57. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/ouroboros.py +0 -0
  58. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/s25_interventions.py +0 -0
  59. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/semantic_dedup.py +0 -0
  60. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/slider_renderer.py +0 -0
  61. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/tome_generator.py +0 -0
  62. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/tome_sliders.py +0 -0
  63. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/vector_bridge.py +0 -0
  64. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/ensemble/venn_abers.py +0 -0
  65. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/evidence/__init__.py +0 -0
  66. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/evidence/chain.py +0 -0
  67. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/__init__.py +0 -0
  68. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/base.py +0 -0
  69. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/egglog_store.py +0 -0
  70. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/graph_store/unionfind_store.py +0 -0
  71. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/__init__.py +0 -0
  72. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/canonical_codec.py +0 -0
  73. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/jcs.py +0 -0
  74. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/jose_envelope.py +0 -0
  75. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/key_manager.py +0 -0
  76. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/p2p_mesh.py +0 -0
  77. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/prov_o.py +0 -0
  78. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/provenance.py +0 -0
  79. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/rate_limiter.py +0 -0
  80. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/resource_guards.py +0 -0
  81. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/scheme_registry.py +0 -0
  82. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/state_encoding.py +0 -0
  83. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/telemetry.py +0 -0
  84. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/tome_parser.py +0 -0
  85. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/verifiable_credential.py +0 -0
  86. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/infrastructure/zig_bridge.py +0 -0
  87. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/__init__.py +0 -0
  88. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/__main__.py +0 -0
  89. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/errors.py +0 -0
  90. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/mcp_server/server.py +0 -0
  91. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/merkle_sidecar/__init__.py +0 -0
  92. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/merkle_sidecar/tree.py +0 -0
  93. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/render_receipt/__init__.py +0 -0
  94. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/render_receipt/verifier.py +0 -0
  95. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/__init__.py +0 -0
  96. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/bootstrap/__init__.py +0 -0
  97. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/bootstrap/multiplier_bootstrap.py +0 -0
  98. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/conformal/entropy_baseline.py +0 -0
  99. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/conformal/split_conformal.py +0 -0
  100. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/lsh/__init__.py +0 -0
  101. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/lsh/bundle_index.py +0 -0
  102. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/mmd/__init__.py +0 -0
  103. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/mmd/baseline.py +0 -0
  104. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/mmd/mmd.py +0 -0
  105. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/robust_pca/__init__.py +0 -0
  106. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/robust_pca/axiom_embedding.py +0 -0
  107. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/robust_pca/pcp.py +0 -0
  108. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/sequential/__init__.py +0 -0
  109. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/sequential/sprt.py +0 -0
  110. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian.py +0 -0
  111. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian_v2.py +0 -0
  112. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian_v3.py +0 -0
  113. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/sheaf_laplacian_v32.py +0 -0
  114. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/smt_consistency/__init__.py +0 -0
  115. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/smt_consistency/consistency.py +0 -0
  116. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/smt_consistency/predicate_library.py +0 -0
  117. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/spectral_entropy/__init__.py +0 -0
  118. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/research/spectral_entropy/vn_entropy.py +0 -0
  119. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transform_receipt/__init__.py +0 -0
  120. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transform_receipt/format.py +0 -0
  121. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transform_receipt/sign.py +0 -0
  122. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transform_receipt/verifier.py +0 -0
  123. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transforms/__init__.py +0 -0
  124. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transforms/_base.py +0 -0
  125. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transforms/compose.py +0 -0
  126. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transforms/extract.py +0 -0
  127. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transforms/share.py +0 -0
  128. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/transforms/slider.py +0 -0
  129. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/trust_root/__init__.py +0 -0
  130. {sum_engine-0.7.0 → sum_engine-0.7.1}/sum_engine_internal/trust_root/verifier.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sum-engine
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
5
5
  Author: ototao
6
6
  License: Apache-2.0
@@ -24,7 +24,8 @@ License-File: LICENSE
24
24
  Requires-Dist: cryptography>=41.0.0
25
25
  Requires-Dist: sympy>=1.12
26
26
  Provides-Extra: sieve
27
- Requires-Dist: spacy>=3.7.0; extra == "sieve"
27
+ Requires-Dist: spacy>=3.8.0; extra == "sieve"
28
+ Requires-Dist: click>=8.0; extra == "sieve"
28
29
  Provides-Extra: openai
29
30
  Requires-Dist: openai<3.0.0,>=1.40.0; extra == "openai"
30
31
  Requires-Dist: pydantic>=2.0.0; extra == "openai"
@@ -34,7 +35,7 @@ Provides-Extra: anthropic
34
35
  Requires-Dist: anthropic>=0.97.0; extra == "anthropic"
35
36
  Requires-Dist: pydantic>=2.0.0; extra == "anthropic"
36
37
  Provides-Extra: receipt-verify
37
- Requires-Dist: joserfc>=1.0.0; extra == "receipt-verify"
38
+ Requires-Dist: joserfc<2.0.0,>=1.0.0; extra == "receipt-verify"
38
39
  Provides-Extra: mcp
39
40
  Requires-Dist: mcp>=1.0.0; extra == "mcp"
40
41
  Provides-Extra: research
@@ -85,7 +86,7 @@ Headline supporting numbers (each links to its source of truth):
85
86
  | Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
86
87
  | Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
87
88
  | Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
88
- | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
89
+ | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark — measured; same-commit replay receipt still pending (bench-hardening T2/T3) | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
89
90
  | Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
90
91
 
91
92
  A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
@@ -123,13 +124,13 @@ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIP
123
124
  | Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
124
125
  | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned. Two dispatch paths: Worker `/api/render` (Anthropic + Cloudflare AI Gateway optional) producing `sum.render_receipt.v1`, OR Python `sum transform apply slider` (OpenAI via `OPENAI_API_KEY`) producing `sum.transform_receipt.v1` | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable. Tightening worktrail at [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) adds iteration-stability + DKW worst-case bounds + capability-region headlines |
125
126
  | MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
126
- | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped (CLI in repo HEAD; PyPI catch-up tag pending) | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
127
+ | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped on PyPI 0.7.0 | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
127
128
  | Replay-defense window (`signed_at_out_of_window`) | shipped | opt-in `max_age_seconds` parameter across all four verifier surfaces (Python render / Python transform / JS render / JS transform). Default-off preserves archival use; receivers opt in per use-case (agent-swarm 60s, real-time 600s, newsletter 1d, legal-discovery no window). |
128
129
  | `sum verify --explain` layered output | shipped | Per-dimension report (`sum.verify_explained.v1`): cryptographic integrity / canonical reconstruction / axiom consistency / extraction provenance / source evidence coverage / semantic preservation / truth of content. Each carries `epistemic_status` (`provable` / `certified` / `empirical-benchmark` / `not-asserted`). Truth of content is ALWAYS `not_asserted` — locked by test. |
129
130
  | Negative-control corpus (T5 of bench-hardening) | shipped | 20 hand-authored documents across 5 failure modes (ambiguous coref / predicate-alias / contradictions / entity-resolution-adversarial / non-extractable). Runner exits 1 if observed failures don't match annotations. Baseline at [`fixtures/bench_receipts/negative_control_2026-05-17.json`](fixtures/bench_receipts/negative_control_2026-05-17.json). |
130
131
  | Compliance validators (six regimes) | shipped | `sum compliance check --regime <id> --audit-log <path>` — EU AI Act Article 12, GDPR Article 30, HIPAA § 164.312(b), ISO/IEC 27001 A.8.15, SOC 2 CC 7.2, PCI DSS v4.0 Req 10. All six produce the same `sum.compliance_report.v1` schema; per-regime docs at `docs/COMPLIANCE_*.md`. |
131
132
 
132
- The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
133
+ The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md). In keeping with the "what remains unproven" half of the promise above: these headline numbers are **measured observations**, not yet same-commit-replayable — the bench harness (`Tests/benchmarks/slider_drift_bench.py`) is still in scaffold state and no `sum.slider_drift_bench.v1` receipt is committed. Closing that gap with a replayable receipt is the work of bench-hardening tasks T2 / T3 ([`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md)); see the reproducibility-status note in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
133
134
 
134
135
  ## Strategic context
135
136
 
@@ -251,7 +252,7 @@ Below the slider sits the substrate that earlier phases shipped and verified. Po
251
252
  - **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
252
253
  - **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
253
254
  - **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
254
- - **168 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
255
+ - **170 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
255
256
 
256
257
  ### Research substrate (under `sum_engine_internal/research/`)
257
258
 
@@ -275,6 +276,10 @@ Less-surfaced but shipped:
275
276
  - **Audit log format** — every CLI operation can emit `sum.audit_log.v1` events; see [`docs/AUDIT_LOG_FORMAT.md`](docs/AUDIT_LOG_FORMAT.md).
276
277
  - **Agent surface** (`sum_engine_internal/agent_surface/`) — see [`docs/AGENT_SURFACE_FINDINGS.md`](docs/AGENT_SURFACE_FINDINGS.md).
277
278
 
279
+ ### Internal research surfaces (NOT shipped, present in repo)
280
+
281
+ - **`api/quantum_router.py` + `quantum_main.py`** — FastAPI surface with 26+ endpoints (branchable knowledge graph, ZK semantic proofs, federated KG sync, JWT-tenant knowledge OS). 1,684 LOC; 58/58 tests pass; runs locally via `uvicorn quantum_main:app`. **NOT in the PyPI wheel** (`pyproject.toml` excludes `api*`), **NOT in the live Worker**, **NOT in the dogfood quickstart**. The substrate it composes is load-bearing for the shipping surfaces above; only the FastAPI HTTP layer is internal-research. Promote to a shipping `[api]` extra only if a named buyer or grant deliverable explicitly references one of the endpoint clusters. See top-of-file banner in `api/quantum_router.py` for the full triage rationale.
282
+
278
283
  ---
279
284
 
280
285
  ## Reproduce the bench
@@ -22,7 +22,7 @@ Headline supporting numbers (each links to its source of truth):
22
22
  | Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
23
23
  | Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
24
24
  | Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
25
- | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
25
+ | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark — measured; same-commit replay receipt still pending (bench-hardening T2/T3) | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
26
26
  | Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
27
27
 
28
28
  A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
@@ -60,13 +60,13 @@ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIP
60
60
  | Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
61
61
  | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned. Two dispatch paths: Worker `/api/render` (Anthropic + Cloudflare AI Gateway optional) producing `sum.render_receipt.v1`, OR Python `sum transform apply slider` (OpenAI via `OPENAI_API_KEY`) producing `sum.transform_receipt.v1` | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable. Tightening worktrail at [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) adds iteration-stability + DKW worst-case bounds + capability-region headlines |
62
62
  | MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
63
- | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped (CLI in repo HEAD; PyPI catch-up tag pending) | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
63
+ | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped on PyPI 0.7.0 | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
64
64
  | Replay-defense window (`signed_at_out_of_window`) | shipped | opt-in `max_age_seconds` parameter across all four verifier surfaces (Python render / Python transform / JS render / JS transform). Default-off preserves archival use; receivers opt in per use-case (agent-swarm 60s, real-time 600s, newsletter 1d, legal-discovery no window). |
65
65
  | `sum verify --explain` layered output | shipped | Per-dimension report (`sum.verify_explained.v1`): cryptographic integrity / canonical reconstruction / axiom consistency / extraction provenance / source evidence coverage / semantic preservation / truth of content. Each carries `epistemic_status` (`provable` / `certified` / `empirical-benchmark` / `not-asserted`). Truth of content is ALWAYS `not_asserted` — locked by test. |
66
66
  | Negative-control corpus (T5 of bench-hardening) | shipped | 20 hand-authored documents across 5 failure modes (ambiguous coref / predicate-alias / contradictions / entity-resolution-adversarial / non-extractable). Runner exits 1 if observed failures don't match annotations. Baseline at [`fixtures/bench_receipts/negative_control_2026-05-17.json`](fixtures/bench_receipts/negative_control_2026-05-17.json). |
67
67
  | Compliance validators (six regimes) | shipped | `sum compliance check --regime <id> --audit-log <path>` — EU AI Act Article 12, GDPR Article 30, HIPAA § 164.312(b), ISO/IEC 27001 A.8.15, SOC 2 CC 7.2, PCI DSS v4.0 Req 10. All six produce the same `sum.compliance_report.v1` schema; per-regime docs at `docs/COMPLIANCE_*.md`. |
68
68
 
69
- The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
69
+ The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md). In keeping with the "what remains unproven" half of the promise above: these headline numbers are **measured observations**, not yet same-commit-replayable — the bench harness (`Tests/benchmarks/slider_drift_bench.py`) is still in scaffold state and no `sum.slider_drift_bench.v1` receipt is committed. Closing that gap with a replayable receipt is the work of bench-hardening tasks T2 / T3 ([`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md)); see the reproducibility-status note in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
70
70
 
71
71
  ## Strategic context
72
72
 
@@ -188,7 +188,7 @@ Below the slider sits the substrate that earlier phases shipped and verified. Po
188
188
  - **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
189
189
  - **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
190
190
  - **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
191
- - **168 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
191
+ - **170 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
192
192
 
193
193
  ### Research substrate (under `sum_engine_internal/research/`)
194
194
 
@@ -212,6 +212,10 @@ Less-surfaced but shipped:
212
212
  - **Audit log format** — every CLI operation can emit `sum.audit_log.v1` events; see [`docs/AUDIT_LOG_FORMAT.md`](docs/AUDIT_LOG_FORMAT.md).
213
213
  - **Agent surface** (`sum_engine_internal/agent_surface/`) — see [`docs/AGENT_SURFACE_FINDINGS.md`](docs/AGENT_SURFACE_FINDINGS.md).
214
214
 
215
+ ### Internal research surfaces (NOT shipped, present in repo)
216
+
217
+ - **`api/quantum_router.py` + `quantum_main.py`** — FastAPI surface with 26+ endpoints (branchable knowledge graph, ZK semantic proofs, federated KG sync, JWT-tenant knowledge OS). 1,684 LOC; 58/58 tests pass; runs locally via `uvicorn quantum_main:app`. **NOT in the PyPI wheel** (`pyproject.toml` excludes `api*`), **NOT in the live Worker**, **NOT in the dogfood quickstart**. The substrate it composes is load-bearing for the shipping surfaces above; only the FastAPI HTTP layer is internal-research. Promote to a shipping `[api]` extra only if a named buyer or grant deliverable explicitly references one of the endpoint clusters. See top-of-file banner in `api/quantum_router.py` for the full triage rationale.
218
+
215
219
  ---
216
220
 
217
221
  ## Reproduce the bench
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sum-engine"
7
- version = "0.7.0"
7
+ version = "0.7.1"
8
8
  description = "SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere."
9
9
  readme = "README.md"
10
10
  license = { text = "Apache-2.0" }
@@ -42,7 +42,26 @@ dependencies = [
42
42
  # pip install sum-engine[openai] # OpenAI structured-output path
43
43
  # pip install sum-engine[llm] # alias for [openai] (legacy name)
44
44
  # pip install sum-engine[all] # everything, plus dev tooling
45
- sieve = ["spacy>=3.7.0"]
45
+ sieve = [
46
+ # Floor bumped 3.7.0 → 3.8.0 on 2026-05-29 (F14). At spacy 3.7.0
47
+ # the auto-downloaded en_core_web_sm now resolves to a 3.8-series
48
+ # model the older runtime cannot load, and the fallback download
49
+ # builds a malformed URL (`download/-en_core_web_sm/-…`) because
50
+ # spacy.io's compatibility table no longer serves 3.7-compatible
51
+ # entries. Bumping the floor to the empirically-operable version
52
+ # keeps the declared floor honest. CI: new `pip install sum-engine
53
+ # (floor venv smoke)` job pins to floor and runs the full smoke,
54
+ # so the next time the floor decays we catch it before users do.
55
+ # See `docs/DOGFOOD_FINDINGS_2026-05-29.md` F14.
56
+ "spacy>=3.8.0",
57
+ # spacy ≥ 3.8 imports `from click import NoSuchOption` at module
58
+ # load (spacy/cli/_util.py); typer ≥ 0.13 stopped pulling click
59
+ # transitively. Declare click explicitly so a fresh
60
+ # `pip install sum-engine[sieve]` does not ImportError on first
61
+ # spacy import. CI: `pip install sum-engine (fresh venv smoke)`
62
+ # caught this 2026-05-28. See F13.
63
+ "click>=8.0",
64
+ ]
46
65
  # `[openai]` is the canonical, vendor-named extra; `[llm]` is kept as a
47
66
  # back-compat alias because it predates the multi-provider dispatcher
48
67
  # (Anthropic and OpenAI now have their own named extras). Both install
@@ -62,7 +81,11 @@ anthropic = ["anthropic>=0.97.0", "pydantic>=2.0.0"]
62
81
  # detached-JWS / RFC 7797 b64=false machinery; the existing pure-Python
63
82
  # JCS module at sum_engine_internal/infrastructure/jcs.py handles
64
83
  # canonicalization. Cryptography is already a hard dep above.
65
- receipt-verify = ["joserfc>=1.0.0"]
84
+ # Upper bound: joserfc>=1.x warns that the "EdDSA" JWS alg is deprecated
85
+ # (RFC 9864 favours explicit Ed25519/Ed448 alg identifiers). The whole
86
+ # render-receipt trust loop signs with "EdDSA", so we pin below 2.0.0
87
+ # until we confirm a major release does not drop the "EdDSA" alias.
88
+ receipt-verify = ["joserfc>=1.0.0,<2.0.0"]
66
89
  # MCP (Model Context Protocol) server. Exposes SUM verbs as MCP
67
90
  # tools so any MCP-aware LLM client (Claude Desktop, Claude Code,
68
91
  # Cursor, Continue, custom agents) can call SUM directly. The
@@ -379,6 +379,20 @@ def cmd_attest(args: argparse.Namespace) -> int:
379
379
  title=args.title,
380
380
  )
381
381
 
382
+ # Surface the extracted axioms on the bundle so downstream transforms
383
+ # (`sum transform apply compose`, slider input shape) can consume the
384
+ # attest output directly without re-parsing canonical_tome. The data
385
+ # exists internally as ``triples``; before this it was dropped at
386
+ # serialization. Additive — the signature covers
387
+ # ``canonical_tome|state_integer|timestamp``, not the bundle JSON, so
388
+ # writing a new top-level key does not invalidate any existing
389
+ # signature. Format mirrors what compose._bundle_triples expects:
390
+ # list of {subject, predicate, object} dicts.
391
+ bundle["axioms"] = [
392
+ {"subject": s, "predicate": p, "object": o}
393
+ for (s, p, o) in triples
394
+ ]
395
+
382
396
  # Optional: attach a lightweight sidecar naming the extractor + source
383
397
  # URI so downstream consumers can trace provenance without the full
384
398
  # AkashicLedger. This is additive — the CanonicalBundle schema
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sum-engine
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
5
5
  Author: ototao
6
6
  License: Apache-2.0
@@ -24,7 +24,8 @@ License-File: LICENSE
24
24
  Requires-Dist: cryptography>=41.0.0
25
25
  Requires-Dist: sympy>=1.12
26
26
  Provides-Extra: sieve
27
- Requires-Dist: spacy>=3.7.0; extra == "sieve"
27
+ Requires-Dist: spacy>=3.8.0; extra == "sieve"
28
+ Requires-Dist: click>=8.0; extra == "sieve"
28
29
  Provides-Extra: openai
29
30
  Requires-Dist: openai<3.0.0,>=1.40.0; extra == "openai"
30
31
  Requires-Dist: pydantic>=2.0.0; extra == "openai"
@@ -34,7 +35,7 @@ Provides-Extra: anthropic
34
35
  Requires-Dist: anthropic>=0.97.0; extra == "anthropic"
35
36
  Requires-Dist: pydantic>=2.0.0; extra == "anthropic"
36
37
  Provides-Extra: receipt-verify
37
- Requires-Dist: joserfc>=1.0.0; extra == "receipt-verify"
38
+ Requires-Dist: joserfc<2.0.0,>=1.0.0; extra == "receipt-verify"
38
39
  Provides-Extra: mcp
39
40
  Requires-Dist: mcp>=1.0.0; extra == "mcp"
40
41
  Provides-Extra: research
@@ -85,7 +86,7 @@ Headline supporting numbers (each links to its source of truth):
85
86
  | Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
86
87
  | Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
87
88
  | Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
88
- | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
89
+ | Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark — measured; same-commit replay receipt still pending (bench-hardening T2/T3) | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
89
90
  | Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
90
91
 
91
92
  A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
@@ -123,13 +124,13 @@ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIP
123
124
  | Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
124
125
  | 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned. Two dispatch paths: Worker `/api/render` (Anthropic + Cloudflare AI Gateway optional) producing `sum.render_receipt.v1`, OR Python `sum transform apply slider` (OpenAI via `OPENAI_API_KEY`) producing `sum.transform_receipt.v1` | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable. Tightening worktrail at [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) adds iteration-stability + DKW worst-case bounds + capability-region headlines |
125
126
  | MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
126
- | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped (CLI in repo HEAD; PyPI catch-up tag pending) | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
127
+ | Transform substrate (`sum.transform_receipt.v1` + registry) | shipped on PyPI 0.7.0 | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
127
128
  | Replay-defense window (`signed_at_out_of_window`) | shipped | opt-in `max_age_seconds` parameter across all four verifier surfaces (Python render / Python transform / JS render / JS transform). Default-off preserves archival use; receivers opt in per use-case (agent-swarm 60s, real-time 600s, newsletter 1d, legal-discovery no window). |
128
129
  | `sum verify --explain` layered output | shipped | Per-dimension report (`sum.verify_explained.v1`): cryptographic integrity / canonical reconstruction / axiom consistency / extraction provenance / source evidence coverage / semantic preservation / truth of content. Each carries `epistemic_status` (`provable` / `certified` / `empirical-benchmark` / `not-asserted`). Truth of content is ALWAYS `not_asserted` — locked by test. |
129
130
  | Negative-control corpus (T5 of bench-hardening) | shipped | 20 hand-authored documents across 5 failure modes (ambiguous coref / predicate-alias / contradictions / entity-resolution-adversarial / non-extractable). Runner exits 1 if observed failures don't match annotations. Baseline at [`fixtures/bench_receipts/negative_control_2026-05-17.json`](fixtures/bench_receipts/negative_control_2026-05-17.json). |
130
131
  | Compliance validators (six regimes) | shipped | `sum compliance check --regime <id> --audit-log <path>` — EU AI Act Article 12, GDPR Article 30, HIPAA § 164.312(b), ISO/IEC 27001 A.8.15, SOC 2 CC 7.2, PCI DSS v4.0 Req 10. All six produce the same `sum.compliance_report.v1` schema; per-regime docs at `docs/COMPLIANCE_*.md`. |
131
132
 
132
- The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
133
+ The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md). In keeping with the "what remains unproven" half of the promise above: these headline numbers are **measured observations**, not yet same-commit-replayable — the bench harness (`Tests/benchmarks/slider_drift_bench.py`) is scaffold-state and no `sum.slider_drift_bench.v1` receipt is committed. Closing that to a replayable receipt is bench-hardening tasks T2 / T3 ([`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md)); see the reproducibility-status note in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
133
134
 
134
135
  ## Strategic context
135
136
 
@@ -251,7 +252,7 @@ Below the slider sits the substrate that earlier phases shipped and verified. Po
251
252
  - **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
252
253
  - **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
253
254
  - **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
254
- - **168 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
255
+ - **170 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
255
256
 
256
257
  ### Research substrate (under `sum_engine_internal/research/`)
257
258
 
@@ -275,6 +276,10 @@ Less-surfaced but shipped:
275
276
  - **Audit log format** — every CLI operation can emit `sum.audit_log.v1` events; see [`docs/AUDIT_LOG_FORMAT.md`](docs/AUDIT_LOG_FORMAT.md).
276
277
  - **Agent surface** (`sum_engine_internal/agent_surface/`) — see [`docs/AGENT_SURFACE_FINDINGS.md`](docs/AGENT_SURFACE_FINDINGS.md).
277
278
 
279
+ ### Internal research surfaces (NOT shipped, present in repo)
280
+
281
+ - **`api/quantum_router.py` + `quantum_main.py`** — FastAPI surface with 26+ endpoints (branchable knowledge graph, ZK semantic proofs, federated KG sync, JWT-tenant knowledge OS). 1,684 LOC; 58/58 tests pass; runs locally via `uvicorn quantum_main:app`. **NOT in the PyPI wheel** (`pyproject.toml` excludes `api*`), **NOT in the live Worker**, **NOT in the dogfood quickstart**. The substrate it composes is load-bearing for the shipping surfaces above; only the FastAPI HTTP layer is internal-research. Promote to a shipping `[api]` extra only if a named buyer or grant deliverable explicitly references one of the endpoint clusters. See top-of-file banner in `api/quantum_router.py` for the full triage rationale.
282
+
278
283
  ---
279
284
 
280
285
  ## Reproduce the bench
@@ -97,6 +97,7 @@ sum_engine_internal/research/bootstrap/__init__.py
97
97
  sum_engine_internal/research/bootstrap/multiplier_bootstrap.py
98
98
  sum_engine_internal/research/conformal/__init__.py
99
99
  sum_engine_internal/research/conformal/entropy_baseline.py
100
+ sum_engine_internal/research/conformal/risk_control.py
100
101
  sum_engine_internal/research/conformal/split_conformal.py
101
102
  sum_engine_internal/research/lsh/__init__.py
102
103
  sum_engine_internal/research/lsh/bundle_index.py
@@ -38,11 +38,12 @@ openai<3.0.0,>=1.40.0
38
38
  pydantic>=2.0.0
39
39
 
40
40
  [receipt-verify]
41
- joserfc>=1.0.0
41
+ joserfc<2.0.0,>=1.0.0
42
42
 
43
43
  [research]
44
44
  numpy>=1.24.0
45
45
  scipy>=1.10.0
46
46
 
47
47
  [sieve]
48
- spacy>=3.7.0
48
+ spacy>=3.8.0
49
+ click>=8.0
@@ -31,9 +31,11 @@ License: Apache License 2.0
31
31
  import math
32
32
  import hashlib
33
33
  import json
34
+ import random
34
35
  import sqlite3
35
36
  import asyncio
36
37
  import logging
38
+ import time
37
39
  from contextlib import contextmanager
38
40
  from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
39
41
 
@@ -86,11 +88,22 @@ class AkashicLedger:
86
88
 
87
89
  # SQLite busy_timeout (ms) applied to every connection. Lets
88
90
  # writers wait for the lock instead of failing immediately —
89
- # eliminates "database is locked" under concurrent-writer
90
- # contention without changing logic. 5 s is comfortably above
91
- # the worst-case test scenario (1000 concurrent inserts).
91
+ # the first line of defence against "database is locked" under
92
+ # concurrent-writer contention.
92
93
  _BUSY_TIMEOUT_MS = 5000
93
94
 
95
+ # Second line of defence: busy_timeout is NOT starvation-free. Under
96
+ # sustained many-writer contention (and on a loaded CI runner), an
97
+ # unlucky writer can be repeatedly passed over and still see
98
+ # SQLITE_BUSY after the timeout. A bounded retry-with-jittered-backoff
99
+ # around BEGIN IMMEDIATE makes that failure far less likely (the retry
100
+ # is bounded, so it is not a guarantee). Retrying BEGIN IMMEDIATE is safe — it acquires the
101
+ # write lock before any SQL runs, so a failed attempt leaves no
102
+ # partial state. Worst case ~ attempts × busy_timeout, but in practice
103
+ # the lock is grabbed on the first or second try.
104
+ _BEGIN_RETRY_ATTEMPTS = 6
105
+ _BEGIN_RETRY_BASE_SLEEP_S = 0.05
106
+
94
107
  def __init__(self, db_path: str = "akashic.db"):
95
108
  self.db_path = db_path
96
109
  self._init_db()
@@ -208,9 +221,32 @@ class AkashicLedger:
208
221
  Tests/test_ledger_concurrency.py exercises this discipline.
209
222
  """
210
223
  with self._connect() as conn:
211
- conn.execute("BEGIN IMMEDIATE")
224
+ self._begin_immediate(conn)
212
225
  yield conn
213
226
 
227
def _begin_immediate(self, conn: sqlite3.Connection) -> None:
    """Issue ``BEGIN IMMEDIATE`` on ``conn``, retrying on lock contention.

    An ``sqlite3.OperationalError`` whose message contains "locked"
    (case-insensitive) is treated as transient and retried with
    exponential backoff plus a small random jitter. Any other
    ``OperationalError`` propagates immediately.

    Args:
        conn: connection on which the write transaction is opened.

    Raises:
        sqlite3.OperationalError: the non-lock error, or the lock error
            from the final attempt once all attempts are used up.
    """
    # Always try at least once, even if the class constant is set to zero
    # or a negative value; otherwise the lock would never be requested.
    attempts = max(1, self._BEGIN_RETRY_ATTEMPTS)
    for attempt in range(attempts):
        try:
            conn.execute("BEGIN IMMEDIATE")
            return
        except sqlite3.OperationalError as exc:
            out_of_retries = attempt == attempts - 1
            if out_of_retries or "locked" not in str(exc).lower():
                # Re-raise from inside the handler so no post-loop
                # ``assert`` (stripped under ``python -O``) is needed.
                raise
        # Base sleep doubles per attempt; up to 20 ms of jitter keeps
        # competing writers from retrying in lockstep.
        backoff = self._BEGIN_RETRY_BASE_SLEEP_S * (2 ** attempt)
        time.sleep(backoff + random.uniform(0.0, 0.02))
249
+
214
250
  def _migrate_structured_provenance(self, conn: sqlite3.Connection) -> None:
215
251
  """M1: Structured ProvenanceRecord side-table + axiom linking.
216
252
 
@@ -33,6 +33,13 @@ from sum_engine_internal.research.conformal.entropy_baseline import (
33
33
  BaselineEntropyPredictor,
34
34
  get_default_predictor,
35
35
  )
36
+ from sum_engine_internal.research.conformal.risk_control import (
37
+ RateGuarantee,
38
+ certify_rate,
39
+ clopper_pearson_lower_bound,
40
+ empirical_bound_coverage,
41
+ hoeffding_lower_bound,
42
+ )
36
43
 
37
44
  __all__ = [
38
45
  "SplitConformal",
@@ -41,4 +48,9 @@ __all__ = [
41
48
  "average_interval_width",
42
49
  "BaselineEntropyPredictor",
43
50
  "get_default_predictor",
51
+ "RateGuarantee",
52
+ "certify_rate",
53
+ "clopper_pearson_lower_bound",
54
+ "hoeffding_lower_bound",
55
+ "empirical_bound_coverage",
44
56
  ]
@@ -0,0 +1,207 @@
1
+ """Distribution-free lower confidence bounds on a preservation rate.
2
+
3
+ The split-conformal kernel (`split_conformal.py`) wraps a point
4
+ predictor in a calibrated *interval*. This module answers the
5
+ complementary, one-sided question that SUM's slider contract actually
6
+ asks:
7
+
8
+ "With confidence ≥ 1 - δ, what is the largest X such that the
9
+ fact-preservation rate ≥ X?"
10
+
11
+ That is a one-sided lower confidence bound on the mean of bounded
12
+ [0, 1] observations (per-cell preservation fractions) or on a binomial
13
+ proportion (per-fact preserved / lost). It is the certifier shape the
14
+ bench-hardening plan's T3 names — "fact preservation ≥ X with 95 %
15
+ confidence over the tested envelope" — expressed as a finite-sample,
16
+ distribution-free guarantee rather than a tail percentile of an
17
+ empirical distribution.
18
+
19
+ Two bounds ship, both finite-sample and distribution-free:
20
+
21
+ - **Hoeffding** — for any observations in [0, 1]. From Hoeffding's
22
+ inequality P(μ̂ - μ ≥ t) ≤ exp(-2 n t²), the (1-δ) one-sided lower
23
+ bound is μ̂ - sqrt(ln(1/δ) / (2n)). Always valid; conservative. [provable]
24
+
25
+ - **Clopper–Pearson** — exact one-sided lower limit for a binomial
26
+ proportion (per-fact preserved/lost), the δ-quantile
27
+ Beta(δ; k, n-k+1). Tighter than Hoeffding for the binary view and
28
+ the most interpretable framing ("≥ X % of facts preserved"). [provable]
29
+
30
+ Relationship to DKW (the other T3 tool): DKW bounds the *entire* drift
31
+ CDF uniformly, which is the right tool for a quantile statement over a
32
+ distribution. For a single *rate* (a mean / proportion), the bounds
33
+ here are the tighter, purpose-built instrument. Use DKW for the
34
+ full-distribution worst-case envelope and these for the headline rate;
35
+ they are complementary, not redundant.
36
+
37
+ Honest boundary: like all conformal-family guarantees, validity rests
38
+ on **exchangeability** between the calibration sample and deployment —
39
+ i.e. the bound holds *within the tested envelope* (the T2 capability
40
+ region), degrading on out-of-distribution inputs. State the envelope
41
+ alongside the bound; never quote the rate without it.
42
+
43
+ Author: ototao
44
+ License: Apache License 2.0
45
+ """
46
+ from __future__ import annotations
47
+
48
+ import math
49
+ from dataclasses import dataclass
50
+ from typing import Literal, Sequence
51
+
52
+ import numpy as np
53
+
54
+
55
+ @dataclass(frozen=True, slots=True)
56
+ class RateGuarantee:
57
+ """A finite-sample, distribution-free lower bound on a rate.
58
+
59
+ Reads as: "with confidence ≥ ``confidence``, the true rate is
60
+ ≥ ``rate_lower_bound``", valid under exchangeability of the
61
+ sample with deployment (i.e. within the tested envelope).
62
+ """
63
+ rate_lower_bound: float # the certified floor X
64
+ point_estimate: float # observed mean / proportion
65
+ n: int # sample size
66
+ delta: float # miscoverage allowance (confidence = 1 - delta)
67
+ method: str # "hoeffding" | "clopper_pearson"
68
+
69
+ @property
70
+ def confidence(self) -> float:
71
+ return 1.0 - self.delta
72
+
73
+ @property
74
+ def slack(self) -> float:
75
+ """Gap between the point estimate and the certified floor —
76
+ the price of finite-sample, distribution-free rigour."""
77
+ return self.point_estimate - self.rate_lower_bound
78
+
79
+
80
def _validate_delta(delta: float) -> None:
    """Raise ``ValueError`` unless ``delta`` lies strictly between 0 and 1.

    The chained comparison is deliberate: it is False for NaN, so a NaN
    ``delta`` is rejected as well.
    """
    inside_open_unit = 0.0 < delta < 1.0
    if inside_open_unit:
        return
    raise ValueError(f"delta must be in (0, 1); got {delta}")
83
+
84
+
85
def hoeffding_lower_bound(values: Sequence[float], delta: float = 0.05) -> float:
    """Return a one-sided (1-δ) Hoeffding lower bound on the mean.

    The bound is ``mean - sqrt(ln(1/δ) / (2n))``, clamped to [0, 1].

    Args:
        values: 1-D, non-empty sequence of finite observations in [0, 1].
        delta: miscoverage allowance, strictly between 0 and 1.

    Raises:
        ValueError: if ``delta`` is out of range, or ``values`` is not
            1-D, is empty, holds NaN/inf, or leaves [0, 1].
    """
    _validate_delta(delta)
    samples = np.asarray(values, dtype=np.float64)
    if samples.ndim != 1:
        raise ValueError(f"values must be 1-D; got shape {samples.shape}")
    count = samples.size
    if count < 1:
        raise ValueError("values must be non-empty")
    # Finiteness is checked before the range test because NaN compares
    # False against both ends of the interval and would pass it unnoticed.
    if not np.isfinite(samples).all():
        raise ValueError("values must all be finite (no NaN/inf)")
    outside_unit = (samples < 0.0) | (samples > 1.0)
    if outside_unit.any():
        raise ValueError("Hoeffding bound requires all values in [0, 1]")
    sample_mean = float(samples.mean())
    half_width = math.sqrt(math.log(1.0 / delta) / (2.0 * count))
    floor = sample_mean - half_width
    return max(0.0, min(1.0, floor))
107
+
108
+
109
def clopper_pearson_lower_bound(successes: int, n: int, delta: float = 0.05) -> float:
    """Return the one-sided (1-δ) Clopper–Pearson lower limit for a proportion.

    The limit is the δ-quantile of Beta(successes, n - successes + 1).
    By convention it is 0 when there are no successes.

    Args:
        successes: number of successes, between 0 and ``n`` inclusive.
        n: number of Bernoulli trials, at least 1.
        delta: miscoverage allowance, strictly between 0 and 1.

    Raises:
        ValueError: if ``delta``, ``n`` or ``successes`` is out of range.
    """
    _validate_delta(delta)
    if n < 1:
        raise ValueError("n must be >= 1")
    if not (0 <= successes <= n):
        raise ValueError(f"successes must be in [0, n]; got {successes} of {n}")
    if successes == 0:
        return 0.0
    # scipy is imported only here, so the rest of the module works without it.
    from scipy.stats import beta  # type: ignore

    shape_a = successes
    shape_b = n - successes + 1
    quantile = beta.ppf(delta, shape_a, shape_b)
    return float(quantile)
127
+
128
+
129
def certify_rate(
    observations: Sequence[float],
    delta: float = 0.05,
    method: Literal["auto", "hoeffding", "clopper_pearson"] = "auto",
) -> RateGuarantee:
    """Build a ``RateGuarantee`` for a sample of preservation observations.

    With ``method="auto"`` the bound is Clopper–Pearson when every
    observation is exactly 0 or 1, and Hoeffding otherwise.

    Args:
        observations: 1-D, non-empty sequence of finite values in [0, 1].
        delta: miscoverage allowance passed on to the chosen bound.
        method: "auto", "hoeffding" or "clopper_pearson".

    Raises:
        ValueError: if the observations are not 1-D, are empty, hold
            NaN/inf or leave [0, 1]; if "clopper_pearson" is requested
            for non-binary data; or if ``method`` is not recognised.
    """
    samples = np.asarray(observations, dtype=np.float64)
    if samples.ndim != 1:
        raise ValueError(f"observations must be 1-D; got shape {samples.shape}")
    count = samples.size
    if count < 1:
        raise ValueError("observations must be non-empty")
    # Finiteness first: NaN compares False on both sides of the range
    # test and would otherwise pass it.
    if not np.isfinite(samples).all():
        raise ValueError("observations must all be finite (no NaN/inf)")
    outside_unit = (samples < 0.0) | (samples > 1.0)
    if outside_unit.any():
        raise ValueError("observations must lie in [0, 1]")

    all_binary = bool(np.isin(samples, (0.0, 1.0)).all())
    if method == "auto":
        resolved = "clopper_pearson" if all_binary else "hoeffding"
    else:
        resolved = method

    if resolved == "hoeffding":
        floor = hoeffding_lower_bound(samples, delta)
    elif resolved == "clopper_pearson":
        if not all_binary:
            raise ValueError(
                "clopper_pearson requires binary (0/1) observations; "
                "use 'hoeffding' for fractional [0, 1] values"
            )
        hits = int(round(float(samples.sum())))
        floor = clopper_pearson_lower_bound(hits, count, delta)
    else:
        raise ValueError(f"unknown method {method!r}")

    return RateGuarantee(
        rate_lower_bound=floor,
        point_estimate=float(samples.mean()),
        n=count,
        delta=float(delta),
        method=resolved,
    )
179
+
180
+
181
+ # -- Diagnostics --------------------------------------------------------
182
+
183
+
184
def empirical_bound_coverage(
    true_rate: float,
    n: int,
    delta: float,
    method: Literal["hoeffding", "clopper_pearson"],
    n_trials: int = 2000,
    seed: int = 0,
) -> float:
    """Estimate by simulation how often a lower bound stays at or below the truth.

    Each trial draws ``n`` Bernoulli(``true_rate``) observations from a
    ``numpy.random.RandomState`` seeded with ``seed``, computes the
    requested lower bound, and counts as covered when the bound does not
    exceed ``true_rate``. A valid (1-δ) bound should reach coverage of at
    least 1-δ.

    Args:
        true_rate: success probability used to simulate, in [0, 1].
        n: observations per trial, passed through to the bound function.
        delta: miscoverage allowance, passed through to the bound function.
        method: "hoeffding" or "clopper_pearson".
        n_trials: number of simulated trials, at least 1.
        seed: seed for the random generator; fixes the result.

    Returns:
        The fraction of trials that were covered, in [0, 1].

    Raises:
        ValueError: if ``true_rate`` is outside [0, 1], ``method`` is not
            recognised, or ``n_trials`` is below 1.
    """
    if not (0.0 <= true_rate <= 1.0):
        raise ValueError("true_rate must be in [0, 1]")
    # Reject unknown methods explicitly; silently falling through to
    # Hoeffding would report coverage for a bound the caller never asked for.
    if method not in ("hoeffding", "clopper_pearson"):
        raise ValueError(f"unknown method {method!r}")
    # Zero trials would divide by zero; a negative count would yield -0.0.
    if n_trials < 1:
        raise ValueError(f"n_trials must be >= 1; got {n_trials}")

    rng = np.random.RandomState(seed)
    covered = 0
    for _ in range(n_trials):
        draws = (rng.uniform(size=n) < true_rate).astype(np.float64)
        if method == "clopper_pearson":
            lb = clopper_pearson_lower_bound(int(draws.sum()), n, delta)
        else:
            lb = hoeffding_lower_bound(draws, delta)
        if lb <= true_rate:
            covered += 1
    return covered / n_trials
File without changes
File without changes