sum-engine 0.7.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sum_engine-0.7.0 → sum_engine-0.8.0}/PKG-INFO +31 -9
- sum_engine-0.7.0/sum_engine.egg-info/PKG-INFO → sum_engine-0.8.0/README.md +20 -69
- {sum_engine-0.7.0 → sum_engine-0.8.0}/pyproject.toml +50 -7
- sum_engine-0.8.0/sum_cli/__main__.py +11 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_cli/main.py +689 -0
- sum_engine-0.7.0/README.md → sum_engine-0.8.0/sum_engine.egg-info/PKG-INFO +91 -6
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine.egg-info/SOURCES.txt +17 -1
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine.egg-info/requires.txt +12 -2
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine.egg-info/top_level.txt +1 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/akashic_ledger.py +40 -4
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/jcs.py +44 -7
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/jose_envelope.py +21 -6
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/conformal/__init__.py +14 -0
- sum_engine-0.8.0/sum_engine_internal/research/conformal/risk_control.py +260 -0
- sum_engine-0.8.0/sum_engine_internal/research/frontier.py +236 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/__init__.py +132 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/conformal_meaning.py +300 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/drift_budget.py +362 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/exchangeability.py +197 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/local_judge.py +266 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/meaning_loss.py +405 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/perspective_receipt.py +294 -0
- sum_engine-0.8.0/sum_engine_internal/research/meaning/receipt.py +414 -0
- sum_engine-0.8.0/sum_verify/__init__.py +152 -0
- sum_engine-0.8.0/sum_verify/__main__.py +110 -0
- sum_engine-0.8.0/sum_verify/_conformal.py +272 -0
- sum_engine-0.8.0/sum_verify/_meaning.py +206 -0
- sum_engine-0.8.0/sum_verify/py.typed +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/LICENSE +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/setup.cfg +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_cli/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_cli/audit_log.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine.egg-info/dependency_links.txt +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine.egg-info/entry_points.txt +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/adapters/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/adapters/format_pivot.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/agent_surface/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/agent_surface/bind.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/agent_surface/mcp_bind.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/causal_discovery.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/chunked_corpus.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/minhash.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/predicate_canon.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/semantic_arithmetic.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/syntactic_sieve.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/algorithms/zk_semantics.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/_predicates.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/eu_ai_act_article_12.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/gdpr_article_30.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/hipaa_164_312_b.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/iso_27001_8_15.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/pci_dss_4_req_10.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/report.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/compliance/soc_2_cc_7_2.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/automated_scientist.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/autonomous_agent.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/causal_triggers.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/confidence_calibrator.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/data/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/data/common_english_2000.txt +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/data/common_english_5000.txt +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/epistemic_arbiter.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/epistemic_loop.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/extraction_validator.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/gauge_orchestrator.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/live_llm_adapter.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/llm_dispatch.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/llm_entailment.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/mass_semantic_engine.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/ouroboros.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/s25_interventions.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/semantic_dedup.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/slider_renderer.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/tome_generator.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/tome_sliders.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/vector_bridge.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/ensemble/venn_abers.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/evidence/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/evidence/chain.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/graph_store/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/graph_store/base.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/graph_store/egglog_store.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/graph_store/unionfind_store.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/canonical_codec.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/key_manager.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/p2p_mesh.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/prov_o.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/provenance.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/rate_limiter.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/resource_guards.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/scheme_registry.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/state_encoding.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/telemetry.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/tome_parser.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/verifiable_credential.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/infrastructure/zig_bridge.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/mcp_server/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/mcp_server/__main__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/mcp_server/errors.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/mcp_server/server.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/merkle_sidecar/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/merkle_sidecar/tree.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/render_receipt/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/render_receipt/verifier.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/bootstrap/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/bootstrap/multiplier_bootstrap.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/conformal/entropy_baseline.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/conformal/split_conformal.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/lsh/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/lsh/bundle_index.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/mmd/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/mmd/baseline.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/mmd/mmd.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/robust_pca/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/robust_pca/axiom_embedding.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/robust_pca/pcp.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/sequential/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/sequential/sprt.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/sheaf_laplacian.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/sheaf_laplacian_v2.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/sheaf_laplacian_v3.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/sheaf_laplacian_v32.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/smt_consistency/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/smt_consistency/consistency.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/smt_consistency/predicate_library.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/spectral_entropy/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/research/spectral_entropy/vn_entropy.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transform_receipt/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transform_receipt/format.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transform_receipt/sign.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transform_receipt/verifier.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transforms/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transforms/_base.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transforms/compose.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transforms/extract.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transforms/share.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/transforms/slider.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/trust_root/__init__.py +0 -0
- {sum_engine-0.7.0 → sum_engine-0.8.0}/sum_engine_internal/trust_root/verifier.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sum-engine
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
|
|
5
5
|
Author: ototao
|
|
6
6
|
License: Apache-2.0
|
|
@@ -24,7 +24,8 @@ License-File: LICENSE
|
|
|
24
24
|
Requires-Dist: cryptography>=41.0.0
|
|
25
25
|
Requires-Dist: sympy>=1.12
|
|
26
26
|
Provides-Extra: sieve
|
|
27
|
-
Requires-Dist: spacy>=3.7.0; extra == "sieve"
|
|
27
|
+
Requires-Dist: spacy>=3.8.0; extra == "sieve"
|
|
28
|
+
Requires-Dist: click>=8.0; extra == "sieve"
|
|
28
29
|
Provides-Extra: openai
|
|
29
30
|
Requires-Dist: openai<3.0.0,>=1.40.0; extra == "openai"
|
|
30
31
|
Requires-Dist: pydantic>=2.0.0; extra == "openai"
|
|
@@ -34,12 +35,18 @@ Provides-Extra: anthropic
|
|
|
34
35
|
Requires-Dist: anthropic>=0.97.0; extra == "anthropic"
|
|
35
36
|
Requires-Dist: pydantic>=2.0.0; extra == "anthropic"
|
|
36
37
|
Provides-Extra: receipt-verify
|
|
37
|
-
Requires-Dist: joserfc>=1.0.0; extra == "receipt-verify"
|
|
38
|
+
Requires-Dist: joserfc<2.0.0,>=1.0.0; extra == "receipt-verify"
|
|
39
|
+
Provides-Extra: verify
|
|
40
|
+
Requires-Dist: joserfc<2.0.0,>=1.0.0; extra == "verify"
|
|
38
41
|
Provides-Extra: mcp
|
|
39
42
|
Requires-Dist: mcp>=1.0.0; extra == "mcp"
|
|
40
43
|
Provides-Extra: research
|
|
41
44
|
Requires-Dist: numpy>=1.24.0; extra == "research"
|
|
42
45
|
Requires-Dist: scipy>=1.10.0; extra == "research"
|
|
46
|
+
Provides-Extra: judge
|
|
47
|
+
Requires-Dist: transformers>=4.30.0; extra == "judge"
|
|
48
|
+
Requires-Dist: torch>=2.0.0; extra == "judge"
|
|
49
|
+
Requires-Dist: sentencepiece>=0.1.99; extra == "judge"
|
|
43
50
|
Provides-Extra: omni-format
|
|
44
51
|
Requires-Dist: markitdown==0.1.5; extra == "omni-format"
|
|
45
52
|
Provides-Extra: dev
|
|
@@ -56,12 +63,13 @@ Requires-Dist: sum-engine[sieve]; extra == "all"
|
|
|
56
63
|
Requires-Dist: sum-engine[openai]; extra == "all"
|
|
57
64
|
Requires-Dist: sum-engine[anthropic]; extra == "all"
|
|
58
65
|
Requires-Dist: sum-engine[receipt-verify]; extra == "all"
|
|
66
|
+
Requires-Dist: sum-engine[verify]; extra == "all"
|
|
59
67
|
Requires-Dist: sum-engine[mcp]; extra == "all"
|
|
60
68
|
Requires-Dist: sum-engine[omni-format]; extra == "all"
|
|
61
69
|
Requires-Dist: sum-engine[dev]; extra == "all"
|
|
62
70
|
Dynamic: license-file
|
|
63
71
|
|
|
64
|
-
# SUM — verifiable bidirectional knowledge distillation
|
|
72
|
+
# SUM — chain of custody for AI-transformed text
|
|
65
73
|
|
|
66
74
|
[](https://github.com/OtotaO/SUM/actions/workflows/quantum-ci.yml)
|
|
67
75
|
[](https://pypi.org/project/sum-engine/)
|
|
@@ -85,13 +93,21 @@ Headline supporting numbers (each links to its source of truth):
|
|
|
85
93
|
| Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
|
|
86
94
|
| Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
|
|
87
95
|
| Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
|
|
88
|
-
| Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
|
|
96
|
+
| Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark — measured; same-commit replay receipt still pending (bench-hardening T2/T3) | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
|
|
89
97
|
| Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
|
|
90
98
|
|
|
91
99
|
A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
|
|
92
100
|
|
|
93
101
|
---
|
|
94
102
|
|
|
103
|
+
## Why it matters
|
|
104
|
+
|
|
105
|
+
More of what people read is now produced or reshaped by AI — summarised, translated, distilled, rewritten. As that grows, the ability to check *what changed, what was preserved, and what was lost* stops being a nicety and becomes shared infrastructure for a trustworthy information commons.
|
|
106
|
+
|
|
107
|
+
SUM is built to be that layer **in the open**: Apache-2.0, offline-verifiable by anyone, and aligned with open standards (C2PA `digital_source_type`, W3C VC 2.0, JOSE / JWS / JWKS) rather than a proprietary trust silo. It does not ask you to trust *SUM* — any third party verifies the receipt themselves, in three independent runtimes, and the project states plainly where proof ends and measurement begins. The aim is a checkable **chain of custody for knowledge in motion**, not another walled garden.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
95
111
|
## Verify it yourself in 60 seconds
|
|
96
112
|
|
|
97
113
|
The trust loop: hit the live Worker, get back a tome plus a detached Ed25519 JWS over the JCS-canonicalised receipt payload, fetch the issuer JWKS, verify.
|
|
@@ -123,19 +139,21 @@ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIP
|
|
|
123
139
|
| Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
|
|
124
140
|
| 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned. Two dispatch paths: Worker `/api/render` (Anthropic + Cloudflare AI Gateway optional) producing `sum.render_receipt.v1`, OR Python `sum transform apply slider` (OpenAI via `OPENAI_API_KEY`) producing `sum.transform_receipt.v1` | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable. Tightening worktrail at [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) adds iteration-stability + DKW worst-case bounds + capability-region headlines |
|
|
125
141
|
| MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
|
|
126
|
-
| Transform substrate (`sum.transform_receipt.v1` + registry) | shipped
|
|
142
|
+
| Transform substrate (`sum.transform_receipt.v1` + registry) | shipped on PyPI ≥ 0.7.0 | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
|
|
127
143
|
| Replay-defense window (`signed_at_out_of_window`) | shipped | opt-in `max_age_seconds` parameter across all four verifier surfaces (Python render / Python transform / JS render / JS transform). Default-off preserves archival use; receivers opt in per use-case (agent-swarm 60s, real-time 600s, newsletter 1d, legal-discovery no window). |
|
|
128
144
|
| `sum verify --explain` layered output | shipped | Per-dimension report (`sum.verify_explained.v1`): cryptographic integrity / canonical reconstruction / axiom consistency / extraction provenance / source evidence coverage / semantic preservation / truth of content. Each carries `epistemic_status` (`provable` / `certified` / `empirical-benchmark` / `not-asserted`). Truth of content is ALWAYS `not_asserted` — locked by test. |
|
|
145
|
+
| Meaning-loss receipts + `sum_verify` SDK | shipped on PyPI ≥ 0.8.0 | `sum.meaning_risk_receipt.v1` — a signed, replayable, distribution-free bound on a *named meaning-loss proxy* (`pip install 'sum-engine[verify]'` → `import sum_verify` / `python -m sum_verify`, dependency-light: no numpy/scipy/torch). Plus `sum meaning-diff` (per-document "what was kept / dropped / added"), `sum drift-budget` (compose meaning-loss across a transform chain), and `sum exchangeability` (advisory: is a bound applicable to *your* text?). Research-flagged; the affirmative contribution behind arXiv Paper-1. |
|
|
129
146
|
| Negative-control corpus (T5 of bench-hardening) | shipped | 20 hand-authored documents across 5 failure modes (ambiguous coref / predicate-alias / contradictions / entity-resolution-adversarial / non-extractable). Runner exits 1 if observed failures don't match annotations. Baseline at [`fixtures/bench_receipts/negative_control_2026-05-17.json`](fixtures/bench_receipts/negative_control_2026-05-17.json). |
|
|
130
147
|
| Compliance validators (six regimes) | shipped | `sum compliance check --regime <id> --audit-log <path>` — EU AI Act Article 12, GDPR Article 30, HIPAA § 164.312(b), ISO/IEC 27001 A.8.15, SOC 2 CC 7.2, PCI DSS v4.0 Req 10. All six produce the same `sum.compliance_report.v1` schema; per-regime docs at `docs/COMPLIANCE_*.md`. |
|
|
131
148
|
|
|
132
|
-
The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
|
|
149
|
+
The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md). In keeping with the "what remains unproven" half of the promise above: these headline numbers are **measured observations**, not yet same-commit-replayable — the bench harness (`Tests/benchmarks/slider_drift_bench.py`) is scaffold-state and no `sum.slider_drift_bench.v1` receipt is committed. Closing that to a replayable receipt is bench-hardening tasks T2 / T3 ([`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md)); see the reproducibility-status note in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
|
|
133
150
|
|
|
134
151
|
## Strategic context
|
|
135
152
|
|
|
136
153
|
The operational compass — read in this order if you want the project's intent + how it operates + where it's going:
|
|
137
154
|
|
|
138
155
|
- [`docs/CHARTER_2026-05-17.md`](docs/CHARTER_2026-05-17.md) — intent, the Why, strategy, objectives, success criteria, constraints, and the operational loop. The compass every other doc resolves to.
|
|
156
|
+
- [`docs/PRODUCT_VISION.md`](docs/PRODUCT_VISION.md) — the product vision (the slider workbench: drop text → render it from a tag to a tome, with a signed receipt of what was preserved) and the **positioning**: SUM is the chain-of-custody *standard* for AI-transformed text — **provenance-first, attest-don't-detect** (a cryptographic guarantee robust to rewriting; an "is this AI?" answer ships only as an honest advisory signal, never a "99 %").
|
|
139
157
|
- [`docs/PRODUCT_DELIBERATION_2026-05-14.md`](docs/PRODUCT_DELIBERATION_2026-05-14.md) — three-option strategic analysis + grant-outcome decision tree.
|
|
140
158
|
- [`docs/ZENITH_FRAMING_2026-05-16.md`](docs/ZENITH_FRAMING_2026-05-16.md) — destination framing (SUM as chain-of-custody for AI-transformed knowledge) plus three new concepts (Perspective Receipts, Trust Profiles, Epistemic Nutrition Label) on the design queue.
|
|
141
159
|
- [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) — five-task empirical-benchmark hardening plan (T1–T5; T5 shipped, T1–T4 queued).
|
|
@@ -251,7 +269,7 @@ Below the slider sits the substrate that earlier phases shipped and verified. Po
|
|
|
251
269
|
- **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
|
|
252
270
|
- **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
|
|
253
271
|
- **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
|
|
254
|
-
- **
|
|
272
|
+
- **170 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
|
|
255
273
|
|
|
256
274
|
### Research substrate (under `sum_engine_internal/research/`)
|
|
257
275
|
|
|
@@ -275,6 +293,10 @@ Less-surfaced but shipped:
|
|
|
275
293
|
- **Audit log format** — every CLI operation can emit `sum.audit_log.v1` events; see [`docs/AUDIT_LOG_FORMAT.md`](docs/AUDIT_LOG_FORMAT.md).
|
|
276
294
|
- **Agent surface** (`sum_engine_internal/agent_surface/`) — see [`docs/AGENT_SURFACE_FINDINGS.md`](docs/AGENT_SURFACE_FINDINGS.md).
|
|
277
295
|
|
|
296
|
+
### Internal research surfaces (NOT shipped, present in repo)
|
|
297
|
+
|
|
298
|
+
- **`api/quantum_router.py` + `quantum_main.py`** — FastAPI surface with 26+ endpoints (branchable knowledge graph, ZK semantic proofs, federated KG sync, JWT-tenant knowledge OS). 1,684 LOC; 58/58 tests pass; runs locally via `uvicorn quantum_main:app`. **NOT in the PyPI wheel** (`pyproject.toml` excludes `api*`), **NOT in the live Worker**, **NOT in the dogfood quickstart**. The substrate it composes is load-bearing for the shipping surfaces above; only the FastAPI HTTP layer is internal-research. Promote to a shipping `[api]` extra only if a named buyer or grant deliverable explicitly references one of the endpoint clusters. See top-of-file banner in `api/quantum_router.py` for the full triage rationale.
|
|
299
|
+
|
|
278
300
|
---
|
|
279
301
|
|
|
280
302
|
## Reproduce the bench
|
|
@@ -332,7 +354,7 @@ CI runs the full suite on every push (`.github/workflows/quantum-ci.yml`); the `
|
|
|
332
354
|
|
|
333
355
|
---
|
|
334
356
|
|
|
335
|
-
##
|
|
357
|
+
## Epistemic contract
|
|
336
358
|
|
|
337
359
|
Every claim in this repo carries an explicit epistemic status — `provable`, `certified`, `empirical-benchmark`, or `expert-opinion`. The arbiter is [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md). A summary surface that quotes an empirical-benchmark number alongside language like "mathematically guaranteed" is a policy violation per §5 and must be corrected.
|
|
338
360
|
|
|
@@ -1,67 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
Name: sum-engine
|
|
3
|
-
Version: 0.7.0
|
|
4
|
-
Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
|
|
5
|
-
Author: ototao
|
|
6
|
-
License: Apache-2.0
|
|
7
|
-
Project-URL: Homepage, https://github.com/OtotaO/SUM
|
|
8
|
-
Project-URL: Repository, https://github.com/OtotaO/SUM
|
|
9
|
-
Project-URL: Proof Boundary, https://github.com/OtotaO/SUM/blob/main/docs/PROOF_BOUNDARY.md
|
|
10
|
-
Project-URL: Feature Catalog, https://github.com/OtotaO/SUM/blob/main/docs/FEATURE_CATALOG.md
|
|
11
|
-
Keywords: knowledge-graph,verifiable-credentials,attestation,godel-encoding,semantic-web,agent-cli
|
|
12
|
-
Classifier: Development Status :: 4 - Beta
|
|
13
|
-
Classifier: Intended Audience :: Developers
|
|
14
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
-
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
-
Classifier: Topic :: Security :: Cryptography
|
|
20
|
-
Classifier: Environment :: Console
|
|
21
|
-
Requires-Python: >=3.10
|
|
22
|
-
Description-Content-Type: text/markdown
|
|
23
|
-
License-File: LICENSE
|
|
24
|
-
Requires-Dist: cryptography>=41.0.0
|
|
25
|
-
Requires-Dist: sympy>=1.12
|
|
26
|
-
Provides-Extra: sieve
|
|
27
|
-
Requires-Dist: spacy>=3.7.0; extra == "sieve"
|
|
28
|
-
Provides-Extra: openai
|
|
29
|
-
Requires-Dist: openai<3.0.0,>=1.40.0; extra == "openai"
|
|
30
|
-
Requires-Dist: pydantic>=2.0.0; extra == "openai"
|
|
31
|
-
Provides-Extra: llm
|
|
32
|
-
Requires-Dist: sum-engine[openai]; extra == "llm"
|
|
33
|
-
Provides-Extra: anthropic
|
|
34
|
-
Requires-Dist: anthropic>=0.97.0; extra == "anthropic"
|
|
35
|
-
Requires-Dist: pydantic>=2.0.0; extra == "anthropic"
|
|
36
|
-
Provides-Extra: receipt-verify
|
|
37
|
-
Requires-Dist: joserfc>=1.0.0; extra == "receipt-verify"
|
|
38
|
-
Provides-Extra: mcp
|
|
39
|
-
Requires-Dist: mcp>=1.0.0; extra == "mcp"
|
|
40
|
-
Provides-Extra: research
|
|
41
|
-
Requires-Dist: numpy>=1.24.0; extra == "research"
|
|
42
|
-
Requires-Dist: scipy>=1.10.0; extra == "research"
|
|
43
|
-
Provides-Extra: omni-format
|
|
44
|
-
Requires-Dist: markitdown==0.1.5; extra == "omni-format"
|
|
45
|
-
Provides-Extra: dev
|
|
46
|
-
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
47
|
-
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
48
|
-
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
49
|
-
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
50
|
-
Requires-Dist: types-setuptools; extra == "dev"
|
|
51
|
-
Requires-Dist: PyJWT>=2.8.0; extra == "dev"
|
|
52
|
-
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
53
|
-
Requires-Dist: hypothesis>=6.0.0; extra == "dev"
|
|
54
|
-
Provides-Extra: all
|
|
55
|
-
Requires-Dist: sum-engine[sieve]; extra == "all"
|
|
56
|
-
Requires-Dist: sum-engine[openai]; extra == "all"
|
|
57
|
-
Requires-Dist: sum-engine[anthropic]; extra == "all"
|
|
58
|
-
Requires-Dist: sum-engine[receipt-verify]; extra == "all"
|
|
59
|
-
Requires-Dist: sum-engine[mcp]; extra == "all"
|
|
60
|
-
Requires-Dist: sum-engine[omni-format]; extra == "all"
|
|
61
|
-
Requires-Dist: sum-engine[dev]; extra == "all"
|
|
62
|
-
Dynamic: license-file
|
|
63
|
-
|
|
64
|
-
# SUM — verifiable bidirectional knowledge distillation
|
|
1
|
+
# SUM — chain of custody for AI-transformed text
|
|
65
2
|
|
|
66
3
|
[](https://github.com/OtotaO/SUM/actions/workflows/quantum-ci.yml)
|
|
67
4
|
[](https://pypi.org/project/sum-engine/)
|
|
@@ -85,13 +22,21 @@ Headline supporting numbers (each links to its source of truth):
|
|
|
85
22
|
| Three-runtime byte-symmetric Ed25519 over JCS bytes | provable; locked by `make xruntime` (K1–K4) + `make xruntime-adversarial` (A1–A6) | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.2, §1.3.1 |
|
|
86
23
|
| Canonical round-trip `reconstruct(parse(canonical_tome(S))) == S` | provable; 0.00% drift on every CI run | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.1 |
|
|
87
24
|
| Render receipt — `sum.render_receipt.v1`, Ed25519 / JCS / detached JWS | shipped; verifier in three runtimes | [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) |
|
|
88
|
-
| Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
|
|
25
|
+
| Slider fact preservation: median 1.000, p10 0.769 (long n=16) / 0.818 (short n=8) | empirical-benchmark — measured; same-commit replay receipt still pending (bench-hardening T2/T3) | [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md) |
|
|
89
26
|
| Extraction F1 = 1.000 (`seed_v1`), 0.762 with precision 1.000 (`seed_v2`) | empirical-benchmark | [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1 |
|
|
90
27
|
|
|
91
28
|
A render receipt verifies the *render attestation* (issuer signed this tome, these triples, this slider position, this model, at this time). It does not verify the truth of the tome's content — that is what the slider bench measures separately. See [`docs/RENDER_RECEIPT_FORMAT.md`](docs/RENDER_RECEIPT_FORMAT.md) §5 for the explicit trust scope.
|
|
92
29
|
|
|
93
30
|
---
|
|
94
31
|
|
|
32
|
+
## Why it matters
|
|
33
|
+
|
|
34
|
+
More of what people read is now produced or reshaped by AI — summarised, translated, distilled, rewritten. As that grows, the ability to check *what changed, what was preserved, and what was lost* stops being a nicety and becomes shared infrastructure for a trustworthy information commons.
|
|
35
|
+
|
|
36
|
+
SUM is built to be that layer **in the open**: Apache-2.0, offline-verifiable by anyone, and aligned with open standards (C2PA `digital_source_type`, W3C VC 2.0, JOSE / JWS / JWKS) rather than a proprietary trust silo. It does not ask you to trust *SUM* — any third party verifies the receipt themselves, in three independent runtimes, and the project states plainly where proof ends and measurement begins. The aim is a checkable **chain of custody for knowledge in motion**, not another walled garden.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
95
40
|
## Verify it yourself in 60 seconds
|
|
96
41
|
|
|
97
42
|
The trust loop: hit the live Worker, get back a tome plus a detached Ed25519 JWS over the JCS-canonicalised receipt payload, fetch the issuer JWKS, verify.
|
|
@@ -123,19 +68,21 @@ A minimal Node verifier using `jose` + `canonicalize` is in [`docs/RENDER_RECEIP
|
|
|
123
68
|
| Cross-runtime trust triangle | locked by CI (`make xruntime`) | K1 / K1-mw / K2 / K3 / K4 — Python ↔ Node ↔ Browser agree byte-for-byte on valid bundles. `make xruntime-adversarial` adds A1–A6 rejection-class equivalence. |
|
|
124
69
|
| 5-axis slider rendering surface | density actioned deterministically; length / formality / audience / perspective LLM-conditioned. Two dispatch paths: Worker `/api/render` (Anthropic + Cloudflare AI Gateway optional) producing `sum.render_receipt.v1`, OR Python `sum transform apply slider` (OpenAI via `OPENAI_API_KEY`) producing `sum.transform_receipt.v1` | bench: median LLM-axis fact preservation 1.000, p10 0.769 (long, n=16) / 0.818 (short, n=8), order preservation 1.000 wherever measurable. Tightening worktrail at [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) adds iteration-stability + DKW worst-case bounds + capability-region headlines |
|
|
125
70
|
| MCP server (`sum-mcp` console script) | shipped | five tools (`extract` / `attest` / `verify` / `inspect` / `schema`) exposed over stdio; bundles attested via MCP verify byte-identically through the CLI / Node / browser verifiers |
|
|
126
|
-
| Transform substrate (`sum.transform_receipt.v1` + registry) | shipped
|
|
71
|
+
| Transform substrate (`sum.transform_receipt.v1` + registry) | shipped on PyPI ≥ 0.7.0 | `sum transform list` / `sum transform apply <name>` — three registered transforms (`slider` / `extract` / `compose`); receipts via Ed25519 / JCS / detached JWS just like render-receipts; 20-fixture cross-runtime K-matrix locks accept + reject across Python ↔ Node ↔ browser; T4 `source_chain_hash` binds receipts to source byte ranges; T5 `ShareableRender` round-trips signed renders for offline verification; T6 multi-school extract runs two extractors in tandem for adversarial-divergence detection. Wire spec at [`docs/TRANSFORM_RECEIPT_FORMAT.md`](docs/TRANSFORM_RECEIPT_FORMAT.md); design at [`docs/TRANSFORM_REGISTRY.md`](docs/TRANSFORM_REGISTRY.md). |
|
|
127
72
|
| Replay-defense window (`signed_at_out_of_window`) | shipped | opt-in `max_age_seconds` parameter across all four verifier surfaces (Python render / Python transform / JS render / JS transform). Default-off preserves archival use; receivers opt in per use-case (agent-swarm 60s, real-time 600s, newsletter 1d, legal-discovery no window). |
|
|
128
73
|
| `sum verify --explain` layered output | shipped | Per-dimension report (`sum.verify_explained.v1`): cryptographic integrity / canonical reconstruction / axiom consistency / extraction provenance / source evidence coverage / semantic preservation / truth of content. Each carries `epistemic_status` (`provable` / `certified` / `empirical-benchmark` / `not-asserted`). Truth of content is ALWAYS `not_asserted` — locked by test. |
|
|
74
|
+
| Meaning-loss receipts + `sum_verify` SDK | shipped on PyPI ≥ 0.8.0 | `sum.meaning_risk_receipt.v1` — a signed, replayable, distribution-free bound on a *named meaning-loss proxy* (`pip install 'sum-engine[verify]'` → `import sum_verify` / `python -m sum_verify`, dependency-light: no numpy/scipy/torch). Plus `sum meaning-diff` (per-document "what was kept / dropped / added"), `sum drift-budget` (compose meaning-loss across a transform chain), and `sum exchangeability` (advisory: is a bound applicable to *your* text?). Research-flagged; the affirmative contribution behind arXiv Paper-1. |
|
|
129
75
|
| Negative-control corpus (T5 of bench-hardening) | shipped | 20 hand-authored documents across 5 failure modes (ambiguous coref / predicate-alias / contradictions / entity-resolution-adversarial / non-extractable). Runner exits 1 if observed failures don't match annotations. Baseline at [`fixtures/bench_receipts/negative_control_2026-05-17.json`](fixtures/bench_receipts/negative_control_2026-05-17.json). |
|
|
130
76
|
| Compliance validators (six regimes) | shipped | `sum compliance check --regime <id> --audit-log <path>` — EU AI Act Article 12, GDPR Article 30, HIPAA § 164.312(b), ISO/IEC 27001 A.8.15, SOC 2 CC 7.2, PCI DSS v4.0 Req 10. All six produce the same `sum.compliance_report.v1` schema; per-regime docs at `docs/COMPLIANCE_*.md`. |
|
|
131
77
|
|
|
132
|
-
The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
|
|
78
|
+
The slider's product claim — *axis changes do not lose facts* — is the load-bearing empirical result. It is verified by NLI audit on every embedding-flagged "loss" cell; full attribution in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md). In keeping with the "what remains unproven" half of the promise above: these headline numbers are **measured observations**, not yet same-commit-replayable — the bench harness (`Tests/benchmarks/slider_drift_bench.py`) is scaffold-state and no `sum.slider_drift_bench.v1` receipt is committed. Closing that to a replayable receipt is bench-hardening tasks T2 / T3 ([`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md)); see the reproducibility-status note in [`docs/SLIDER_CONTRACT.md`](docs/SLIDER_CONTRACT.md).
|
|
133
79
|
|
|
134
80
|
## Strategic context
|
|
135
81
|
|
|
136
82
|
The operational compass — read in this order if you want the project's intent + how it operates + where it's going:
|
|
137
83
|
|
|
138
84
|
- [`docs/CHARTER_2026-05-17.md`](docs/CHARTER_2026-05-17.md) — intent, the Why, strategy, objectives, success criteria, constraints, and the operational loop. The compass every other doc resolves to.
|
|
85
|
+
- [`docs/PRODUCT_VISION.md`](docs/PRODUCT_VISION.md) — the product vision (the slider workbench: drop text → render it from a tag to a tome, with a signed receipt of what was preserved) and the **positioning**: SUM is the chain-of-custody *standard* for AI-transformed text — **provenance-first, attest-don't-detect** (a cryptographic guarantee robust to rewriting; an "is this AI?" answer ships only as an honest advisory signal, never a "99 %").
|
|
139
86
|
- [`docs/PRODUCT_DELIBERATION_2026-05-14.md`](docs/PRODUCT_DELIBERATION_2026-05-14.md) — three-option strategic analysis + grant-outcome decision tree.
|
|
140
87
|
- [`docs/ZENITH_FRAMING_2026-05-16.md`](docs/ZENITH_FRAMING_2026-05-16.md) — destination framing (SUM as chain-of-custody for AI-transformed knowledge) plus three new concepts (Perspective Receipts, Trust Profiles, Epistemic Nutrition Label) on the design queue.
|
|
141
88
|
- [`docs/BENCH_HARDENING_FROM_QCVV.md`](docs/BENCH_HARDENING_FROM_QCVV.md) — five-task empirical-benchmark hardening plan (T1–T5; T5 shipped, T1–T4 queued).
|
|
@@ -251,7 +198,7 @@ Below the slider sits the substrate that earlier phases shipped and verified. Po
|
|
|
251
198
|
- **Bundle public-key attestation (provable).** Ed25519-signed CanonicalBundles are tamper-detectable by any third party in any of the three runtimes. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.3.1.
|
|
252
199
|
- **Merkle hash-chain integrity (provable, including under concurrent writers).** [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §1.7.
|
|
253
200
|
- **Extraction F1 (empirical-benchmark).** 1.000 on `seed_v1` (50 simple-SVO docs); 0.762 with precision 1.000 on `seed_v2` (20-doc difficulty corpus). Every remaining `seed_v2` failure is a recall miss, not a truth inversion. [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md) §2.1.
|
|
254
|
-
- **
|
|
201
|
+
- **170 numbered features**, each with a reproducible verification command, in [`docs/FEATURE_CATALOG.md`](docs/FEATURE_CATALOG.md).
|
|
255
202
|
|
|
256
203
|
### Research substrate (under `sum_engine_internal/research/`)
|
|
257
204
|
|
|
@@ -275,6 +222,10 @@ Less-surfaced but shipped:
|
|
|
275
222
|
- **Audit log format** — every CLI operation can emit `sum.audit_log.v1` events; see [`docs/AUDIT_LOG_FORMAT.md`](docs/AUDIT_LOG_FORMAT.md).
|
|
276
223
|
- **Agent surface** (`sum_engine_internal/agent_surface/`) — see [`docs/AGENT_SURFACE_FINDINGS.md`](docs/AGENT_SURFACE_FINDINGS.md).
|
|
277
224
|
|
|
225
|
+
### Internal research surfaces (NOT shipped, present in repo)
|
|
226
|
+
|
|
227
|
+
- **`api/quantum_router.py` + `quantum_main.py`** — FastAPI surface with 26+ endpoints (branchable knowledge graph, ZK semantic proofs, federated KG sync, JWT-tenant knowledge OS). 1,684 LOC; 58/58 tests pass; runs locally via `uvicorn quantum_main:app`. **NOT in the PyPI wheel** (`pyproject.toml` excludes `api*`), **NOT in the live Worker**, **NOT in the dogfood quickstart**. The substrate it composes is load-bearing for the shipping surfaces above; only the FastAPI HTTP layer is internal-research. Promote to a shipping `[api]` extra only if a named buyer or grant deliverable explicitly references one of the endpoint clusters. See top-of-file banner in `api/quantum_router.py` for the full triage rationale.
|
|
228
|
+
|
|
278
229
|
---
|
|
279
230
|
|
|
280
231
|
## Reproduce the bench
|
|
@@ -332,7 +283,7 @@ CI runs the full suite on every push (`.github/workflows/quantum-ci.yml`); the `
|
|
|
332
283
|
|
|
333
284
|
---
|
|
334
285
|
|
|
335
|
-
##
|
|
286
|
+
## Epistemic contract
|
|
336
287
|
|
|
337
288
|
Every claim in this repo carries an explicit epistemic status — `provable`, `certified`, `empirical-benchmark`, or `expert-opinion`. The arbiter is [`docs/PROOF_BOUNDARY.md`](docs/PROOF_BOUNDARY.md). A summary surface that quotes an empirical-benchmark number alongside language like "mathematically guaranteed" is a policy violation per §5 and must be corrected.
|
|
338
289
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sum-engine"
|
|
7
|
-
version = "0.7.0"
|
|
7
|
+
version = "0.8.0"
|
|
8
8
|
description = "SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "Apache-2.0" }
|
|
@@ -42,7 +42,26 @@ dependencies = [
|
|
|
42
42
|
# pip install sum-engine[openai] # OpenAI structured-output path
|
|
43
43
|
# pip install sum-engine[llm] # alias for [openai] (legacy name)
|
|
44
44
|
# pip install sum-engine[all] # everything, plus dev tooling
|
|
45
|
-
sieve = [
|
|
45
|
+
sieve = [
|
|
46
|
+
# Floor bumped 3.7.0 → 3.8.0 on 2026-05-29 (F14). At spacy 3.7.0
|
|
47
|
+
# the auto-downloaded en_core_web_sm now resolves to a 3.8-series
|
|
48
|
+
# model the older runtime cannot load, and the fallback download
|
|
49
|
+
# builds a malformed URL (`download/-en_core_web_sm/-…`) because
|
|
50
|
+
# spacy.io's compatibility table no longer serves 3.7-compatible
|
|
51
|
+
# entries. Bumping the floor to the empirically-operable version
|
|
52
|
+
# keeps the declared floor honest. CI: new `pip install sum-engine
|
|
53
|
+
# (floor venv smoke)` job pins to floor and runs the full smoke,
|
|
54
|
+
# so the next time the floor decays we catch it before users do.
|
|
55
|
+
# See `docs/DOGFOOD_FINDINGS_2026-05-29.md` F14.
|
|
56
|
+
"spacy>=3.8.0",
|
|
57
|
+
# spacy ≥ 3.8 imports `from click import NoSuchOption` at module
|
|
58
|
+
# load (spacy/cli/_util.py); typer ≥ 0.13 stopped pulling click
|
|
59
|
+
# transitively. Declare click explicitly so a fresh
|
|
60
|
+
# `pip install sum-engine[sieve]` does not ImportError on first
|
|
61
|
+
# spacy import. CI: `pip install sum-engine (fresh venv smoke)`
|
|
62
|
+
# caught this 2026-05-28. See F13.
|
|
63
|
+
"click>=8.0",
|
|
64
|
+
]
|
|
46
65
|
# `[openai]` is the canonical, vendor-named extra; `[llm]` is kept as a
|
|
47
66
|
# back-compat alias because it predates the multi-provider dispatcher
|
|
48
67
|
# (Anthropic and OpenAI now have their own named extras). Both install
|
|
@@ -62,7 +81,21 @@ anthropic = ["anthropic>=0.97.0", "pydantic>=2.0.0"]
|
|
|
62
81
|
# detached-JWS / RFC 7797 b64=false machinery; the existing pure-Python
|
|
63
82
|
# JCS module at sum_engine_internal/infrastructure/jcs.py handles
|
|
64
83
|
# canonicalization. Cryptography is already a hard dep above.
|
|
65
|
-
|
|
84
|
+
# Upper bound: joserfc>=1.x warns that the "EdDSA" JWS alg is deprecated
|
|
85
|
+
# (RFC 9864 favours explicit Ed25519/Ed448 alg identifiers). The whole
|
|
86
|
+
# render-receipt trust loop signs with "EdDSA", so we pin below 2.0.0
|
|
87
|
+
# until we confirm a major release does not drop the "EdDSA" alias.
|
|
88
|
+
receipt-verify = ["joserfc>=1.0.0,<2.0.0"]
|
|
89
|
+
# The stable, dependency-light verify SDK (`import sum_verify`). The
|
|
90
|
+
# package an integrator pins to CHECK SUM receipts — meaning-risk /
|
|
91
|
+
# render / transform — without the CLI or the research numeric stack.
|
|
92
|
+
# Deliberately tiny: joserfc for the detached-JWS machinery, cryptography
|
|
93
|
+
# (already a core dep) for Ed25519. The conformal bound replay is
|
|
94
|
+
# re-derived in pure Python (sum_verify/_conformal.py), so verifying a
|
|
95
|
+
# meaning-risk receipt offline pulls NO numpy / scipy / torch — the
|
|
96
|
+
# property `Tests/test_sum_verify_sdk.py` pins in a clean subprocess.
|
|
97
|
+
# Same joserfc pin/rationale as [receipt-verify] above.
|
|
98
|
+
verify = ["joserfc>=1.0.0,<2.0.0"]
|
|
66
99
|
# MCP (Model Context Protocol) server. Exposes SUM verbs as MCP
|
|
67
100
|
# tools so any MCP-aware LLM client (Claude Desktop, Claude Code,
|
|
68
101
|
# Cursor, Continue, custom agents) can call SUM directly. The
|
|
@@ -79,6 +112,12 @@ mcp = ["mcp>=1.0.0"]
|
|
|
79
112
|
# verified blindspots (predicate-flip, off-graph fact-fabrication,
|
|
80
113
|
# empty-render false negative).
|
|
81
114
|
research = ["numpy>=1.24.0", "scipy>=1.10.0"]
|
|
115
|
+
# Local, deterministic, zero-$ entailment judge for the meaning-loss
|
|
116
|
+
# EntailmentScorer (sum_engine_internal/research/meaning/local_judge.py).
|
|
117
|
+
# A local sentence-embedding model run offline in eval mode — fixes the
|
|
118
|
+
# F18 lexical-scorer paraphrase misranking without any paid API. Heavy
|
|
119
|
+
# (torch); strictly optional, lazy-imported.
|
|
120
|
+
judge = ["transformers>=4.30.0", "torch>=2.0.0", "sentencepiece>=0.1.99"]
|
|
82
121
|
# Omni-format adapter. Markdown is the canonical pivot for the
|
|
83
122
|
# attest pipeline: any input format -> markdown -> existing
|
|
84
123
|
# extract/state/bundle path. Source URI anchors to the original
|
|
@@ -119,6 +158,7 @@ all = [
|
|
|
119
158
|
"sum-engine[openai]",
|
|
120
159
|
"sum-engine[anthropic]",
|
|
121
160
|
"sum-engine[receipt-verify]",
|
|
161
|
+
"sum-engine[verify]",
|
|
122
162
|
"sum-engine[mcp]",
|
|
123
163
|
"sum-engine[omni-format]",
|
|
124
164
|
"sum-engine[dev]",
|
|
@@ -140,21 +180,24 @@ Repository = "https://github.com/OtotaO/SUM"
|
|
|
140
180
|
"Feature Catalog" = "https://github.com/OtotaO/SUM/blob/main/docs/FEATURE_CATALOG.md"
|
|
141
181
|
|
|
142
182
|
[tool.check-wheel-contents]
|
|
143
|
-
# The wheel intentionally ships TWO top-level packages:
|
|
183
|
+
# The wheel intentionally ships THREE top-level packages:
|
|
144
184
|
# sum_cli — the CLI entry point (provides the `sum`
|
|
145
185
|
# console script via [project.scripts]).
|
|
146
186
|
# sum_engine_internal — the implementation modules consumers
|
|
147
187
|
# `import` from (the public Python API).
|
|
188
|
+
# sum_verify — the small, stable, dependency-light verify
|
|
189
|
+
# SDK (`import sum_verify`); pinnable independent
|
|
190
|
+
# of the CLI / research stack.
|
|
148
191
|
# Without this whitelist, check-wheel-contents fires W009 on every
|
|
149
192
|
# build. Whitelisting by name (rather than ignoring W009 globally)
|
|
150
193
|
# preserves the gate: if a future build accidentally adds an
|
|
151
|
-
# unexpected third top-level package, this config still catches it.
|
|
152
|
-
toplevel = ["sum_cli", "sum_engine_internal"]
|
|
194
|
+
# unexpected fourth top-level package, this config still catches it.
|
|
195
|
+
toplevel = ["sum_cli", "sum_engine_internal", "sum_verify"]
|
|
153
196
|
|
|
154
197
|
[tool.setuptools.packages.find]
|
|
155
198
|
# Include the CLI, the core engine modules it depends on, and the verify SDK. Tests and
|
|
156
199
|
# scripts are dev-time only and excluded from the distribution.
|
|
157
|
-
include = ["sum_cli*", "sum_engine_internal*"]
|
|
200
|
+
include = ["sum_cli*", "sum_engine_internal*", "sum_verify*"]
|
|
158
201
|
exclude = ["Tests*", "Tests.*", "scripts*", "api*", "single_file_demo*"]
|
|
159
202
|
|
|
160
203
|
[tool.setuptools.package-data]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Enable ``python -m sum_cli`` as an alias for the ``sum`` console script.
|
|
2
|
+
|
|
3
|
+
Several prospective adopters in the 30-guest adoption simulation (2026-06-09)
|
|
4
|
+
reached for ``python3 -m sum_cli`` and hit "package cannot be directly
|
|
5
|
+
executed" — the only working forms were the ``sum`` entry point or
|
|
6
|
+
``python3 -m sum_cli.main``. This makes the obvious on-ramp work.
|
|
7
|
+
"""
|
|
8
|
+
from sum_cli.main import main
|
|
9
|
+
|
|
10
|
+
if __name__ == "__main__":
|
|
11
|
+
raise SystemExit(main())
|