sum-engine 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sum_engine-0.2.1 → sum_engine-0.3.0}/PKG-INFO +24 -7
- {sum_engine-0.2.1 → sum_engine-0.3.0}/README.md +23 -6
- {sum_engine-0.2.1 → sum_engine-0.3.0}/pyproject.toml +1 -1
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_cli/main.py +446 -8
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine.egg-info/PKG-INFO +24 -7
- {sum_engine-0.2.1 → sum_engine-0.3.0}/LICENSE +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/setup.cfg +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_cli/__init__.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine.egg-info/SOURCES.txt +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine.egg-info/dependency_links.txt +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine.egg-info/entry_points.txt +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine.egg-info/requires.txt +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine.egg-info/top_level.txt +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/__init__.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/algorithms/__init__.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/algorithms/causal_discovery.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/algorithms/predicate_canon.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/algorithms/semantic_arithmetic.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/algorithms/syntactic_sieve.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/algorithms/zk_semantics.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/__init__.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/automated_scientist.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/autonomous_agent.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/causal_triggers.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/confidence_calibrator.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/epistemic_arbiter.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/epistemic_loop.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/extraction_validator.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/gauge_orchestrator.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/live_llm_adapter.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/llm_entailment.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/mass_semantic_engine.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/ouroboros.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/semantic_dedup.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/tome_generator.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/tome_sliders.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/vector_bridge.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/ensemble/venn_abers.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/__init__.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/akashic_ledger.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/canonical_codec.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/jcs.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/key_manager.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/p2p_mesh.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/prov_o.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/provenance.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/rate_limiter.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/resource_guards.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/scheme_registry.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/state_encoding.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/telemetry.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/tome_parser.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/verifiable_credential.py +0 -0
- {sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/zig_bridge.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sum-engine
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
|
|
5
5
|
Author: ototao
|
|
6
6
|
License: Apache-2.0
|
|
@@ -91,7 +91,7 @@ Every headline number below is reproducible via `python -m scripts.bench.run_ben
|
|
|
91
91
|
| `record_provenance_batch` sustained throughput | **~22 k ops/sec** (10.2× the single-write path) | empirical-benchmark |
|
|
92
92
|
| Merkle-chain integrity under concurrent writers | holds (50–200-event bursts) | **provable** (post `9c4139d`) |
|
|
93
93
|
| Cross-runtime byte-identity fixtures | **131 / 131 passing** across Python ↔ Node.js ↔ Browser JS | empirical-benchmark |
|
|
94
|
-
| Test suite | **
|
|
94
|
+
| Test suite | **1035 collected** at v0.3.0, with `.[dev]` extras installed (8-test gap is spacy-skipped when `en_core_web_sm` isn't downloaded) | **provable** |
|
|
95
95
|
|
|
96
96
|
---
|
|
97
97
|
|
|
@@ -390,6 +390,20 @@ Open `http://localhost:8000` to access:
|
|
|
390
390
|
These are **roadmap items**, not current capabilities. Each is a concrete piece of work with a defined entry in `docs/PROOF_BOUNDARY.md` §3. They are listed in approximate order of prerequisite dependence.
|
|
391
391
|
|
|
392
392
|
### Shipped since the last README pass
|
|
393
|
+
|
|
394
|
+
#### v0.1.0 → v0.2.1 release wave (2026-04-22 → 2026-04-23)
|
|
395
|
+
|
|
396
|
+
- ✅ **`sum-engine` on PyPI.** `pip install sum-engine[sieve]` installs the `sum` CLI binary (subcommands `attest` / `verify` / `resolve`). Three releases: 0.1.0 first public release, 0.2.0 `internal/` → `sum_engine_internal/` namespace rename, 0.2.1 dynamic version-resolution fix (`importlib.metadata.version("sum-engine")`). See [`CHANGELOG.md`](CHANGELOG.md).
|
|
397
|
+
- ✅ **`sum verify` actually verifies signatures** (not just structural reconstruction) — HMAC-SHA256 with `--signing-key`, Ed25519 always when `public_signature` + `public_key` are present. JSON result carries `signatures: {hmac, ed25519}` with values in `{verified, skipped, absent, invalid}`. `--strict` mode requires at least one verifiable signature. 15 cases pinning every branch.
|
|
398
|
+
- ✅ **`sum attest --ed25519-key PEM`** — mints W3C VC 2.0 (`eddsa-jcs-2022`) compatible Ed25519-signed bundles. PEM produced by `python -m scripts.generate_did_web` (did:web bootstrap shipped in [`docs/DID_SETUP.md`](docs/DID_SETUP.md)). Bundles verifiable by any DIF-conformant verifier (Universal Resolver, Digital Bazaar, Spruce ssi, Veramo, Microsoft Entra, Mattr).
|
|
399
|
+
- ✅ **`sum attest --ledger DB`** closes the attest → resolve loop. Per-triple ProvenanceRecords (source URI, byte range, sentence excerpt, extractor ID) recorded into a SQLite AkashicLedger; resulting `prov_id`s attached to `bundle.sum_cli.prov_ids`. `sum resolve <prov_id> --db DB` walks axiom → source byte range.
|
|
400
|
+
- ✅ **Cross-runtime Ed25519 trust triangle.** Same bundle bytes verify identically in Python (`sum verify`), Node (`standalone_verifier/verify.js` via WebCrypto), and modern browsers (`single_file_demo/index.html` via SubtleCrypto — Chrome 113+, Firefox 129+, Safari 17+). Locked in CI by kill-experiments K3 (positive) + K4 (tampered-bundle rejection); the harness also runs K1 + K1-multiword + K2.
|
|
401
|
+
- ✅ **HMAC is now optional** in `CanonicalCodec`. The old `sum-default-key` placebo default is gone (it was a publicly-known shared secret — cryptographic theater). `signing_key=None` emits no `signature` field; downgrade-protection preserved when a key IS configured.
|
|
402
|
+
- ✅ **PORTFOLIO.md + CLAUDE.md contract.** `PORTFOLIO.md` at repo root is the body of [sumequities.com/projects/sum](https://github.com/OtotaO/SUMequities) — verified-claims-only, every metric row labelled `**proved**` or `**empirical-benchmark**`. CI job `portfolio-contract` (blocking) + warn-only pre-commit hook enforce the contract.
|
|
403
|
+
- ✅ **CI gates landed.** `cross-runtime-harness` runs K1-K4 every PR (Node 22). `pypi-install-smoke` builds the wheel and runs `echo prose | sum attest | sum verify` in a fresh venv every push. `publish-pypi.yml` handles OIDC-authenticated PyPI releases on tag pushes.
|
|
404
|
+
|
|
405
|
+
#### Earlier (substrate work)
|
|
406
|
+
|
|
393
407
|
- ✅ **Per-doc logging in the regeneration runner** (commit `02b4413`) — `RegenerationMetrics.per_doc` names the specific (s, p, o) triples that failed entailment so the aggregate FActScore gap is debuggable at the generator-prompt layer.
|
|
394
408
|
- ✅ **LLM narrative full round-trip runner** (commit `9fd232d`, first measurement `2c252f0`) — composes `LiveLLMAdapter.extract_triplets → generate_text → extract_triplets`, reports per-doc drift. Measured on `seed_v1`: 107.75 % drift / 0.12 exact-match recall. See PROOF_BOUNDARY §2.5.
|
|
395
409
|
- ✅ **W3C Verifiable Credentials 2.0 emission + verification** (commit `e007f94`) — pure-Python `eddsa-jcs-2022` Data Integrity path at `sum_engine_internal/infrastructure/verifiable_credential.py` + RFC 8785 JCS at `sum_engine_internal/infrastructure/jcs.py`. 58 tests. Bundles consumable by any VC-compliant ecosystem.
|
|
@@ -453,11 +467,14 @@ npx wrangler pages deploy single_file_demo --project-name sum-demo
|
|
|
453
467
|
|
|
454
468
|
No environment variables required. No KV / R2 / D1 attached. The demo is 100 % client-side.
|
|
455
469
|
|
|
456
|
-
###
|
|
470
|
+
### Hosted-demo LLM proxy (Cloudflare Pages Function — shipped)
|
|
471
|
+
|
|
472
|
+
The demo upgrades extraction to LLM-grade *automatically* when pasted into a Claude artifact (commit `e5e57b6` — `window.claude.complete` is detected at runtime). On a plain Cloudflare Pages URL without a Claude account, extraction has two fallbacks, in order:
|
|
457
473
|
|
|
458
|
-
|
|
474
|
+
1. **Pages Function** at [`single_file_demo/functions/api/complete.ts`](single_file_demo/functions/api/complete.ts) — proxies Anthropic (preferred) or OpenAI through the optional Cloudflare AI Gateway. Server-side only reads `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` from the deployment's environment variables; never the user's browser. Returns the same JSON shape `window.claude.complete` would produce, so the in-page extractor doesn't care which path served it.
|
|
475
|
+
2. **Naive tokeniser** — pure-browser sentence-split + stopword-strip fallback when neither the artifact runtime nor the Pages Function path is available. Honest about its limits.
|
|
459
476
|
|
|
460
|
-
|
|
477
|
+
The demo's UI labels which path produced any given result (`extracted by Claude (artifact runtime)` / `extracted by Claude via Pages Function` / `extracted by naive tokeniser`).
|
|
461
478
|
|
|
462
479
|
### Roadmap — hybrid edge architecture (not shipped today)
|
|
463
480
|
|
|
@@ -469,9 +486,9 @@ These are vision items; the shipped today is the static artifact above.
|
|
|
469
486
|
|
|
470
487
|
---
|
|
471
488
|
|
|
472
|
-
## 🛡️ Verification:
|
|
489
|
+
## 🛡️ Verification: 1000+-Test Suite + 131 Cross-Runtime Fixtures
|
|
473
490
|
|
|
474
|
-
The test suite covers both proven invariants and empirically-measured properties; each assertion is scoped to the epistemic status of the thing it tests.
|
|
491
|
+
The test suite covers both proven invariants and empirically-measured properties; each assertion is scoped to the epistemic status of the thing it tests. With the `[dev]` extras installed (`pip install -e '.[sieve,dev]'` or `make install`), `pytest --collect-only` reports **1035 tests** at v0.3.0; the full-suite green count is **1027 passed** (the 8-test gap is spacy-dependent cases that skip when `en_core_web_sm` isn't downloaded into the venv). CI runs the full suite on every push via `.github/workflows/quantum-ci.yml`.
|
|
475
492
|
|
|
476
493
|
```text
|
|
477
494
|
Provable (deterministic code + tests that enforce the proof):
|
|
@@ -47,7 +47,7 @@ Every headline number below is reproducible via `python -m scripts.bench.run_ben
|
|
|
47
47
|
| `record_provenance_batch` sustained throughput | **~22 k ops/sec** (10.2× the single-write path) | empirical-benchmark |
|
|
48
48
|
| Merkle-chain integrity under concurrent writers | holds (50–200-event bursts) | **provable** (post `9c4139d`) |
|
|
49
49
|
| Cross-runtime byte-identity fixtures | **131 / 131 passing** across Python ↔ Node.js ↔ Browser JS | empirical-benchmark |
|
|
50
|
-
| Test suite | **
|
|
50
|
+
| Test suite | **1035 collected** at v0.3.0, with `.[dev]` extras installed (8-test gap is spacy-skipped when `en_core_web_sm` isn't downloaded) | **provable** |
|
|
51
51
|
|
|
52
52
|
---
|
|
53
53
|
|
|
@@ -346,6 +346,20 @@ Open `http://localhost:8000` to access:
|
|
|
346
346
|
These are **roadmap items**, not current capabilities. Each is a concrete piece of work with a defined entry in `docs/PROOF_BOUNDARY.md` §3. They are listed in approximate order of prerequisite dependence.
|
|
347
347
|
|
|
348
348
|
### Shipped since the last README pass
|
|
349
|
+
|
|
350
|
+
#### v0.1.0 → v0.2.1 release wave (2026-04-22 → 2026-04-23)
|
|
351
|
+
|
|
352
|
+
- ✅ **`sum-engine` on PyPI.** `pip install sum-engine[sieve]` installs the `sum` CLI binary (subcommands `attest` / `verify` / `resolve`). Three releases: 0.1.0 first public release, 0.2.0 `internal/` → `sum_engine_internal/` namespace rename, 0.2.1 dynamic version-resolution fix (`importlib.metadata.version("sum-engine")`). See [`CHANGELOG.md`](CHANGELOG.md).
|
|
353
|
+
- ✅ **`sum verify` actually verifies signatures** (not just structural reconstruction) — HMAC-SHA256 with `--signing-key`, Ed25519 always when `public_signature` + `public_key` are present. JSON result carries `signatures: {hmac, ed25519}` with values in `{verified, skipped, absent, invalid}`. `--strict` mode requires at least one verifiable signature. 15 cases pinning every branch.
|
|
354
|
+
- ✅ **`sum attest --ed25519-key PEM`** — mints W3C VC 2.0 (`eddsa-jcs-2022`) compatible Ed25519-signed bundles. PEM produced by `python -m scripts.generate_did_web` (did:web bootstrap shipped in [`docs/DID_SETUP.md`](docs/DID_SETUP.md)). Bundles verifiable by any DIF-conformant verifier (Universal Resolver, Digital Bazaar, Spruce ssi, Veramo, Microsoft Entra, Mattr).
|
|
355
|
+
- ✅ **`sum attest --ledger DB`** closes the attest → resolve loop. Per-triple ProvenanceRecords (source URI, byte range, sentence excerpt, extractor ID) recorded into a SQLite AkashicLedger; resulting `prov_id`s attached to `bundle.sum_cli.prov_ids`. `sum resolve <prov_id> --db DB` walks axiom → source byte range.
|
|
356
|
+
- ✅ **Cross-runtime Ed25519 trust triangle.** Same bundle bytes verify identically in Python (`sum verify`), Node (`standalone_verifier/verify.js` via WebCrypto), and modern browsers (`single_file_demo/index.html` via SubtleCrypto — Chrome 113+, Firefox 129+, Safari 17+). Locked in CI by kill-experiments K3 (positive) + K4 (tampered-bundle rejection); the harness also runs K1 + K1-multiword + K2.
|
|
357
|
+
- ✅ **HMAC is now optional** in `CanonicalCodec`. The old `sum-default-key` placebo default is gone (it was a publicly-known shared secret — cryptographic theater). `signing_key=None` emits no `signature` field; downgrade-protection preserved when a key IS configured.
|
|
358
|
+
- ✅ **PORTFOLIO.md + CLAUDE.md contract.** `PORTFOLIO.md` at repo root is the body of [sumequities.com/projects/sum](https://github.com/OtotaO/SUMequities) — verified-claims-only, every metric row labelled `**proved**` or `**empirical-benchmark**`. CI job `portfolio-contract` (blocking) + warn-only pre-commit hook enforce the contract.
|
|
359
|
+
- ✅ **CI gates landed.** `cross-runtime-harness` runs K1-K4 every PR (Node 22). `pypi-install-smoke` builds the wheel and runs `echo prose | sum attest | sum verify` in a fresh venv every push. `publish-pypi.yml` handles OIDC-authenticated PyPI releases on tag pushes.
|
|
360
|
+
|
|
361
|
+
#### Earlier (substrate work)
|
|
362
|
+
|
|
349
363
|
- ✅ **Per-doc logging in the regeneration runner** (commit `02b4413`) — `RegenerationMetrics.per_doc` names the specific (s, p, o) triples that failed entailment so the aggregate FActScore gap is debuggable at the generator-prompt layer.
|
|
350
364
|
- ✅ **LLM narrative full round-trip runner** (commit `9fd232d`, first measurement `2c252f0`) — composes `LiveLLMAdapter.extract_triplets → generate_text → extract_triplets`, reports per-doc drift. Measured on `seed_v1`: 107.75 % drift / 0.12 exact-match recall. See PROOF_BOUNDARY §2.5.
|
|
351
365
|
- ✅ **W3C Verifiable Credentials 2.0 emission + verification** (commit `e007f94`) — pure-Python `eddsa-jcs-2022` Data Integrity path at `sum_engine_internal/infrastructure/verifiable_credential.py` + RFC 8785 JCS at `sum_engine_internal/infrastructure/jcs.py`. 58 tests. Bundles consumable by any VC-compliant ecosystem.
|
|
@@ -409,11 +423,14 @@ npx wrangler pages deploy single_file_demo --project-name sum-demo
|
|
|
409
423
|
|
|
410
424
|
No environment variables required. No KV / R2 / D1 attached. The demo is 100 % client-side.
|
|
411
425
|
|
|
412
|
-
###
|
|
426
|
+
### Hosted-demo LLM proxy (Cloudflare Pages Function — shipped)
|
|
427
|
+
|
|
428
|
+
The demo upgrades extraction to LLM-grade *automatically* when pasted into a Claude artifact (commit `e5e57b6` — `window.claude.complete` is detected at runtime). On a plain Cloudflare Pages URL without a Claude account, extraction has two fallbacks, in order:
|
|
413
429
|
|
|
414
|
-
|
|
430
|
+
1. **Pages Function** at [`single_file_demo/functions/api/complete.ts`](single_file_demo/functions/api/complete.ts) — proxies Anthropic (preferred) or OpenAI through the optional Cloudflare AI Gateway. Server-side only reads `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` from the deployment's environment variables; never the user's browser. Returns the same JSON shape `window.claude.complete` would produce, so the in-page extractor doesn't care which path served it.
|
|
431
|
+
2. **Naive tokeniser** — pure-browser sentence-split + stopword-strip fallback when neither the artifact runtime nor the Pages Function path is available. Honest about its limits.
|
|
415
432
|
|
|
416
|
-
|
|
433
|
+
The demo's UI labels which path produced any given result (`extracted by Claude (artifact runtime)` / `extracted by Claude via Pages Function` / `extracted by naive tokeniser`).
|
|
417
434
|
|
|
418
435
|
### Roadmap — hybrid edge architecture (not shipped today)
|
|
419
436
|
|
|
@@ -425,9 +442,9 @@ These are vision items; the shipped today is the static artifact above.
|
|
|
425
442
|
|
|
426
443
|
---
|
|
427
444
|
|
|
428
|
-
## 🛡️ Verification:
|
|
445
|
+
## 🛡️ Verification: 1000+-Test Suite + 131 Cross-Runtime Fixtures
|
|
429
446
|
|
|
430
|
-
The test suite covers both proven invariants and empirically-measured properties; each assertion is scoped to the epistemic status of the thing it tests.
|
|
447
|
+
The test suite covers both proven invariants and empirically-measured properties; each assertion is scoped to the epistemic status of the thing it tests. With the `[dev]` extras installed (`pip install -e '.[sieve,dev]'` or `make install`), `pytest --collect-only` reports **1035 tests** at v0.3.0; the full-suite green count is **1027 passed** (the 8-test gap is spacy-dependent cases that skip when `en_core_web_sm` isn't downloaded into the venv). CI runs the full suite on every push via `.github/workflows/quantum-ci.yml`.
|
|
431
448
|
|
|
432
449
|
```text
|
|
433
450
|
Provable (deterministic code + tests that enforce the proof):
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sum-engine"
|
|
7
|
-
version = "0.2.1"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "Apache-2.0" }
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
"""SUM CLI entry point.
|
|
2
2
|
|
|
3
3
|
Subcommands:
|
|
4
|
-
sum attest
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
sum
|
|
8
|
-
sum
|
|
9
|
-
sum
|
|
10
|
-
sum
|
|
4
|
+
sum attest — stdin prose → CanonicalBundle JSON on stdout
|
|
5
|
+
(optionally HMAC + Ed25519 + per-triple ledger)
|
|
6
|
+
sum verify — bundle → exit 0 on match, 1 on signature/state mismatch
|
|
7
|
+
sum resolve — prov_id → ProvenanceRecord JSON (ledger lookup)
|
|
8
|
+
sum ledger — introspect a ledger: list | stats | head
|
|
9
|
+
sum inspect — structural read of a bundle (no crypto, no reconstruction)
|
|
10
|
+
sum schema — JSON Schema for bundle | provenance | credential
|
|
11
|
+
sum --version — print version string
|
|
12
|
+
sum --help — auto-generated usage
|
|
11
13
|
|
|
12
14
|
Design notes (read once, explained here so the code stays terse):
|
|
13
15
|
|
|
@@ -518,6 +520,354 @@ def cmd_resolve(args: argparse.Namespace) -> int:
|
|
|
518
520
|
return 0
|
|
519
521
|
|
|
520
522
|
|
|
523
|
+
# ─── ledger ──────────────────────────────────────────────────────────
|
|
524
|
+
#
|
|
525
|
+
# Agentic-first introspection for an AkashicLedger. An agent that wants to
|
|
526
|
+
# know "what's in this ledger?" without already having a prov_id in hand
|
|
527
|
+
# previously had no answer — resolve looked up by id, and no other command
|
|
528
|
+
# enumerated. These three subcommands fill that gap: list (NDJSON rows),
|
|
529
|
+
# stats (one-shot summary), head (current state integer per branch).
|
|
530
|
+
#
|
|
531
|
+
# Output discipline: one JSON object per line (NDJSON) for list, a single
|
|
532
|
+
# pretty-printed JSON object for stats and head. Agents can pipe list
|
|
533
|
+
# into jq / jsonl tooling; humans can read stats / head directly.
|
|
534
|
+
|
|
535
|
+
def _open_ledger_or_exit(db_path: str):
|
|
536
|
+
"""Sanity-check the ledger path. Exit 2 with a clear message if the
|
|
537
|
+
file does not exist — the ledger auto-creates tables on __init__,
|
|
538
|
+
so "opening" a nonexistent path would silently make a new empty
|
|
539
|
+
database, which is almost never what an agent introspecting a
|
|
540
|
+
ledger wants."""
|
|
541
|
+
from pathlib import Path
|
|
542
|
+
|
|
543
|
+
from sum_engine_internal.infrastructure.akashic_ledger import AkashicLedger
|
|
544
|
+
|
|
545
|
+
if not Path(db_path).exists():
|
|
546
|
+
print(
|
|
547
|
+
f"sum: ledger not found at {db_path}. "
|
|
548
|
+
f"Mint one with: sum attest --ledger {db_path} < prose.txt",
|
|
549
|
+
file=sys.stderr,
|
|
550
|
+
)
|
|
551
|
+
raise SystemExit(2)
|
|
552
|
+
return AkashicLedger(db_path=db_path)
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def cmd_ledger_list(args: argparse.Namespace) -> int:
    """Enumerate prov_ids with their linked axiom_keys and evidence spans.

    Output: NDJSON (one record per line) on stdout. ``--limit`` caps the
    number of records emitted; ``--axiom`` filters to a single axiom_key;
    ``--since`` takes an ISO 8601 timestamp and emits only records
    at-or-after it. Combined filters AND.

    Args:
        args: Parsed CLI namespace. Reads ``db``, ``axiom``, ``limit``,
            ``since`` and ``verbose``.

    Returns:
        0 on success. A missing ledger exits with code 2 via
        ``_open_ledger_or_exit``.
    """
    import sqlite3
    from contextlib import closing

    _open_ledger_or_exit(args.db)

    # Direct SQL — the ledger does not expose a list-all method and
    # adding one would force an async surface we do not need here.
    # Two-table join: axiom_provenance (axiom_key → prov_id) +
    # provenance_records (prov_id → record_json).
    clauses: list[str] = []
    params: list[object] = []
    if args.axiom:
        clauses.append("ap.axiom_key = ?")
        params.append(args.axiom)
    where = (" WHERE " + " AND ".join(clauses)) if clauses else ""

    limit = int(args.limit) if args.limit else None
    if limit is not None and limit < 0:
        # SQLite treats a negative LIMIT as "no limit"; keep that meaning.
        limit = None

    # --since is evaluated in Python (the timestamp lives inside the JSON
    # blob), so the SQL LIMIT may only be used when there is no --since
    # filter. Otherwise rows would be truncated *before* filtering and a
    # limited query could under-report matching records.
    sql_limit = f" LIMIT {limit}" if limit is not None and not args.since else ""

    # closing(): sqlite3's own context manager only scopes a transaction;
    # it does not close the connection.
    with closing(sqlite3.connect(args.db)) as conn:
        rows = conn.execute(
            "SELECT ap.prov_id, ap.axiom_key, pr.record_json "
            "FROM axiom_provenance AS ap "
            "JOIN provenance_records AS pr ON pr.prov_id = ap.prov_id"
            f"{where}"
            f" ORDER BY ap.prov_id{sql_limit}",
            params,
        ).fetchall()

    emitted = 0
    for prov_id, axiom_key, record_json in rows:
        if limit is not None and emitted >= limit:
            break
        rec = json.loads(record_json)
        # Lexicographic comparison of ISO 8601 strings; a missing or null
        # timestamp sorts before everything and is therefore filtered out.
        # NOTE(review): assumes stored timestamps and --since share one
        # format/timezone convention — confirm against the ledger writer.
        if args.since and (rec.get("timestamp") or "") < args.since:
            continue
        out = {
            "prov_id": prov_id,
            "axiom_key": axiom_key,
            "source_uri": rec.get("source_uri"),
            "byte_start": rec.get("byte_start"),
            "byte_end": rec.get("byte_end"),
            "timestamp": rec.get("timestamp"),
            "extractor_id": rec.get("extractor_id"),
        }
        json.dump(out, sys.stdout)
        sys.stdout.write("\n")
        emitted += 1

    if args.verbose:
        print(f"sum: emitted {emitted} record(s) from {args.db}", file=sys.stderr)
    return 0
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
def cmd_ledger_stats(args: argparse.Namespace) -> int:
    """Emit a one-shot JSON summary of the ledger's state on stdout.

    The summary carries the provenance-record count, the number of
    distinct axiom keys, the earliest/latest record timestamps, the chain
    tip hash and, per branch, the digit count of its state integer.

    Args:
        args: Parsed CLI namespace. Reads ``db`` and ``pretty``.

    Returns:
        0 on success. A missing ledger exits with code 2 via
        ``_open_ledger_or_exit``.
    """
    import asyncio
    import sqlite3
    from contextlib import closing

    ledger = _open_ledger_or_exit(args.db)

    # closing(): sqlite3's own context manager only scopes a transaction;
    # it does not close the connection, which would otherwise stay open
    # until garbage collection.
    with closing(sqlite3.connect(args.db)) as conn:
        total_prov, = conn.execute(
            "SELECT COUNT(*) FROM provenance_records"
        ).fetchone()
        distinct_axioms, = conn.execute(
            "SELECT COUNT(DISTINCT axiom_key) FROM axiom_provenance"
        ).fetchone()
        # Pull min/max timestamp across the stored JSON; SQLite cannot
        # index inside JSON blobs without a generated column, so we
        # accept the scan — ledger introspection is not a hot path.
        ts_rows = conn.execute(
            "SELECT record_json FROM provenance_records"
        ).fetchall()

    timestamps = [
        json.loads(r[0]).get("timestamp", "") for r in ts_rows
        if r[0]
    ]
    # Drop missing/empty timestamps so min()/max() only see real values.
    timestamps = [t for t in timestamps if t]

    chain_tip = asyncio.run(ledger.get_chain_tip())
    branch_heads = asyncio.run(ledger.load_branch_heads())
    # Size in digits, not the integer itself (branch heads get huge and
    # an agent parsing JSON does not want a megabyte integer inline).
    # NOTE(review): str() of a very large int can hit CPython's int->str
    # digit limit on recent versions — confirm the limit is raised
    # elsewhere if state integers are expected to exceed it.
    branches = {
        name: {"state_integer_digits": len(str(state_int))}
        for name, state_int in branch_heads.items()
    }

    out = {
        "db_path": args.db,
        "provenance_records_total": total_prov,
        "distinct_axiom_keys": distinct_axioms,
        "earliest_timestamp": min(timestamps) if timestamps else None,
        "latest_timestamp": max(timestamps) if timestamps else None,
        "chain_tip_hash": chain_tip,
        "branches": branches,
    }
    json.dump(out, sys.stdout, indent=2 if args.pretty else None)
    sys.stdout.write("\n")
    return 0
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def cmd_ledger_head(args: argparse.Namespace) -> int:
    """Print the current state integer of one branch, or of every branch.

    With ``--branch`` the output is ``{"branch", "state_integer"}``;
    without it the output is ``{"branches": {name: {"state_integer"}}}``.
    State integers are always emitted as strings.

    Args:
        args: Parsed CLI namespace. Reads ``db``, ``branch`` and ``pretty``.

    Returns:
        0 on success, 1 when the requested branch is not in the ledger.
    """
    import asyncio

    opened = _open_ledger_or_exit(args.db)
    branch_states = asyncio.run(opened.load_branch_heads())

    if not args.branch:
        # String-encode every branch's state integer to avoid JSON
        # int-precision loss for agents whose parsers use doubles.
        per_branch = {}
        for branch_name, value in branch_states.items():
            per_branch[branch_name] = {"state_integer": str(value)}
        payload = {"branches": per_branch}
    else:
        value = branch_states.get(args.branch)
        if value is None:
            print(
                f"sum: branch {args.branch!r} not found in {args.db}. "
                f"Known branches: {sorted(branch_states)}",
                file=sys.stderr,
            )
            return 1
        payload = {"branch": args.branch, "state_integer": str(value)}

    indent = 2 if args.pretty else None
    json.dump(payload, sys.stdout, indent=indent)
    sys.stdout.write("\n")
    return 0
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
# ─── inspect ─────────────────────────────────────────────────────────
|
|
690
|
+
#
|
|
691
|
+
# Fast, crypto-free read of a bundle's structural shape. Answers "what
|
|
692
|
+
# does this bundle contain?" without running Ed25519 verification,
|
|
693
|
+
# re-deriving primes, or reconstructing the state integer — useful when
|
|
694
|
+
# an agent wants to route based on bundle attributes before deciding
|
|
695
|
+
# whether to pay the full verify cost.
|
|
696
|
+
|
|
697
|
+
def cmd_inspect(args: argparse.Namespace) -> int:
    """Print a structural summary of a bundle without any crypto work.

    Reads the bundle via ``_read_input(args.input)``, parses it as JSON
    and reports version fields, branch, timestamp, claimed vs. parsed
    axiom counts, the digit count of the state integer, which signature
    fields are present, and the ``sum_cli`` sidecar if any. No signature
    is verified and no state is reconstructed.

    Args:
        args: Parsed CLI namespace. Reads ``input`` and ``pretty``.

    Returns:
        0 on success, 2 when the input is not valid JSON or is valid JSON
        that is not an object.
    """
    import re

    raw = _read_input(args.input)
    try:
        b = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f"sum: bundle is not valid JSON: {e}", file=sys.stderr)
        return 2
    if not isinstance(b, dict):
        # A list/string/number is valid JSON but has no bundle fields;
        # report it the same way as malformed input instead of crashing
        # on the first .get() call.
        print(
            f"sum: bundle must be a JSON object, got {type(b).__name__}",
            file=sys.stderr,
        )
        return 2

    # Count axioms by parsing the canonical tome's line structure — same
    # regex verify uses, but we do not build primes or check the state.
    line_re = re.compile(r"^The (\S+) (\S+) (.+)\.$")
    tome = b.get("canonical_tome", "")
    if not isinstance(tome, str):
        # null or a non-string tome contributes no parseable axiom lines.
        tome = ""
    axiom_lines = [
        line for line in tome.splitlines()
        if line_re.match(line.strip())
    ]

    # The state integer is normally a string, but tolerate a missing/null
    # field (0 digits) or a bare JSON number rather than raising.
    state_value = b.get("state_integer", "")
    if state_value is None:
        state_digits = 0
    elif isinstance(state_value, str):
        state_digits = len(state_value)
    else:
        state_digits = len(str(state_value))

    signatures = {
        "hmac_present": bool(b.get("signature")),
        "ed25519_present": bool(b.get("public_signature") and b.get("public_key")),
    }

    out = {
        "bundle_version": b.get("bundle_version"),
        "canonical_format_version": b.get("canonical_format_version"),
        "prime_scheme": b.get("prime_scheme", "sha256_64_v1"),
        "branch": b.get("branch"),
        "timestamp": b.get("timestamp"),
        "is_delta": bool(b.get("is_delta", False)),
        "axiom_count_claimed": b.get("axiom_count"),
        "axiom_count_parsed": len(axiom_lines),
        "state_integer_digits": state_digits,
        "signatures": signatures,
        # Surface sum_cli sidecar if present — agents want to know
        # whether a bundle carries provenance refs without digging.
        "sum_cli": b.get("sum_cli"),
    }
    json.dump(out, sys.stdout, indent=2 if args.pretty else None)
    sys.stdout.write("\n")
    return 0
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
# ─── schema ──────────────────────────────────────────────────────────
|
|
742
|
+
#
|
|
743
|
+
# Print JSON Schema for each output shape. Agents validating SUM output
|
|
744
|
+
# programmatically previously had to reverse-engineer the shape from
|
|
745
|
+
# prose docs; these schemas are the ground truth.
|
|
746
|
+
|
|
747
|
+
# JSON Schema (Draft 2020-12) for the CanonicalBundle shape.
# Seven fields are required; the hex state, prime scheme, delta flag,
# signature fields and the sum_cli sidecar are optional. Unknown top-level
# keys are permitted (additionalProperties is True).
_BUNDLE_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://github.com/OtotaO/SUM/schemas/canonical-bundle.json",
    "title": "CanonicalBundle",
    "description": "A self-contained, optionally-signed SUM knowledge transport unit.",
    "type": "object",
    "required": [
        "bundle_version", "canonical_format_version", "branch",
        "axiom_count", "canonical_tome", "state_integer", "timestamp",
    ],
    "properties": {
        "bundle_version": {"type": "string", "examples": ["1.1.0"]},
        "canonical_format_version": {"type": "string", "examples": ["1.0.0"]},
        "branch": {"type": "string"},
        "axiom_count": {"type": "integer", "minimum": 0},
        "canonical_tome": {
            "type": "string",
            "description": "Markdown-ish rendering with `The S P O.` lines, one axiom per line.",
        },
        # Carried as a decimal string rather than a JSON number.
        "state_integer": {
            "type": "string",
            "description": "Decimal-encoded Gödel state integer (string, not number, to preserve precision).",
            "pattern": "^[0-9]+$",
        },
        # Lowercase hex with a mandatory 0x prefix.
        "state_integer_hex": {"type": "string", "pattern": "^0x[0-9a-f]+$"},
        "timestamp": {"type": "string", "format": "date-time"},
        # Closed set: any other scheme name fails validation.
        "prime_scheme": {"type": "string", "enum": ["sha256_64_v1", "sha256_128_v2"]},
        "is_delta": {"type": "boolean"},
        # "hmac-sha256:" prefix followed by exactly 64 lowercase hex characters.
        "signature": {
            "type": "string",
            "pattern": "^hmac-sha256:[0-9a-f]{64}$",
            "description": "Optional HMAC-SHA256 over {tome|state|timestamp}.",
        },
        # Only the "ed25519:" prefix is enforced; the payload is any non-empty text.
        "public_signature": {
            "type": "string",
            "pattern": "^ed25519:.+$",
            "description": "Optional Ed25519 signature (base64) over {tome|state|timestamp}.",
        },
        "public_key": {
            "type": "string",
            "pattern": "^ed25519:.+$",
            "description": "Optional embedded Ed25519 public key (base64).",
        },
        # Free-form object: no inner properties are constrained.
        "sum_cli": {
            "type": "object",
            "description": "Non-normative sidecar from the sum CLI: extractor, source_uri, prov_ids, cli_version.",
            "additionalProperties": True,
        },
    },
    "additionalProperties": True,
}
|
|
798
|
+
|
|
799
|
+
# JSON Schema (Draft 2020-12) for the ProvenanceRecord shape.
# Unlike the bundle schema this one is closed: additionalProperties is
# False, so only the properties listed below validate.
_PROVENANCE_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://github.com/OtotaO/SUM/schemas/provenance-record.json",
    "title": "ProvenanceRecord",
    "description": "Byte-level evidence that a given axiom was extracted from a specific source span by a specific extractor.",
    "type": "object",
    "required": ["source_uri", "byte_start", "byte_end", "extractor_id", "timestamp", "text_excerpt"],
    "properties": {
        # The accepted URI forms are listed in the description only; no
        # pattern enforces them here.
        "source_uri": {
            "type": "string",
            "description": "sha256:<64-hex>, doi:, https://, or urn:sum:source:.",
        },
        "byte_start": {"type": "integer", "minimum": 0},
        # Minimum of 1 (vs 0 for byte_start); the schema does not itself
        # require byte_end > byte_start.
        "byte_end": {"type": "integer", "minimum": 1},
        "extractor_id": {"type": "string", "examples": ["sum.sieve:deterministic_v1"]},
        "timestamp": {"type": "string", "format": "date-time"},
        "text_excerpt": {"type": "string", "maxLength": 1024},
        # Optional: declared here but absent from "required".
        "schema_version": {"type": "string", "examples": ["1.0.0"]},
    },
    "additionalProperties": False,
}
|
|
820
|
+
|
|
821
|
+
# JSON Schema (Draft 2020-12) for the verifiable-credential shape.
# Per its own description this covers only the subset SUM produces, not
# the full W3C VC 2.0 data model. Unknown top-level keys are permitted.
_CREDENTIAL_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://github.com/OtotaO/SUM/schemas/verifiable-credential.json",
    "title": "VerifiableCredential 2.0 (eddsa-jcs-2022)",
    "description": "W3C VC 2.0 credential shape SUM emits via sign_credential. Not a complete VC 2.0 schema — just the subset SUM produces.",
    "type": "object",
    "required": ["@context", "type", "issuer", "credentialSubject", "proof"],
    "properties": {
        "@context": {"type": "array", "items": {"type": "string"}},
        "type": {"type": "array", "items": {"type": "string"}},
        # Constrained to a plain string; the accepted identifier forms are
        # described but not pattern-checked.
        "issuer": {"type": "string", "description": "did:key:, did:web:, or https://"},
        # Optional: not listed in "required".
        "validFrom": {"type": "string", "format": "date-time"},
        "credentialSubject": {"type": "object"},
        "proof": {
            "type": "object",
            "required": ["type", "cryptosuite", "verificationMethod", "proofPurpose", "proofValue"],
            "properties": {
                # The three "const" entries pin the proof to a single
                # type / cryptosuite / purpose combination.
                "type": {"const": "DataIntegrityProof"},
                "cryptosuite": {"const": "eddsa-jcs-2022"},
                "verificationMethod": {"type": "string"},
                "proofPurpose": {"const": "assertionMethod"},
                "proofValue": {"type": "string", "description": "Multibase base58btc-encoded Ed25519 signature."},
                # Optional: not listed in the proof's "required".
                "created": {"type": "string", "format": "date-time"},
            },
        },
    },
    "additionalProperties": True,
}
|
|
849
|
+
|
|
850
|
+
# Registry mapping a shape name to its schema dict. cmd_schema looks up
# args.shape here and lists these keys in its "unknown schema" error.
_SCHEMA_BY_NAME = {
    "bundle": _BUNDLE_SCHEMA,
    "provenance": _PROVENANCE_SCHEMA,
    "credential": _CREDENTIAL_SCHEMA,
}
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def cmd_schema(args: argparse.Namespace) -> int:
    """Write the JSON Schema named by ``args.shape`` to stdout.

    Args:
        args: Parsed CLI namespace; only ``shape`` is read.

    Returns:
        0 after printing the schema (2-space indented, newline-terminated);
        2 with a message on stderr when the shape is not registered in
        ``_SCHEMA_BY_NAME``.
    """
    shape = args.shape
    # Guard clause: reject unknown shapes before doing any output.
    if shape not in _SCHEMA_BY_NAME:
        known = sorted(_SCHEMA_BY_NAME)
        print(f"sum: unknown schema {shape!r}. Known: {known}", file=sys.stderr)
        return 2

    rendered = json.dumps(_SCHEMA_BY_NAME[shape], indent=2)
    sys.stdout.write(f"{rendered}\n")
    return 0
|
|
869
|
+
|
|
870
|
+
|
|
521
871
|
# ─── Argparse wiring ─────────────────────────────────────────────────
|
|
522
872
|
|
|
523
873
|
def build_parser() -> argparse.ArgumentParser:
|
|
@@ -531,14 +881,20 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
531
881
|
epilog=(
|
|
532
882
|
"Examples:\n"
|
|
533
883
|
" echo 'Alice likes cats.' | sum attest > bundle.json\n"
|
|
534
|
-
" sum verify < bundle.json
|
|
884
|
+
" sum verify < bundle.json # structural only\n"
|
|
535
885
|
" sum attest --ed25519-key keys/issuer.pem | sum verify --strict\n"
|
|
886
|
+
" sum attest --ledger akashic.db < prose.txt\n"
|
|
536
887
|
" sum resolve prov:abc123... --db akashic.db\n"
|
|
888
|
+
" sum ledger list --db akashic.db # NDJSON of prov_ids\n"
|
|
889
|
+
" sum ledger stats --db akashic.db --pretty\n"
|
|
890
|
+
" sum inspect bundle.json --pretty # no-crypto read\n"
|
|
891
|
+
" sum schema bundle # JSON Schema stdout\n"
|
|
537
892
|
"\n"
|
|
538
893
|
"Attestation layers (all optional, compose freely):\n"
|
|
539
894
|
" state integer — content-addressed integrity (always present)\n"
|
|
540
895
|
" --signing-key — HMAC-SHA256 for shared-secret peers\n"
|
|
541
896
|
" --ed25519-key — Ed25519 public-key attestation (W3C VC 2.0)\n"
|
|
897
|
+
" --ledger — per-triple byte-level ProvenanceRecords\n"
|
|
542
898
|
"\n"
|
|
543
899
|
"For the full feature catalog, see "
|
|
544
900
|
"https://github.com/OtotaO/SUM/blob/main/docs/FEATURE_CATALOG.md"
|
|
@@ -666,6 +1022,88 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
666
1022
|
p_resolve.add_argument("--db", default="akashic.db", help="Path to the SQLite ledger. Default: ./akashic.db.")
|
|
667
1023
|
p_resolve.set_defaults(func=cmd_resolve)
|
|
668
1024
|
|
|
1025
|
+
# ledger — introspect an AkashicLedger without a prov_id in hand.
|
|
1026
|
+
p_ledger = subparsers.add_parser(
|
|
1027
|
+
"ledger",
|
|
1028
|
+
help="Introspect an AkashicLedger (list prov_ids, stats, branch heads).",
|
|
1029
|
+
description=(
|
|
1030
|
+
"Agentic introspection for an AkashicLedger. Three subcommands: "
|
|
1031
|
+
"list (NDJSON rows, one per prov_id), stats (one-shot summary), "
|
|
1032
|
+
"head (current state integer per branch)."
|
|
1033
|
+
),
|
|
1034
|
+
)
|
|
1035
|
+
p_ledger_sub = p_ledger.add_subparsers(dest="ledger_cmd", required=True, metavar="<ledger-cmd>")
|
|
1036
|
+
|
|
1037
|
+
p_list = p_ledger_sub.add_parser(
|
|
1038
|
+
"list",
|
|
1039
|
+
help="Enumerate prov_ids in the ledger (NDJSON on stdout).",
|
|
1040
|
+
description=(
|
|
1041
|
+
"Emits one JSON object per line (NDJSON): {prov_id, axiom_key, "
|
|
1042
|
+
"source_uri, byte_start, byte_end, timestamp, extractor_id}. "
|
|
1043
|
+
"Filters compose with AND: --axiom (exact axiom_key match), "
|
|
1044
|
+
"--since (ISO 8601, record timestamp >= since), --limit (max rows)."
|
|
1045
|
+
),
|
|
1046
|
+
)
|
|
1047
|
+
p_list.add_argument("--db", default="akashic.db", help="SQLite ledger path. Default: ./akashic.db.")
|
|
1048
|
+
p_list.add_argument("--axiom", default=None, help="Filter to this exact axiom_key (e.g. 'alice||like||cat').")
|
|
1049
|
+
p_list.add_argument("--since", default=None, help="Only records with timestamp >= this ISO 8601 string.")
|
|
1050
|
+
p_list.add_argument("--limit", type=int, default=0, help="Max rows to emit (0 = unlimited).")
|
|
1051
|
+
p_list.add_argument("--verbose", "-v", action="store_true", help="Print row count on stderr.")
|
|
1052
|
+
p_list.set_defaults(func=cmd_ledger_list)
|
|
1053
|
+
|
|
1054
|
+
p_stats = p_ledger_sub.add_parser(
|
|
1055
|
+
"stats",
|
|
1056
|
+
help="One-shot summary of ledger state: totals, timestamp range, chain tip, branches.",
|
|
1057
|
+
)
|
|
1058
|
+
p_stats.add_argument("--db", default="akashic.db", help="SQLite ledger path. Default: ./akashic.db.")
|
|
1059
|
+
p_stats.add_argument("--pretty", action="store_true", help="Pretty-print JSON.")
|
|
1060
|
+
p_stats.set_defaults(func=cmd_ledger_stats)
|
|
1061
|
+
|
|
1062
|
+
p_head = p_ledger_sub.add_parser(
|
|
1063
|
+
"head",
|
|
1064
|
+
help="Current state integer for one branch (--branch) or all branches.",
|
|
1065
|
+
)
|
|
1066
|
+
p_head.add_argument("--db", default="akashic.db", help="SQLite ledger path. Default: ./akashic.db.")
|
|
1067
|
+
p_head.add_argument("--branch", default=None, help="Specific branch name. Default: all branches.")
|
|
1068
|
+
p_head.add_argument("--pretty", action="store_true", help="Pretty-print JSON.")
|
|
1069
|
+
p_head.set_defaults(func=cmd_ledger_head)
|
|
1070
|
+
|
|
1071
|
+
# inspect — structural read of a bundle, no crypto, no reconstruction.
|
|
1072
|
+
p_inspect = subparsers.add_parser(
|
|
1073
|
+
"inspect",
|
|
1074
|
+
help="Read bundle metadata without running verification (fast, offline).",
|
|
1075
|
+
description=(
|
|
1076
|
+
"Reads a CanonicalBundle JSON and emits structural metadata: "
|
|
1077
|
+
"axiom counts (claimed + parsed), state integer size in digits, "
|
|
1078
|
+
"signature fields present, bundle + format versions, timestamp, "
|
|
1079
|
+
"branch, sum_cli sidecar if present. Does NOT verify signatures "
|
|
1080
|
+
"or reconstruct the state integer — use `sum verify` for that. "
|
|
1081
|
+
"Useful when an agent wants to route a bundle by shape before "
|
|
1082
|
+
"paying the full verify cost."
|
|
1083
|
+
),
|
|
1084
|
+
)
|
|
1085
|
+
p_inspect.add_argument("--input", "-i", help="Read from this path instead of stdin ('-' for stdin).")
|
|
1086
|
+
p_inspect.add_argument("--pretty", action="store_true", help="Pretty-print JSON output.")
|
|
1087
|
+
p_inspect.set_defaults(func=cmd_inspect)
|
|
1088
|
+
|
|
1089
|
+
# schema — JSON Schema for each shape SUM emits.
|
|
1090
|
+
p_schema = subparsers.add_parser(
|
|
1091
|
+
"schema",
|
|
1092
|
+
help="Emit JSON Schema for one of SUM's output shapes.",
|
|
1093
|
+
description=(
|
|
1094
|
+
"Prints a JSON Schema (Draft 2020-12) for one of SUM's output "
|
|
1095
|
+
"shapes. Use this to validate agent output before trusting it. "
|
|
1096
|
+
"Shapes: bundle (CanonicalBundle), provenance (ProvenanceRecord), "
|
|
1097
|
+
"credential (W3C VC 2.0 eddsa-jcs-2022)."
|
|
1098
|
+
),
|
|
1099
|
+
)
|
|
1100
|
+
p_schema.add_argument(
|
|
1101
|
+
"shape",
|
|
1102
|
+
choices=["bundle", "provenance", "credential"],
|
|
1103
|
+
help="Which output shape to emit the schema for.",
|
|
1104
|
+
)
|
|
1105
|
+
p_schema.set_defaults(func=cmd_schema)
|
|
1106
|
+
|
|
669
1107
|
return parser
|
|
670
1108
|
|
|
671
1109
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sum-engine
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: SUM — bidirectional knowledge distillation with optional cryptographic attestation. Pipe prose, get a CanonicalBundle (HMAC / Ed25519 / W3C VC 2.0), verify anywhere.
|
|
5
5
|
Author: ototao
|
|
6
6
|
License: Apache-2.0
|
|
@@ -91,7 +91,7 @@ Every headline number below is reproducible via `python -m scripts.bench.run_ben
|
|
|
91
91
|
| `record_provenance_batch` sustained throughput | **~22 k ops/sec** (10.2× the single-write path) | empirical-benchmark |
|
|
92
92
|
| Merkle-chain integrity under concurrent writers | holds (50–200-event bursts) | **provable** (post `9c4139d`) |
|
|
93
93
|
| Cross-runtime byte-identity fixtures | **131 / 131 passing** across Python ↔ Node.js ↔ Browser JS | empirical-benchmark |
|
|
94
|
-
| Test suite | **
|
|
94
|
+
| Test suite | **1035 collected** at v0.3.0, with `.[dev]` extras installed (8-test gap is spacy-skipped when `en_core_web_sm` isn't downloaded) | **provable** |
|
|
95
95
|
|
|
96
96
|
---
|
|
97
97
|
|
|
@@ -390,6 +390,20 @@ Open `http://localhost:8000` to access:
|
|
|
390
390
|
These are **roadmap items**, not current capabilities. Each is a concrete piece of work with a defined entry in `docs/PROOF_BOUNDARY.md` §3. They are listed in approximate order of prerequisite dependence.
|
|
391
391
|
|
|
392
392
|
### Shipped since the last README pass
|
|
393
|
+
|
|
394
|
+
#### v0.1.0 → v0.2.1 release wave (2026-04-22 → 2026-04-23)
|
|
395
|
+
|
|
396
|
+
- ✅ **`sum-engine` on PyPI.** `pip install sum-engine[sieve]` installs the `sum` CLI binary (subcommands `attest` / `verify` / `resolve`). Three releases: 0.1.0 first public release, 0.2.0 `internal/` → `sum_engine_internal/` namespace rename, 0.2.1 dynamic version-resolution fix (`importlib.metadata.version("sum-engine")`). See [`CHANGELOG.md`](CHANGELOG.md).
|
|
397
|
+
- ✅ **`sum verify` actually verifies signatures** (not just structural reconstruction) — HMAC-SHA256 with `--signing-key`, Ed25519 always when `public_signature` + `public_key` are present. JSON result carries `signatures: {hmac, ed25519}` with values in `{verified, skipped, absent, invalid}`. `--strict` mode requires at least one verifiable signature. 15 cases pinning every branch.
|
|
398
|
+
- ✅ **`sum attest --ed25519-key PEM`** — mints W3C VC 2.0 (`eddsa-jcs-2022`) compatible Ed25519-signed bundles. PEM produced by `python -m scripts.generate_did_web` (did:web bootstrap shipped in [`docs/DID_SETUP.md`](docs/DID_SETUP.md)). Bundles verifiable by any DIF-conformant verifier (Universal Resolver, Digital Bazaar, Spruce ssi, Veramo, Microsoft Entra, Mattr).
|
|
399
|
+
- ✅ **`sum attest --ledger DB`** closes the attest → resolve loop. Per-triple ProvenanceRecords (source URI, byte range, sentence excerpt, extractor ID) recorded into a SQLite AkashicLedger; resulting `prov_id`s attached to `bundle.sum_cli.prov_ids`. `sum resolve <prov_id> --db DB` walks axiom → source byte range.
|
|
400
|
+
- ✅ **Cross-runtime Ed25519 trust triangle.** Same bundle bytes verify identically in Python (`sum verify`), Node (`standalone_verifier/verify.js` via WebCrypto), and modern browsers (`single_file_demo/index.html` via SubtleCrypto — Chrome 113+, Firefox 129+, Safari 17+). Locked in CI by kill-experiments K3 (positive) + K4 (tampered-bundle rejection); the harness also runs K1 + K1-multiword + K2.
|
|
401
|
+
- ✅ **HMAC is now optional** in `CanonicalCodec`. The old `sum-default-key` placebo default is gone (it was a publicly-known shared secret — cryptographic theater). `signing_key=None` emits no `signature` field; downgrade-protection preserved when a key IS configured.
|
|
402
|
+
- ✅ **PORTFOLIO.md + CLAUDE.md contract.** `PORTFOLIO.md` at repo root is the body of [sumequities.com/projects/sum](https://github.com/OtotaO/SUMequities) — verified-claims-only, every metric row labelled `**proved**` or `**empirical-benchmark**`. CI job `portfolio-contract` (blocking) + warn-only pre-commit hook enforce the contract.
|
|
403
|
+
- ✅ **CI gates landed.** `cross-runtime-harness` runs K1-K4 every PR (Node 22). `pypi-install-smoke` builds the wheel and runs `echo prose | sum attest | sum verify` in a fresh venv every push. `publish-pypi.yml` handles OIDC-authenticated PyPI releases on tag pushes.
|
|
404
|
+
|
|
405
|
+
#### Earlier (substrate work)
|
|
406
|
+
|
|
393
407
|
- ✅ **Per-doc logging in the regeneration runner** (commit `02b4413`) — `RegenerationMetrics.per_doc` names the specific (s, p, o) triples that failed entailment so the aggregate FActScore gap is debuggable at the generator-prompt layer.
|
|
394
408
|
- ✅ **LLM narrative full round-trip runner** (commit `9fd232d`, first measurement `2c252f0`) — composes `LiveLLMAdapter.extract_triplets → generate_text → extract_triplets`, reports per-doc drift. Measured on `seed_v1`: 107.75 % drift / 0.12 exact-match recall. See PROOF_BOUNDARY §2.5.
|
|
395
409
|
- ✅ **W3C Verifiable Credentials 2.0 emission + verification** (commit `e007f94`) — pure-Python `eddsa-jcs-2022` Data Integrity path at `sum_engine_internal/infrastructure/verifiable_credential.py` + RFC 8785 JCS at `sum_engine_internal/infrastructure/jcs.py`. 58 tests. Bundles consumable by any VC-compliant ecosystem.
|
|
@@ -453,11 +467,14 @@ npx wrangler pages deploy single_file_demo --project-name sum-demo
|
|
|
453
467
|
|
|
454
468
|
No environment variables required. No KV / R2 / D1 attached. The demo is 100 % client-side.
|
|
455
469
|
|
|
456
|
-
###
|
|
470
|
+
### Hosted-demo LLM proxy (Cloudflare Pages Function — shipped)
|
|
471
|
+
|
|
472
|
+
The demo upgrades extraction to LLM-grade *automatically* when pasted into a Claude artifact (commit `e5e57b6` — `window.claude.complete` is detected at runtime). On a plain Cloudflare Pages URL without a Claude account, extraction has two fallbacks, in order:
|
|
457
473
|
|
|
458
|
-
|
|
474
|
+
1. **Pages Function** at [`single_file_demo/functions/api/complete.ts`](single_file_demo/functions/api/complete.ts) — proxies Anthropic (preferred) or OpenAI through the optional Cloudflare AI Gateway. API keys (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) are read server-side only, from the deployment's environment variables — never from the user's browser. Returns the same JSON shape `window.claude.complete` would produce, so the in-page extractor doesn't care which path served it.
|
|
475
|
+
2. **Naive tokeniser** — pure-browser sentence-split + stopword-strip fallback when neither the artifact runtime nor the Pages Function path is available. Honest about its limits.
|
|
459
476
|
|
|
460
|
-
|
|
477
|
+
The demo's UI labels which path produced any given result (`extracted by Claude (artifact runtime)` / `extracted by Claude via Pages Function` / `extracted by naive tokeniser`).
|
|
461
478
|
|
|
462
479
|
### Roadmap — hybrid edge architecture (not shipped today)
|
|
463
480
|
|
|
@@ -469,9 +486,9 @@ These are vision items; what is shipped today is the static artifact above.
|
|
|
469
486
|
|
|
470
487
|
---
|
|
471
488
|
|
|
472
|
-
## 🛡️ Verification:
|
|
489
|
+
## 🛡️ Verification: 1000+-Test Suite + 131 Cross-Runtime Fixtures
|
|
473
490
|
|
|
474
|
-
The test suite covers both proven invariants and empirically-measured properties; each assertion is scoped to the epistemic status of the thing it tests.
|
|
491
|
+
The test suite covers both proven invariants and empirically-measured properties; each assertion is scoped to the epistemic status of the thing it tests. With the `[dev]` extras installed (`pip install -e '.[sieve,dev]'` or `make install`), `pytest --collect-only` reports **1035 tests** at v0.3.0; the full-suite green count is **1027 passed** (the 8-test gap is spacy-dependent cases that skip when `en_core_web_sm` isn't downloaded into the venv). CI runs the full suite on every push via `.github/workflows/quantum-ci.yml`.
|
|
475
492
|
|
|
476
493
|
```text
|
|
477
494
|
Provable (deterministic code + tests that enforce the proof):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sum_engine-0.2.1 → sum_engine-0.3.0}/sum_engine_internal/infrastructure/verifiable_credential.py
RENAMED
|
File without changes
|
|
File without changes
|