sum-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. internal/__init__.py +8 -0
  2. internal/algorithms/__init__.py +1 -0
  3. internal/algorithms/causal_discovery.py +96 -0
  4. internal/algorithms/predicate_canon.py +137 -0
  5. internal/algorithms/semantic_arithmetic.py +890 -0
  6. internal/algorithms/syntactic_sieve.py +452 -0
  7. internal/algorithms/zk_semantics.py +90 -0
  8. internal/ensemble/__init__.py +1 -0
  9. internal/ensemble/automated_scientist.py +138 -0
  10. internal/ensemble/autonomous_agent.py +157 -0
  11. internal/ensemble/causal_triggers.py +121 -0
  12. internal/ensemble/confidence_calibrator.py +284 -0
  13. internal/ensemble/epistemic_arbiter.py +159 -0
  14. internal/ensemble/epistemic_loop.py +136 -0
  15. internal/ensemble/extraction_validator.py +172 -0
  16. internal/ensemble/gauge_orchestrator.py +150 -0
  17. internal/ensemble/live_llm_adapter.py +183 -0
  18. internal/ensemble/llm_entailment.py +117 -0
  19. internal/ensemble/mass_semantic_engine.py +138 -0
  20. internal/ensemble/ouroboros.py +281 -0
  21. internal/ensemble/semantic_dedup.py +261 -0
  22. internal/ensemble/tome_generator.py +286 -0
  23. internal/ensemble/tome_sliders.py +104 -0
  24. internal/ensemble/vector_bridge.py +195 -0
  25. internal/ensemble/venn_abers.py +211 -0
  26. internal/infrastructure/__init__.py +1 -0
  27. internal/infrastructure/akashic_ledger.py +812 -0
  28. internal/infrastructure/canonical_codec.py +452 -0
  29. internal/infrastructure/jcs.py +115 -0
  30. internal/infrastructure/key_manager.py +239 -0
  31. internal/infrastructure/p2p_mesh.py +168 -0
  32. internal/infrastructure/prov_o.py +159 -0
  33. internal/infrastructure/provenance.py +181 -0
  34. internal/infrastructure/rate_limiter.py +81 -0
  35. internal/infrastructure/resource_guards.py +117 -0
  36. internal/infrastructure/scheme_registry.py +136 -0
  37. internal/infrastructure/state_encoding.py +94 -0
  38. internal/infrastructure/telemetry.py +91 -0
  39. internal/infrastructure/tome_parser.py +55 -0
  40. internal/infrastructure/verifiable_credential.py +412 -0
  41. internal/infrastructure/zig_bridge.py +256 -0
  42. sum_cli/__init__.py +18 -0
  43. sum_cli/main.py +688 -0
  44. sum_engine-0.1.0.dist-info/METADATA +590 -0
  45. sum_engine-0.1.0.dist-info/RECORD +49 -0
  46. sum_engine-0.1.0.dist-info/WHEEL +5 -0
  47. sum_engine-0.1.0.dist-info/entry_points.txt +2 -0
  48. sum_engine-0.1.0.dist-info/licenses/LICENSE +201 -0
  49. sum_engine-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,452 @@
1
+ """
2
+ Deterministic Syntactic Sieve — High-Fidelity Edge NLP
3
+
4
+ Extracts topological (Subject, Predicate, Object) triplets using strict
5
+ grammatical dependency parsing via spaCy. Replaces the LLM for bulk
6
+ ingestion, parsing text at bare-metal CPU speeds.
7
+
8
+ Cost: $0. Speed: 10,000+ words per second. Deterministic: always.
9
+
10
+ Phase 13: Zenith of Process Intensification.
11
+ Stage 4 — Hedging detection for linguistic confidence signals.
12
+
13
+ Author: ototao
14
+ License: Apache License 2.0
15
+ """
16
+
17
+ import re
18
+ from datetime import datetime, timezone
19
+ from typing import Dict, List, Optional, Tuple
20
+
21
+ from internal.infrastructure.provenance import (
22
+ EXCERPT_MAX_CHARS,
23
+ ProvenanceRecord,
24
+ sha256_uri_for_text,
25
+ )
26
+
27
# Extractor identifier passed as ``extractor_id`` to every ProvenanceRecord
# built by ``DeterministicSieve.extract_with_provenance``.
SIEVE_EXTRACTOR_ID = "sum.sieve:deterministic_v1"


# ─── Hedging / Epistemic Markers ──────────────────────────────────────
# Words and phrases that indicate uncertainty in the source text.
# Presence reduces confidence at the linguistic level.
# ``detect_hedging`` counts every match of every pattern, so one sentence
# can accumulate several hits (e.g. "some researchers suggest" hits both
# the phrase pattern and the verb pattern).

HEDGING_MARKERS = [
    # Modal verbs of uncertainty
    re.compile(r"\b(may|might|could|would)\b", re.IGNORECASE),
    # Epistemic adverbs
    re.compile(r"\b(possibly|probably|perhaps|likely|unlikely|apparently|"
               r"allegedly|purportedly|supposedly|seemingly|arguably|"
               r"conceivably|presumably|ostensibly)\b", re.IGNORECASE),
    # Hedging verbs. Inflected forms are matched as well as the bare
    # infinitive: running text almost always uses "suggests", "indicated",
    # "seems", "implies", ... which a base-form-only pattern never sees.
    #   - consonant-final stems take an optional s / ed / ing
    #   - "imply" has the irregular y -> ies / ied spelling
    #   - e-final verbs are written as stem + e / es / ed / ing
    re.compile(
        r"\b((suggest|appear|seem|tend)(s|ed|ing)?|"
        r"impl(y|ies|ied|ying)|"
        r"(indicat|believ|estimat|speculat|hypothesiz|propos|conjectur)"
        r"(e|es|ed|ing))\b",
        re.IGNORECASE,
    ),
    # Hedging phrases
    re.compile(r"\b(it is (thought|believed|estimated|assumed)|"
               r"according to( some)?|some (researchers|scientists|experts)|"
               r"there is (some )?evidence|in (some|certain) cases|"
               r"not (entirely |fully )?clear)\b", re.IGNORECASE),
]

# Each matched marker reduces certainty by this factor
HEDGE_PENALTY_PER_MARKER = 0.15
HEDGE_FLOOR = 0.20  # minimum confidence from hedging alone


# Coarse POS tags counted as "content" tokens by the POS-based fallback
# extractor (``_pos_fallback_triplet``).
_FALLBACK_CONTENT_POS = frozenset({"NOUN", "PROPN", "VERB", "ADJ"})
58
+
59
+
60
def _is_negated(sent) -> bool:
    """Report whether any token in ``sent`` is a negation dependent.

    A token counts as a negation when its ``dep_`` label is ``"neg"``
    (the label spaCy gives to ``not``, ``n't``, ``never`` and similar).

    Why suppress instead of annotate: a negated sentence still parses to a
    clean subject/verb/object shape, but the bare triple would assert the
    opposite of what the text says. Hedging (see ``detect_hedging``) only
    lowers a certainty score; negation flips polarity, so callers drop the
    sentence entirely.

    The check is deliberately sentence-wide rather than limited to the main
    verb: one ``neg`` token anywhere is enough. Losing a triple from a
    doubly-negated or locally-negated sentence is accepted as the cheaper
    error compared with emitting an inverted fact.

    Args:
        sent: Iterable of tokens exposing a ``dep_`` attribute.

    Returns:
        True if at least one token has ``dep_ == "neg"``, otherwise False
        (including for an empty sentence).
    """
    return any(tok.dep_ == "neg" for tok in sent)
86
+
87
+
88
def _is_passive(sent) -> bool:
    """Report whether the sentence's ROOT has a passive grammatical subject.

    Only the direct children of ``sent.root`` are inspected; the sentence is
    passive when one of them carries ``dep_ == "nsubjpass"``.

    Why it matters: in a passive clause the surface subject is the semantic
    object, and the semantic subject (when present) sits under the ``by``
    token (``dep_ == "agent"``) as its ``pobj``. Reading the sentence in
    surface order would turn "Hamlet was written by Shakespeare" into
    (hamlet, write, shakespeare) — the reverse of the source. Callers that
    see True here should route the sentence through ``_extract_passive`` or
    skip it; in particular they should not fall back to a left-to-right POS
    heuristic, which would rebuild the inverted triple.

    Args:
        sent: Sentence object exposing ``root.children``.

    Returns:
        True if a direct child of the root is labelled ``nsubjpass``.
    """
    return any(
        dependent.dep_ == "nsubjpass" for dependent in sent.root.children
    )
110
+
111
+
112
def _extract_passive(sent) -> Optional[Tuple[str, str, str]]:
    """Turn a passive sentence into an active-order (s, p, o) triple.

    Mapping applied to the direct children of ``sent.root``:

        subject   <- the ``pobj`` found under an ``agent`` child (the "by X")
        object    <- the first ``nsubjpass`` child
        predicate <- the root's lemma

    ``amod`` / ``compound`` children of the chosen head tokens are prepended
    using their surface text; the subject parts are joined with ``_`` and
    the object parts with a space.

    Args:
        sent: Sentence object exposing ``root`` (with ``lemma_`` and
            ``children``).

    Returns:
        A lower-cased ``(subject, predicate, object)`` tuple, or ``None``
        when the agent or the passive subject is missing (e.g. the agentless
        "The paper was submitted."), when any component is empty, or when
        the subject has more than 5 ``_``-separated parts or the object more
        than 8 whitespace-separated words. Returning nothing is preferred to
        emitting an inverted fact.
    """
    verb = sent.root
    agent_head = None
    patient_head = None

    for dependent in verb.children:
        role = dependent.dep_
        if role == "nsubjpass" and patient_head is None:
            # Only the first passive subject is kept.
            patient_head = dependent
            continue
        if role != "agent":
            continue
        # First pobj under this "by"; a later agent with a pobj overrides.
        found = next(
            (inner for inner in dependent.children if inner.dep_ == "pobj"),
            None,
        )
        if found is not None:
            agent_head = found

    if agent_head is None or patient_head is None:
        return None

    def _with_modifiers(head, joiner):
        # Surface text of amod/compound children, then the head's lemma.
        parts = [
            kid.text for kid in head.children
            if kid.dep_ in ("amod", "compound")
        ]
        parts.append(head.lemma_)
        return joiner.join(parts).strip()

    subject = _with_modifiers(agent_head, "_")
    object_ = _with_modifiers(patient_head, " ")
    predicate = verb.lemma_

    if not subject or not predicate or not object_:
        return None

    subject_too_long = len(subject.split("_")) > 5
    object_too_long = len(object_.split()) > 8
    if subject_too_long or object_too_long:
        return None

    return (subject.lower(), predicate.lower(), object_.lower())
159
+
160
+
161
def _extract_from_sent(sent) -> Optional[Tuple[str, str, str]]:
    """Extract at most one (subject, predicate, object) triple from a sentence.

    Decision order:

    1. Negated sentence (``_is_negated``) -> ``None``.
    2. Passive sentence (``_is_passive``) -> whatever ``_extract_passive``
       returns (``None`` for agentless passives). The POS fallback is
       skipped on purpose: its left-to-right heuristic would re-emit the
       inverted triple for three-content-token passives.
    3. Dependency-based extraction over every token that is the ROOT or is
       tagged ``VERB``; accepted when all three parts are present and pass
       the size caps (subject <= 5 ``_``-joined parts, object <= 8 words).
    4. Otherwise the POS fallback (``_pos_fallback_triplet``).

    This helper is the per-sentence extraction routine shared by
    ``extract_triplets`` and ``extract_with_provenance``, which keeps their
    outputs triple-for-triple identical.

    Args:
        sent: A spaCy sentence span (iterated token by token).

    Returns:
        A lower-cased ``(subject, predicate, object)`` tuple, or ``None``.
    """
    if _is_negated(sent):
        return None

    # Passive voice inverts surface (s,p,o) order; the dedicated extractor
    # swaps the agent's pobj into subject position and the nsubjpass into
    # object position.
    if _is_passive(sent):
        return _extract_passive(sent)

    subject = None
    predicate = None
    object_ = None

    for token in sent:
        if token.dep_ != "ROOT" and token.pos_ != "VERB":
            continue
        # NOTE: every ROOT/VERB token overwrites the predicate, while
        # subject/object are only overwritten when that token has a
        # matching child — so the parts may come from different verbs.
        predicate = token.lemma_
        for child in token.children:
            if child.dep_ in ("nsubj", "nsubjpass", "csubj", "npadvmod"):
                modifiers = [
                    c.text for c in child.children
                    if c.dep_ in ("amod", "compound")
                ]
                # Subject parts are joined with '_' (not space) so that
                # multi-word subjects satisfy the canonical template's
                # "\S+" parser in OuroborosVerifier.
                subject = "_".join(modifiers + [child.lemma_]).strip()
            elif child.dep_ in ("dobj", "pobj", "attr", "acomp"):
                modifiers = [
                    c.text for c in child.children
                    if c.dep_ in ("amod", "compound")
                ]
                # Object keeps space-joining: the canonical regex for the
                # object is ".+" and accommodates spaces.
                object_ = " ".join(modifiers + [child.lemma_]).strip()

    if subject and predicate and object_:
        # The subject is '_'-joined, so its parts must be counted by
        # splitting on '_' — a whitespace split would always yield one
        # part and the cap would never fire (same rule as
        # _extract_passive).
        subject_ok = len(subject.split("_")) <= 5
        object_ok = len(object_.split()) <= 8
        if subject_ok and object_ok:
            return (subject.lower(), predicate.lower(), object_.lower())

    return _pos_fallback_triplet(sent)
218
+
219
+
220
def _pos_fallback_triplet(sent):
    """Last-resort extraction based purely on part-of-speech tags.

    Used after dependency-based extraction has produced nothing. The rule
    is intentionally narrow: the sentence must contain exactly three tokens
    whose ``pos_`` is in ``_FALLBACK_CONTENT_POS`` (NOUN / PROPN / VERB /
    ADJ). Those three tokens, in sentence order, become subject, predicate
    and object.

    The target case is a short sentence such as "Dogs chase cats" that the
    small English spaCy model can misparse (verb tagged as a noun, ROOT
    moved onto the object). Anything with extra content words does not
    match and is left alone.

    Additional checks:
      * the middle token's lemma must be purely alphabetic and 2-20
        characters long;
      * if the first token is tagged as an adjective (``tag_`` starting
        with "JJ") and its lower-cased lemma ends in "s", is longer than
        two characters and is alphabetic without that final letter, the
        trailing "s" is dropped — this undoes the plural that survives
        when a plural noun is mis-tagged as ADJ.

    Returns:
        ``(subject, predicate, object)`` lemmas, all lower-cased, or
        ``None`` when the pattern does not match.
    """
    content_tokens = [
        tok for tok in sent if tok.pos_ in _FALLBACK_CONTENT_POS
    ]
    if len(content_tokens) != 3:
        return None

    first, middle, last = content_tokens

    verb = middle.lemma_
    if not verb.isalpha():
        return None
    if not (1 < len(verb) <= 20):
        return None

    subject = first.lemma_.lower()
    looks_like_mistagged_plural = (
        first.tag_.startswith("JJ")
        and subject.endswith("s")
        and len(subject) > 2
        and subject[:-1].isalpha()
    )
    if looks_like_mistagged_plural:
        subject = subject[:-1]

    return (subject, verb.lower(), last.lemma_.lower())
258
+
259
+
260
def detect_hedging(text: str) -> float:
    """Return a linguistic-certainty score for ``text``.

    Every match of every pattern in ``HEDGING_MARKERS`` counts as one hit,
    and each hit subtracts ``HEDGE_PENALTY_PER_MARKER`` from 1.0. The
    result is clamped so it never drops below ``HEDGE_FLOOR``.

    The score is metadata only — it does NOT affect the algebra.

    Args:
        text: Text to inspect; empty or falsy input is treated as unhedged.

    Returns:
        A float in ``[HEDGE_FLOOR, 1.0]``; 1.0 means no hedging marker was
        found.
    """
    if not text:
        return 1.0

    marker_hits = sum(len(marker.findall(text)) for marker in HEDGING_MARKERS)
    if not marker_hits:
        return 1.0

    penalised = 1.0 - (marker_hits * HEDGE_PENALTY_PER_MARKER)
    return max(HEDGE_FLOOR, penalised)
281
+
282
+
283
class DeterministicSieve:
    """
    High-Fidelity Edge NLP.

    Extracts topological (Subject, Predicate, Object) triplets using
    strict grammatical dependency parsing via the spaCy ``en_core_web_sm``
    pipeline. All three public extraction methods apply the same
    per-sentence rules (``_extract_from_sent``); they differ only in the
    metadata attached to each triple.
    """

    def __init__(self):
        """Load the spaCy English model, downloading it on first use.

        Raises:
            subprocess.CalledProcessError: If the model download fails.
        """
        import spacy  # Lazy import: only required when sieve is instantiated

        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            import subprocess
            import sys

            # CRITICAL: route spaCy's download progress to stderr so it does
            # not contaminate the CLI's stdout. `sum attest > bundle.json`
            # must emit nothing but the CanonicalBundle JSON. Announcing the
            # fallback on stderr is also more honest than silent
            # auto-install.
            print(
                "sum: spaCy model 'en_core_web_sm' missing; downloading "
                "(~50 MB, one-time)…",
                file=sys.stderr,
            )
            subprocess.check_call(
                [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
                stdout=sys.stderr,
            )
            self.nlp = spacy.load("en_core_web_sm")

    def extract_triplets(self, text: str) -> List[Tuple[str, str, str]]:
        """
        Parse text into semantic triplets using dependency grammar.

        Each sentence contributes at most one triple (see
        ``_extract_from_sent``).

        Args:
            text: Raw text to parse.

        Returns:
            Deduplicated list of (subject, predicate, object) tuples, in
            order of first appearance in ``text``.
        """
        doc = self.nlp(text)
        triplets = []
        for sent in doc.sents:
            triple = _extract_from_sent(sent)
            if triple is not None:
                triplets.append(triple)
        # dict.fromkeys deduplicates while keeping first-seen order. A set
        # would make the output order depend on per-process string hash
        # randomisation, i.e. differ between runs on identical input.
        return list(dict.fromkeys(triplets))

    def extract_with_provenance(
        self,
        text: str,
        source_uri: Optional[str] = None,
        timestamp: Optional[str] = None,
    ) -> List[Tuple[Tuple[str, str, str], ProvenanceRecord]]:
        """Extract (s, p, o) triples paired with per-sentence ProvenanceRecords.

        Each returned record locates the originating sentence's byte range in
        ``source_uri``'s bytes, names the extractor version, and carries a
        literal text excerpt (up to EXCERPT_MAX_CHARS) so third-party auditors
        can validate the claim without refetching the source.

        Args:
            text:       Input text. Also becomes the content-addressable
                        source if ``source_uri`` is omitted.
            source_uri: Optional override. Defaults to
                        ``sha256_uri_for_text(text)``, which makes the byte
                        ranges self-consistent without any network
                        dependency.
            timestamp:  Optional ISO-8601 UTC timestamp. Defaults to
                        ``datetime.now(timezone.utc).isoformat()``.

        Returns:
            List of ``((s, p, o), ProvenanceRecord)`` pairs — NOT deduplicated
            at the triple level. Two sentences producing the same triple yield
            two records with different byte ranges.
        """
        src = source_uri or sha256_uri_for_text(text)
        ts = timestamp or datetime.now(timezone.utc).isoformat()
        doc = self.nlp(text)
        out: List[Tuple[Tuple[str, str, str], ProvenanceRecord]] = []

        # spaCy's sent.start_char / end_char are character offsets; records
        # need byte offsets into the UTF-8 encoding. Sentences arrive in
        # document order, so the byte offset is carried forward and only
        # the text since the previous emitted sentence is encoded, instead
        # of re-encoding the whole prefix for every sentence.
        char_cursor = 0
        byte_cursor = 0
        for sent in doc.sents:
            triple = _extract_from_sent(sent)
            if triple is None:
                continue
            gap = text[char_cursor: sent.start_char]
            body = text[sent.start_char: sent.end_char]
            byte_start = byte_cursor + len(gap.encode("utf-8"))
            byte_end = byte_start + len(body.encode("utf-8"))
            char_cursor = sent.end_char
            byte_cursor = byte_end

            excerpt = sent.text[:EXCERPT_MAX_CHARS]
            record = ProvenanceRecord(
                source_uri=src,
                byte_start=byte_start,
                byte_end=byte_end,
                extractor_id=SIEVE_EXTRACTOR_ID,
                timestamp=ts,
                text_excerpt=excerpt,
            )
            out.append((triple, record))
        return out

    def extract_annotated_triplets(
        self, text: str
    ) -> List[Dict[str, object]]:
        """Extract triplets with per-sentence hedging annotation.

        Returns a list of dicts, one per sentence that yields a triple:
            {
                "subject": str,
                "predicate": str,
                "object": str,
                "linguistic_certainty": float,  # 1.0 = definite, <1.0 = hedged
            }

        Extraction is delegated to ``_extract_from_sent`` so that negation
        suppression, passive-voice reordering, the size caps and the POS
        fallback behave exactly as in ``extract_triplets`` and
        ``extract_with_provenance``; a private copy of the loop would emit
        surface-order triples for passive sentences.

        The linguistic_certainty score (``detect_hedging`` over the
        sentence text) is a metadata-only signal that does NOT affect the
        Gödel algebra.
        """
        doc = self.nlp(text)
        results: List[Dict[str, object]] = []

        for sent in doc.sents:
            triple = _extract_from_sent(sent)
            if triple is None:
                continue
            subject, predicate, object_ = triple
            results.append({
                "subject": subject,
                "predicate": predicate,
                "object": object_,
                "linguistic_certainty": detect_hedging(sent.text),
            })

        return results
@@ -0,0 +1,90 @@
1
+ """
2
+ Zero-Knowledge Semantic Proofs
3
+
4
+ Implements cryptographic commitments over Gödel-State entailment:
5
+ a node can mathematically prove it knows a specific axiom (= prime
6
+ factor of its global state) **without revealing the full state integer**.
7
+
8
+ Protocol:
9
+ 1. Prover computes Q = State // prime (the co-factor).
10
+ 2. Prover generates a random salt and publishes
11
+ Commitment = SHA-256( Q || salt ).
12
+ 3. Verifier receives (commitment, salt, Q, prime) and re-hashes to
13
+ confirm the prover genuinely held the factor.
14
+
15
+ This is a simple salted-hash (SHA-256) commitment scheme for the Gödel
16
+ integer domain; it is not a Pedersen commitment. It is non-interactive.
17
+
18
+ Author: ototao
19
+ License: Apache License 2.0
20
+ """
21
+
22
+ import hashlib
23
+ import os
24
+
25
+
26
class ZKSemanticProver:
    """
    Salted-hash proofs for Gödel State Entailment.

    ``generate_proof`` checks ``State % prime == 0`` and publishes a
    SHA-256 commitment over the quotient ``State // prime`` and a random
    salt. ``verify_proof`` recomputes that hash.

    NOTE(review): the proof dict carries both ``quotient`` and ``prime``,
    and ``State == quotient * prime``, so anyone holding the proof can
    reconstruct the state. Also, ``prime`` is not part of the hashed
    message, so ``verify_proof`` does not bind the commitment to the
    claimed prime. Changing either would alter the proof format and is
    left for a protocol revision.
    """

    @staticmethod
    def generate_proof(global_state: int, prime: int) -> dict:
        """
        Generate a proof that ``global_state`` is divisible by ``prime``.

        Args:
            global_state: The full Gödel BigInt.
            prime: The semantic prime to prove knowledge of (must be >= 2).

        Returns:
            A proof dict with ``commitment`` (hex SHA-256), ``salt``
            (32 hex chars from 16 random bytes), ``prime``, and
            ``quotient`` (as a string for BigInt JSON safety).

        Raises:
            ValueError: If ``prime`` is smaller than 2, or if the state
                does not actually entail the prime.
        """
        # Reject 0 explicitly (the modulo below would raise
        # ZeroDivisionError instead of the documented ValueError), as well
        # as 1 and negatives, which are not primes.
        if prime < 2:
            raise ValueError("Prime must be an integer >= 2.")
        if global_state % prime != 0:
            raise ValueError("State does not entail this prime.")

        quotient = global_state // prime
        salt = os.urandom(16).hex()

        # Commitment = Hash("<quotient>:<salt>") — decimal quotient, a
        # literal colon, then the hex salt.
        commitment = hashlib.sha256(
            f"{quotient}:{salt}".encode()
        ).hexdigest()

        return {
            "commitment": commitment,
            "salt": salt,
            "prime": prime,
            "quotient": str(quotient),
        }

    @staticmethod
    def verify_proof(proof: dict) -> bool:
        """
        Verify a proof by re-computing the commitment.

        Checks that SHA-256 over ``"<quotient>:<salt>"`` equals the
        published ``commitment``. The ``prime`` field is not consulted.

        Args:
            proof: Dict with ``commitment``, ``salt``, ``quotient``.

        Returns:
            True if the commitment matches. False if it does not, or if
            the proof is malformed (missing key, non-integer quotient, or
            not a mapping) — a proof received from a peer is untrusted
            input and must fail verification rather than raise.
        """
        try:
            q = int(proof["quotient"])
            salt = proof["salt"]
            claimed = proof["commitment"]
        except (KeyError, TypeError, ValueError):
            return False

        expected = hashlib.sha256(
            f"{q}:{salt}".encode()
        ).hexdigest()

        return expected == claimed
@@ -0,0 +1 @@
1
+ """Tome generation + LLM adapter + orchestration modules."""
@@ -0,0 +1,138 @@
1
+ """
2
+ Automated Scientist Daemon — Machine Synthesis
3
+
4
+ Background asyncio task that continuously evaluates the Gödel Integer,
5
+ discovers novel topological relationships via transitive closure,
6
+ batch-mints primes via the Zig C-ABI, and permanently expands the
7
+ system's intelligence.
8
+
9
+ Every discovery is logged to the Akashic Ledger as a ``DEDUCED`` event,
10
+ providing a complete provenance trail of machine-generated knowledge.
11
+
12
+ Author: ototao
13
+ License: Apache License 2.0
14
+ """
15
+
16
+ import math
17
+ import asyncio
18
+ import logging
19
+ from typing import Optional
20
+
21
+ from internal.algorithms.causal_discovery import CausalDiscoveryEngine
22
+
23
+ logger = logging.getLogger("sum.scientist")
24
+
25
+
26
class AutomatedScientistDaemon:
    """
    Horizon V: The Dreaming Machine.

    Runs in the background, sweeping the global Gödel state (branch
    ``"main"``) for logically entailed but unminted axioms. Uses Zig FFI
    batch minting when available, otherwise mints one prime at a time
    through ``kos.algebra``.
    """

    def __init__(self, kos_instance, interval_seconds: int = 15):
        """
        Args:
            kos_instance: Object providing ``algebra``, ``branches``,
                ``branch_lock(name)`` and ``ledger`` (and optionally
                ``prime_index``), as used by ``_dream_cycle``.
            interval_seconds: Sleep time between deduction sweeps.
        """
        self.kos = kos_instance
        self.interval = interval_seconds
        self.discovery_engine = CausalDiscoveryEngine(self.kos.algebra)
        self.running = False
        self.total_discoveries = 0

    async def _broadcast(self, message: str) -> None:
        """Send ``message`` to telemetry; never raises.

        Telemetry is best-effort: an import failure or a broadcast error
        must not interrupt a deduction sweep. Failures are logged at DEBUG
        (with traceback) instead of being silently discarded.
        """
        try:
            from internal.ensemble.epistemic_arbiter import kos_telemetry
            await kos_telemetry.broadcast(message)
        except Exception:
            logger.debug("Telemetry broadcast failed", exc_info=True)

    async def start_dreaming(self):
        """Begin the autonomous deduction loop.

        Sleeps ``self.interval`` seconds, runs one ``_dream_cycle``, and
        repeats until ``self.running`` is cleared by ``stop``. An exception
        inside a cycle is logged and the loop continues.
        """
        self.running = True

        # Broadcast startup
        await self._broadcast(
            "🔬 Automated Scientist initialized. Awaiting REM sleep cycles..."
        )

        while self.running:
            await asyncio.sleep(self.interval)
            try:
                await self._dream_cycle()
            except Exception as e:
                # logger.exception keeps the ERROR level and message but
                # also records the traceback, which a bare error() drops.
                logger.exception("Dream cycle error: %s", e)

    async def stop(self):
        """Graceful shutdown: the loop exits after its current iteration."""
        self.running = False

    async def _dream_cycle(self):
        """
        Single deduction sweep:
            1. Get current state
            2. Discover novel transitive relationships
            3. Batch-mint primes (Zig if available, Python fallback)
            4. LCM into global state
            5. Log to Akashic Ledger
        """
        current_state = self.kos.branches.get("main", 1)
        if current_state == 1:
            return  # Nothing to analyze

        novel_triplets = self.discovery_engine.sweep_for_discoveries(current_state)
        if not novel_triplets:
            return

        # Broadcast discovery
        await self._broadcast(
            f"🧠 EUREKA! Synthesizing {len(novel_triplets)} novel discoveries..."
        )

        axiom_strings = [f"{s}||{p}||{o}" for s, p, o in novel_triplets]

        # Batch mint — Zig bare-metal if available
        try:
            from internal.infrastructure.zig_bridge import zig_engine
            if zig_engine and hasattr(zig_engine, 'batch_mint_primes'):
                new_primes = zig_engine.batch_mint_primes(axiom_strings)
            else:
                new_primes = None
        except ImportError:
            new_primes = None

        if new_primes is None:
            new_primes = [
                self.kos.algebra.get_or_mint_prime(s, p, o)
                for s, p, o in novel_triplets
            ]

        async with self.kos.branch_lock("main"):
            # Re-read under the lock: the branch may have changed since the
            # sweep above.
            current_state = self.kos.branches.get("main", 1)
            new_state = current_state
            # NOTE(review): zip() stops at the shorter sequence, so this
            # assumes the minter returned one prime per axiom string —
            # TODO confirm for the Zig batch path.
            for prime, axiom_str in zip(new_primes, axiom_strings):
                new_state = math.lcm(new_state, prime)
                await self.kos.ledger.append_event("DEDUCED", prime, axiom_str)
                await self.kos.ledger.append_event("MUL", prime)

            self.kos.branches["main"] = new_state
            # 19D coherence: daemon mutation must update index
            if hasattr(self.kos, 'prime_index'):
                self.kos.prime_index.rebuild("main", new_state, self.kos.algebra)
                self.kos.prime_index.assert_coherent(
                    "main", new_state, self.kos.algebra, context="automated_scientist"
                )
            self.total_discoveries += len(novel_triplets)

        logger.info(
            "Scientist: %d new discoveries (total: %d)",
            len(novel_triplets), self.total_discoveries
        )

        await self._broadcast(
            f"🧬 State multiplied. Total autonomous discoveries: {self.total_discoveries}"
        )