sum-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- internal/__init__.py +8 -0
- internal/algorithms/__init__.py +1 -0
- internal/algorithms/causal_discovery.py +96 -0
- internal/algorithms/predicate_canon.py +137 -0
- internal/algorithms/semantic_arithmetic.py +890 -0
- internal/algorithms/syntactic_sieve.py +452 -0
- internal/algorithms/zk_semantics.py +90 -0
- internal/ensemble/__init__.py +1 -0
- internal/ensemble/automated_scientist.py +138 -0
- internal/ensemble/autonomous_agent.py +157 -0
- internal/ensemble/causal_triggers.py +121 -0
- internal/ensemble/confidence_calibrator.py +284 -0
- internal/ensemble/epistemic_arbiter.py +159 -0
- internal/ensemble/epistemic_loop.py +136 -0
- internal/ensemble/extraction_validator.py +172 -0
- internal/ensemble/gauge_orchestrator.py +150 -0
- internal/ensemble/live_llm_adapter.py +183 -0
- internal/ensemble/llm_entailment.py +117 -0
- internal/ensemble/mass_semantic_engine.py +138 -0
- internal/ensemble/ouroboros.py +281 -0
- internal/ensemble/semantic_dedup.py +261 -0
- internal/ensemble/tome_generator.py +286 -0
- internal/ensemble/tome_sliders.py +104 -0
- internal/ensemble/vector_bridge.py +195 -0
- internal/ensemble/venn_abers.py +211 -0
- internal/infrastructure/__init__.py +1 -0
- internal/infrastructure/akashic_ledger.py +812 -0
- internal/infrastructure/canonical_codec.py +452 -0
- internal/infrastructure/jcs.py +115 -0
- internal/infrastructure/key_manager.py +239 -0
- internal/infrastructure/p2p_mesh.py +168 -0
- internal/infrastructure/prov_o.py +159 -0
- internal/infrastructure/provenance.py +181 -0
- internal/infrastructure/rate_limiter.py +81 -0
- internal/infrastructure/resource_guards.py +117 -0
- internal/infrastructure/scheme_registry.py +136 -0
- internal/infrastructure/state_encoding.py +94 -0
- internal/infrastructure/telemetry.py +91 -0
- internal/infrastructure/tome_parser.py +55 -0
- internal/infrastructure/verifiable_credential.py +412 -0
- internal/infrastructure/zig_bridge.py +256 -0
- sum_cli/__init__.py +18 -0
- sum_cli/main.py +688 -0
- sum_engine-0.1.0.dist-info/METADATA +590 -0
- sum_engine-0.1.0.dist-info/RECORD +49 -0
- sum_engine-0.1.0.dist-info/WHEEL +5 -0
- sum_engine-0.1.0.dist-info/entry_points.txt +2 -0
- sum_engine-0.1.0.dist-info/licenses/LICENSE +201 -0
- sum_engine-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Autoregressive Tome Generator — Lossless Semantic Rehydration
|
|
3
|
+
|
|
4
|
+
Unpacks a Gödel Integer into structured knowledge with two operating modes:
|
|
5
|
+
|
|
6
|
+
**Proof Mode (Canonical):**
|
|
7
|
+
Deterministic, template-based rendering of every active axiom grouped
|
|
8
|
+
by subject entity. This is the layer on which round-trip conservation
|
|
9
|
+
is mathematically verified. Works without any LLM.
|
|
10
|
+
|
|
11
|
+
**Narrative Mode (Optional):**
|
|
12
|
+
If an ``extrapolator`` (QuantumExtrapolator) is available, each
|
|
13
|
+
chapter's axioms are expanded into readable prose via the epistemic
|
|
14
|
+
loop. The canonical appendix is preserved so the proof path is
|
|
15
|
+
never lost.
|
|
16
|
+
|
|
17
|
+
Phase 14: The Ouroboros Protocol.
|
|
18
|
+
|
|
19
|
+
Author: ototao
|
|
20
|
+
License: Apache License 2.0
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from typing import Dict, List, Optional
|
|
25
|
+
|
|
26
|
+
from internal.algorithms.semantic_arithmetic import GodelStateAlgebra
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
# The canonical format version — treat this as an ABI contract.
|
|
31
|
+
# Bump when the template grammar changes.
|
|
32
|
+
CANONICAL_FORMAT_VERSION = "1.0.0"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AutoregressiveTomeGenerator:
    """
    Unpacks a Gödel Integer into a complete, structured Tome.

    An axiom is "active" in a state when its prime divides the state. Axiom
    keys are ``subject||predicate||object`` strings; keys that do not split
    into exactly three ``||``-separated parts are ignored by every renderer.

    Supports two rendering modes:
      * ``proof``     — deterministic canonical output (always available)
      * ``narrative`` — LLM-enhanced prose (requires extrapolator)
    """

    def __init__(
        self,
        algebra: GodelStateAlgebra,
        extrapolator=None,
    ):
        """
        Args:
            algebra: Object exposing ``prime_to_axiom`` (prime -> axiom key).
            extrapolator: Optional object exposing the coroutine
                ``extrapolate_with_proof(target_state, axioms)``; ``None``
                selects math-only rendering.
        """
        self.algebra = algebra
        self.extrapolator = extrapolator  # None in math-only mode

    # ------------------------------------------------------------------
    # Shared helpers (single source of truth for grouping / templating)
    # ------------------------------------------------------------------

    @staticmethod
    def _group_by_subject(axioms) -> Dict[str, List[str]]:
        """Bucket well-formed axiom keys by their stripped subject field."""
        chapters: Dict[str, List[str]] = {}
        for axiom in axioms:
            parts = axiom.split("||")
            if len(parts) == 3:
                chapters.setdefault(parts[0].strip(), []).append(axiom)
        return chapters

    @staticmethod
    def _canonical_sentences(axioms) -> List[str]:
        """Render each well-formed axiom key as ``The {s} {p} {o}.``."""
        sentences: List[str] = []
        for axiom in axioms:
            parts = axiom.split("||")
            if len(parts) == 3:
                s, p, o = parts
                # Fields are emitted verbatim (not stripped) so the template
                # output stays byte-stable for a given axiom key.
                sentences.append(f"The {s} {p} {o}.")
        return sentences

    # ------------------------------------------------------------------
    # Core: cluster active axioms by subject
    # ------------------------------------------------------------------

    def extract_active_axioms(self, target_state: int) -> List[str]:
        """Return all axiom keys whose primes divide the target state.

        Order follows the iteration order of ``algebra.prime_to_axiom``.
        """
        return [
            axiom
            for prime, axiom in self.algebra.prime_to_axiom.items()
            if target_state % prime == 0
        ]

    def cluster_by_subject(
        self, target_state: int
    ) -> Dict[str, List[str]]:
        """Group active axioms by their subject entity.

        Returns:
            Mapping of stripped subject -> axiom keys, in activation order.
        """
        return self._group_by_subject(self.extract_active_axioms(target_state))

    # ------------------------------------------------------------------
    # Proof Mode: deterministic canonical rendering
    # ------------------------------------------------------------------

    def generate_canonical(
        self, target_state: int, title: str = "Canonical Tome"
    ) -> str:
        """
        Deterministic, template-based rendering.

        Every active axiom is emitted in a canonical ``subject PREDICATE
        object`` sentence, grouped by subject; subjects and the axioms
        within each subject are sorted lexicographically. The output is
        prefixed with an ``@canonical_version`` header line.

        Args:
            target_state: The Gödel integer to unpack.
            title: Human-readable title for the Tome.

        Returns:
            A structured text representation of the integer's knowledge,
            or a short "empty state" notice when no axiom is active.
        """
        chapters = self.cluster_by_subject(target_state)

        if not chapters:
            return f"@canonical_version: {CANONICAL_FORMAT_VERSION}\n# {title}\n\nThe Gödel State is empty (= 1). No axioms to rehydrate."

        lines = [f"@canonical_version: {CANONICAL_FORMAT_VERSION}", f"# {title}", ""]

        # Sorted subjects and sorted axioms make the output deterministic.
        for subject in sorted(chapters):
            lines.append(f"## {subject.title()}")
            lines.append("")
            lines.extend(self._canonical_sentences(sorted(chapters[subject])))
            lines.append("")

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Narrative Mode: LLM-enhanced prose with canonical fallback
    # ------------------------------------------------------------------

    async def generate_narrative(
        self, target_state: int, title: str = "The Quantum Tome"
    ) -> str:
        """
        LLM-enhanced rendering with canonical fallback.

        For each subject chapter the extrapolator (if any) is awaited with
        the chapter's sorted axioms and its return value is used as the
        chapter body. When no extrapolator is configured, or when it raises
        ``RuntimeError``, the chapter falls back to canonical sentences.
        The fallback is logged so degraded output is visible to operators.
        Exceptions other than ``RuntimeError`` propagate to the caller.

        Args:
            target_state: The Gödel integer to unpack.
            title: Human-readable title for the Tome.

        Returns:
            A narrative text representation.
        """
        chapters = self.cluster_by_subject(target_state)

        if not chapters:
            return f"# {title}\n\nThe Gödel State is empty (= 1). No axioms to rehydrate."

        lines = [f"# {title}", ""]

        for subject in sorted(chapters):
            axioms = sorted(chapters[subject])
            lines.append(f"## {subject.title()}")
            lines.append("")

            if self.extrapolator is None:
                # No LLM — canonical rendering
                lines.extend(self._canonical_sentences(axioms))
            else:
                try:
                    narrative = await self.extrapolator.extrapolate_with_proof(
                        target_state, axioms
                    )
                except RuntimeError:
                    # Best-effort by design: keep the tome complete, but do
                    # not hide the failure.
                    logger.warning(
                        "Extrapolator failed for subject %r; "
                        "falling back to canonical rendering.",
                        subject,
                        exc_info=True,
                    )
                    lines.extend(self._canonical_sentences(axioms))
                else:
                    lines.append(narrative)

            lines.append("")

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Controlled rendering — TomeSliders (founder's dream surface)
    # ------------------------------------------------------------------

    def generate_controlled(
        self,
        target_state: int,
        sliders: "Optional[object]" = None,
        title: str = "Controlled Tome",
    ) -> str:
        """Parameterized canonical rendering under TomeSliders control.

        Only the density slider is actioned here: the active axioms are
        passed through ``apply_density`` before canonical rendering. All
        slider values are written to the output header via
        ``TomeSliders.header_line()``; the non-density sliders have no
        effect on the body produced by this method.

        Args:
            target_state: The Gödel integer to unpack.
            sliders: A TomeSliders instance, or None for ``TomeSliders()``
                defaults.
            title: Human-readable title.

        Returns:
            A canonical-format tome (version line, slider line, title,
            chapters). When density removes every axiom, the header is
            followed by a one-line notice instead of chapters.

        Raises:
            TypeError: If ``sliders`` is neither None nor a TomeSliders.
        """
        # Imported at call time, as in the original module layout.
        from internal.ensemble.tome_sliders import (
            TomeSliders,
            apply_density,
        )

        cfg = sliders if sliders is not None else TomeSliders()
        if not isinstance(cfg, TomeSliders):
            raise TypeError(
                "sliders must be a TomeSliders instance or None"
            )

        active_axioms = self.extract_active_axioms(target_state)
        selected = apply_density(active_axioms, cfg.density)

        header = [
            f"@canonical_version: {CANONICAL_FORMAT_VERSION}",
            cfg.header_line(),
            f"# {title}",
            "",
        ]

        if not selected:
            header.append(
                f"No axioms survive at density={cfg.density:.3f} "
                f"(source had {len(active_axioms)})."
            )
            return "\n".join(header)

        chapters = self._group_by_subject(selected)

        lines = list(header)
        for subject in sorted(chapters):
            lines.append(f"## {subject.title()}")
            lines.append("")
            lines.extend(self._canonical_sentences(sorted(chapters[subject])))
            lines.append("")

        if cfg.requires_extrapolator() and self.extrapolator is None:
            logger.info(
                "TomeSliders non-density axes set but no extrapolator present; "
                "non-density sliders are recorded in the header as metadata only."
            )

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Unified entry point
    # ------------------------------------------------------------------

    async def generate_tome(
        self,
        target_state: int,
        title: str = "The Quantum Tome",
        mode: str = "proof",
    ) -> str:
        """
        Generate a Tome from a Gödel Integer.

        Args:
            target_state: The integer to unpack.
            title: Tome title.
            mode: ``"narrative"`` for LLM-enhanced prose; any other value
                (including the default ``"proof"``) selects deterministic
                canonical output.

        Returns:
            The generated Tome text.
        """
        if mode == "narrative":
            return await self.generate_narrative(target_state, title)
        return self.generate_canonical(target_state, title)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tome Sliders — Control Parameters for Bidirectional Knowledge Rendering
|
|
3
|
+
|
|
4
|
+
The founder's core dream made concrete: tunable knobs for the tag→tome
|
|
5
|
+
direction. Every SUM rendering takes a TomeSliders record in [0.0, 1.0]^5
|
|
6
|
+
and produces output conformant to those controls.
|
|
7
|
+
|
|
8
|
+
Implementation status (as of module authoring):
|
|
9
|
+
density — implemented on the deterministic canonical path
|
|
10
|
+
(axiom subsetting via lexicographic ordering)
|
|
11
|
+
length — LLM-gated; no-op without an extrapolator
|
|
12
|
+
formality — LLM-gated
|
|
13
|
+
audience — LLM-gated
|
|
14
|
+
perspective — LLM-gated
|
|
15
|
+
|
|
16
|
+
Slider values are captured in the output artefact's header so the same
|
|
17
|
+
narrative can be regenerated with adjusted parameters and the difference
|
|
18
|
+
audited.
|
|
19
|
+
|
|
20
|
+
Author: ototao
|
|
21
|
+
License: Apache License 2.0
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
from typing import Sequence
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class TomeSliders:
    """Immutable bundle of the five rendering sliders.

    Each slider must lie in the closed interval [0.0, 1.0]; construction
    raises ``ValueError`` otherwise.

    - density:     0.0 = empty, 1.0 = full axiom coverage
    - length:      0.0 = telegraphic, 1.0 = expansive
    - formality:   0.0 = casual, 1.0 = academic
    - audience:    0.0 = novice, 1.0 = expert / jargon-dense
    - perspective: 0.0 = first-person, 1.0 = omniscient / third-person
    """

    density: float = 1.0
    length: float = 0.5
    formality: float = 0.5
    audience: float = 0.5
    perspective: float = 0.5

    def __post_init__(self) -> None:
        """Validate every slider right after the dataclass ``__init__``."""
        slider_names = ("density", "length", "formality", "audience", "perspective")
        for name in slider_names:
            v = getattr(self, name)
            # Written as a positive range test so NaN (which fails every
            # comparison) is rejected along with out-of-range values.
            if 0.0 <= v <= 1.0:
                continue
            raise ValueError(f"{name} out of [0, 1]: {v}")

    def requires_extrapolator(self) -> bool:
        """Report whether any non-density slider differs from 0.5.

        Density is deliberately excluded from this check.
        """
        non_density = (self.length, self.formality, self.audience, self.perspective)
        return any(setting != 0.5 for setting in non_density)

    def header_line(self) -> str:
        """Serialize all sliders, three decimals each, on one ``@sliders:`` line."""
        pairs = (
            ("density", self.density),
            ("length", self.length),
            ("formality", self.formality),
            ("audience", self.audience),
            ("perspective", self.perspective),
        )
        rendered = " ".join(f"{label}={setting:.3f}" for label, setting in pairs)
        return "@sliders: " + rendered
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def apply_density(
    axiom_keys: Sequence[str], density: float
) -> list[str]:
    """Keep a lexicographic prefix of the axiom keys, sized by ``density``.

    The keys are sorted and the first ``int(N * density)`` of them are
    returned, where ``N`` is the number of keys. A density of 1.0 or more
    yields the full sorted list; a density of 0.0 or less, or an empty
    input, yields an empty list. The input sequence is not modified.
    """
    # Nothing to select from, or nothing allowed through.
    if not axiom_keys or density <= 0.0:
        return []

    ordered = sorted(axiom_keys)
    if density >= 1.0:
        return ordered

    # int() truncates toward zero, i.e. floor for this non-negative product.
    keep = int(len(ordered) * density)
    return ordered[:keep]
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Continuous-Discrete Bridge — Vector Space ↔ Gödel Space
|
|
3
|
+
|
|
4
|
+
Maps the discrete world of Gödel prime integers to the continuous world
|
|
5
|
+
of LLM embedding vectors, enabling semantic (fuzzy) search over a
|
|
6
|
+
mathematically exact state.
|
|
7
|
+
|
|
8
|
+
Architecture:
|
|
9
|
+
- Every minted prime gets a vector embedding of its natural-language
|
|
10
|
+
axiom text.
|
|
11
|
+
- ``semantic_search_godel_state`` filters to primes *alive* in the
|
|
12
|
+
current state (``global_state % prime == 0``) before ranking by
|
|
13
|
+
cosine similarity.
|
|
14
|
+
- Deleted axioms are automatically excluded because their primes no
|
|
15
|
+
longer divide the state.
|
|
16
|
+
|
|
17
|
+
Horizon III — Universal Vector Alignment:
|
|
18
|
+
- Supports optional affine transformation matrices (W*, b*) from
|
|
19
|
+
Gorbett & Jana (2026) for cross-model linear alignment.
|
|
20
|
+
- Heterogeneous P2P nodes (Llama, Qwen, Mistral, etc.) can perfectly
|
|
21
|
+
align their embeddings into a single Canonical Geometry before
|
|
22
|
+
discrete prime extraction via O(1) linear affine maps.
|
|
23
|
+
|
|
24
|
+
Author: ototao
|
|
25
|
+
License: Apache License 2.0
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
from typing import Callable, Awaitable, List, Dict, Optional, Tuple
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
|
|
33
|
+
from internal.algorithms.semantic_arithmetic import GodelStateAlgebra
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ContinuousDiscreteBridge:
    """
    Connects Gödel-integer state with embedding-based (fuzzy) search.

    The bridge keeps one embedding vector per prime. Queries are embedded
    with the same model and ranked by cosine similarity — but *only*
    against primes that currently divide the supplied global state.

    Horizon III:
        When ``affine_map`` (W*) and optionally ``bias_map`` (b*) are
        provided, every embedding (indexed axioms and queries alike) is
        passed through ``vector · W* (+ b*)`` and re-normalised before
        being stored or compared, so vectors from different models can be
        compared in one shared space.
    """

    def __init__(
        self,
        algebra: GodelStateAlgebra,
        embedding_model: Callable[[str], Awaitable[List[float]]],
        affine_map: Optional[np.ndarray] = None,
        bias_map: Optional[np.ndarray] = None,
    ):
        """
        Args:
            algebra: Object exposing ``prime_to_axiom`` (prime -> axiom key).
            embedding_model: Async callable (text) → List[float] vector.
            affine_map: Optional W* matrix for cross-model alignment.
            bias_map: Optional b* bias vector for cross-model alignment;
                only applied when ``affine_map`` is also set.
        """
        self.algebra = algebra
        self.get_embedding = embedding_model
        self.prime_embeddings: Dict[int, np.ndarray] = {}

        # W* and b* (attributed in the module docstring to Gorbett & Jana,
        # 2026) for cross-model linear alignment.
        self.affine_map = affine_map
        self.bias_map = bias_map

    # ------------------------------------------------------------------
    # Affine alignment
    # ------------------------------------------------------------------

    def _align_vector(self, vector: np.ndarray) -> np.ndarray:
        """
        Map a raw embedding into the aligned space.

        If no affine map is configured the input is returned unchanged
        (not normalised, not copied). Otherwise the result is
        ``vector · affine_map``, plus ``bias_map`` when set, scaled to unit
        length unless its norm is zero, and cast to float32.

        Args:
            vector: Raw embedding vector.

        Returns:
            The aligned vector, or ``vector`` itself when no map is set.
        """
        if self.affine_map is None:
            return vector

        v = np.dot(vector, self.affine_map)
        if self.bias_map is not None:
            v = v + self.bias_map

        # Re-normalise to unit length for cosine similarity; a zero vector
        # is left as-is to avoid dividing by zero.
        norm = np.linalg.norm(v)
        if norm > 0:
            v = v / norm

        return v.astype(np.float32)

    # ------------------------------------------------------------------
    # Indexing
    # ------------------------------------------------------------------

    async def index_new_primes(self) -> int:
        """
        Generate embeddings for any un-indexed Semantic Primes.

        The prime table is snapshotted before the loop because the loop
        awaits the embedding model: if another task adds a prime to
        ``algebra.prime_to_axiom`` during that await, iterating the live
        dict would raise ``RuntimeError`` (dictionary changed size during
        iteration). Primes added after the snapshot are picked up by the
        next call.

        Returns:
            Number of newly indexed primes.
        """
        newly_indexed = 0

        for prime, axiom in list(self.algebra.prime_to_axiom.items()):
            if prime in self.prime_embeddings:
                continue
            # "alice||age||30" → "alice age 30"
            natural_text = " ".join(axiom.split("||"))
            raw_vector = await self.get_embedding(natural_text)
            self.prime_embeddings[prime] = self._align_vector(
                np.array(raw_vector, dtype=np.float32)
            )
            newly_indexed += 1

        if newly_indexed:
            logger.info("Indexed %d new primes into vector space.", newly_indexed)

        return newly_indexed

    # ------------------------------------------------------------------
    # Semantic search
    # ------------------------------------------------------------------

    async def semantic_search_godel_state(
        self,
        global_state: int,
        query: str,
        top_k: int = 5,
    ) -> List[Tuple[str, float]]:
        """
        Query the exact Gödel state using fuzzy natural language.

        Only indexed primes that divide ``global_state`` are scored, so an
        axiom whose prime no longer divides the state is never returned.
        Stored vectors with zero norm are skipped.

        Args:
            global_state: The current global Gödel integer.
            query: Natural language query string.
            top_k: Maximum number of results; values <= 0 yield no results.

        Returns:
            List of (axiom_key, similarity_score) tuples, descending by
            cosine similarity. Empty when the query embeds to a zero
            vector or no indexed prime is alive in the state.
        """
        raw_query = np.array(
            await self.get_embedding(query), dtype=np.float32
        )
        query_vector = self._align_vector(raw_query)
        query_norm = np.linalg.norm(query_vector)
        if query_norm == 0:
            return []

        active_primes: List[int] = []
        similarities: List[float] = []

        for prime, vector in self.prime_embeddings.items():
            # Only search facts alive in the current state
            if global_state % prime != 0:
                continue
            vec_norm = np.linalg.norm(vector)
            if vec_norm == 0:
                continue
            sim = float(
                np.dot(query_vector, vector) / (query_norm * vec_norm)
            )
            active_primes.append(prime)
            similarities.append(sim)

        if not active_primes:
            return []

        # Clamp so a negative top_k cannot turn the slice below into
        # "everything except the last |top_k| hits".
        limit = max(top_k, 0)

        # Sort by similarity descending
        top_indices = np.argsort(similarities)[::-1][:limit]
        return [
            (self.algebra.prime_to_axiom[active_primes[i]], similarities[i])
            for i in top_indices
        ]
|