sum-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- internal/__init__.py +8 -0
- internal/algorithms/__init__.py +1 -0
- internal/algorithms/causal_discovery.py +96 -0
- internal/algorithms/predicate_canon.py +137 -0
- internal/algorithms/semantic_arithmetic.py +890 -0
- internal/algorithms/syntactic_sieve.py +452 -0
- internal/algorithms/zk_semantics.py +90 -0
- internal/ensemble/__init__.py +1 -0
- internal/ensemble/automated_scientist.py +138 -0
- internal/ensemble/autonomous_agent.py +157 -0
- internal/ensemble/causal_triggers.py +121 -0
- internal/ensemble/confidence_calibrator.py +284 -0
- internal/ensemble/epistemic_arbiter.py +159 -0
- internal/ensemble/epistemic_loop.py +136 -0
- internal/ensemble/extraction_validator.py +172 -0
- internal/ensemble/gauge_orchestrator.py +150 -0
- internal/ensemble/live_llm_adapter.py +183 -0
- internal/ensemble/llm_entailment.py +117 -0
- internal/ensemble/mass_semantic_engine.py +138 -0
- internal/ensemble/ouroboros.py +281 -0
- internal/ensemble/semantic_dedup.py +261 -0
- internal/ensemble/tome_generator.py +286 -0
- internal/ensemble/tome_sliders.py +104 -0
- internal/ensemble/vector_bridge.py +195 -0
- internal/ensemble/venn_abers.py +211 -0
- internal/infrastructure/__init__.py +1 -0
- internal/infrastructure/akashic_ledger.py +812 -0
- internal/infrastructure/canonical_codec.py +452 -0
- internal/infrastructure/jcs.py +115 -0
- internal/infrastructure/key_manager.py +239 -0
- internal/infrastructure/p2p_mesh.py +168 -0
- internal/infrastructure/prov_o.py +159 -0
- internal/infrastructure/provenance.py +181 -0
- internal/infrastructure/rate_limiter.py +81 -0
- internal/infrastructure/resource_guards.py +117 -0
- internal/infrastructure/scheme_registry.py +136 -0
- internal/infrastructure/state_encoding.py +94 -0
- internal/infrastructure/telemetry.py +91 -0
- internal/infrastructure/tome_parser.py +55 -0
- internal/infrastructure/verifiable_credential.py +412 -0
- internal/infrastructure/zig_bridge.py +256 -0
- sum_cli/__init__.py +18 -0
- sum_cli/main.py +688 -0
- sum_engine-0.1.0.dist-info/METADATA +590 -0
- sum_engine-0.1.0.dist-info/RECORD +49 -0
- sum_engine-0.1.0.dist-info/WHEEL +5 -0
- sum_engine-0.1.0.dist-info/entry_points.txt +2 -0
- sum_engine-0.1.0.dist-info/licenses/LICENSE +201 -0
- sum_engine-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Autonomous Crystallizer — The Subconscious Graph Optimizer
|
|
3
|
+
|
|
4
|
+
A background daemon that monitors the Gödel State and autonomously
|
|
5
|
+
compresses dense topological clusters into Macro-Primes using the
|
|
6
|
+
Fractal Crystallization method from Phase 6.
|
|
7
|
+
|
|
8
|
+
When a node's topological degree exceeds a configurable threshold,
|
|
9
|
+
the daemon asks the LLM to summarize the cluster and collapses the
|
|
10
|
+
micro-primes into a single macro-prime — all while you sleep.
|
|
11
|
+
|
|
12
|
+
Author: ototao
|
|
13
|
+
License: Apache License 2.0
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import math
|
|
18
|
+
import logging
|
|
19
|
+
from typing import Callable, Optional
|
|
20
|
+
|
|
21
|
+
from internal.algorithms.semantic_arithmetic import GodelStateAlgebra
|
|
22
|
+
from internal.infrastructure.akashic_ledger import AkashicLedger
|
|
23
|
+
|
|
24
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _zig():
|
|
28
|
+
try:
|
|
29
|
+
from internal.infrastructure.zig_bridge import zig_engine
|
|
30
|
+
return zig_engine
|
|
31
|
+
except ImportError:
|
|
32
|
+
return None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AutonomousCrystallizer:
    """
    The Subconscious Graph Optimizer.

    Monitors the Gödel State and autonomously compresses dense
    topological clusters into Macro-Primes.

    ``run_compaction_cycle`` performs one pass over the node registry;
    ``start_daemon`` repeats that pass at a fixed interval until
    ``stop_daemon`` clears ``is_running``.
    """

    def __init__(
        self,
        algebra: GodelStateAlgebra,
        ledger: AkashicLedger,
        summarizer_llm: Callable,
    ):
        """
        Args:
            algebra: Source of ``node_registry``, ``prime_to_axiom``,
                ``axiom_to_prime`` and ``crystallize_axioms``.
            ledger: Receives ``MINT`` / ``MUL`` / ``DIV`` events through
                ``append_event``.
            summarizer_llm: Awaitable callable invoked with the list of
                cluster axiom keys; its result is used as the macro predicate.
        """
        self.algebra = algebra
        self.ledger = ledger
        self.summarize = summarizer_llm  # async func(list[str]) -> str
        self.is_running = False

    async def run_compaction_cycle(
        self, global_state: int, threshold: int = 5
    ) -> int:
        """
        Scans the node registry for highly-connected nodes and
        crystallises their edge clusters into macro-primes.

        For every registered node, the part of the node's integer that is
        still present in the state is ``gcd(state, node_integer)``. Each
        registered prime dividing that value contributes its axiom to the
        node's cluster. Clusters with at least ``threshold`` axioms are
        summarised and handed to ``algebra.crystallize_axioms``.

        Args:
            global_state: Current Gödel integer.
            threshold: Minimum edges before a node triggers compaction.

        Returns:
            Potentially compressed global state.
        """
        new_state = global_state

        # The optional native engine does not change during a cycle, so
        # resolve it once instead of re-attempting the import per node.
        zig = _zig()

        # Iterate over a snapshot: the registry may be mutated while this
        # coroutine is suspended on the awaits below.
        for node, node_integer in list(self.algebra.node_registry.items()):
            zg = zig.bigint_gcd(new_state, node_integer) if zig else None
            # Fall back to the stdlib when the engine is absent or declines.
            alive_node_integer = (
                zg if zg is not None else math.gcd(new_state, node_integer)
            )

            # Extract alive axioms connected to this node
            cluster_axioms: list[str] = []
            temp_int = alive_node_integer
            for prime, axiom in self.algebra.prime_to_axiom.items():
                if temp_int % prime == 0:
                    cluster_axioms.append(axiom)
                    # Strip every power of this prime from the remainder.
                    while temp_int % prime == 0:
                        temp_int //= prime
                if temp_int == 1:
                    # Fully factored; no further prime can divide it.
                    break

            # If the node has too many connections, compress it
            if len(cluster_axioms) >= threshold:
                logger.info(
                    "[Subconscious] Compacting dense node '%s' "
                    "(%d edges)...",
                    node,
                    len(cluster_axioms),
                )

                # Ask LLM to summarize the cluster
                macro_predicate = await self.summarize(cluster_axioms)
                macro_key = (
                    f"{node}||"
                    f"{macro_predicate.lower().replace(' ', '_')}||"
                    f"compressed_cluster"
                )

                # Fractal Compression (Phase 6)
                new_state = self.algebra.crystallize_axioms(
                    new_state, cluster_axioms, macro_key
                )

                # Log to Akashic Ledger (only when a macro-prime was
                # actually registered for the new key).
                if macro_key in self.algebra.axiom_to_prime:
                    macro_prime = self.algebra.axiom_to_prime[macro_key]
                    await self.ledger.append_event(
                        "MINT", macro_prime, macro_key
                    )
                    await self.ledger.append_event("MUL", macro_prime)
                    for axiom in cluster_axioms:
                        p = self.algebra.axiom_to_prime.get(axiom)
                        if p:
                            await self.ledger.append_event("DIV", p)

        return new_state

    async def start_daemon(
        self,
        get_state_func: Callable[[], int],
        set_state_func: Callable[[int], None],
        interval: int = 60,
    ):
        """
        Background loop that runs compaction cycles at a fixed interval.

        The loop sleeps first, so the first cycle runs ``interval`` seconds
        after the call. Exceptions raised by a cycle are logged with their
        traceback and the loop continues with the next interval.

        Args:
            get_state_func: Callable returning the current global state.
            set_state_func: Callable to update the global state.
            interval: Seconds between cycles.
        """
        self.is_running = True
        logger.info(
            "[Subconscious] Daemon started — compaction every %ds", interval
        )
        while self.is_running:
            await asyncio.sleep(interval)
            try:
                current_state = get_state_func()
                new_state = await self.run_compaction_cycle(current_state)
                if new_state != current_state:
                    set_state_func(new_state)
                    logger.info(
                        "[Subconscious] State compacted: %d → %d bits",
                        current_state.bit_length(),
                        new_state.bit_length(),
                    )
            except Exception as e:
                # Best-effort daemon: keep running, but record the traceback
                # so the failure can be diagnosed.
                logger.exception("[Subconscious] Compaction error: %s", e)

    def stop_daemon(self):
        """Gracefully stop the background daemon.

        Clears ``is_running``; the loop in ``start_daemon`` exits the next
        time it re-checks the flag, i.e. after the current sleep/cycle.
        """
        self.is_running = False
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Causal Trigger Map — Interacting Theory Engine (Yaroslavtsev §5.7)
|
|
3
|
+
|
|
4
|
+
Transitions the Knowledge OS from "Free Theory" (isolated axiom minting)
|
|
5
|
+
to "Interacting Theory" where logical rules cascade deductive inferences
|
|
6
|
+
in ACID-compliant transactions.
|
|
7
|
+
|
|
8
|
+
If A implies B, minting A automatically cascades and mints B in the same
|
|
9
|
+
operation. Idempotency is guaranteed by Gödel LCM arithmetic — attempting
|
|
10
|
+
to mint an already-entailed prime is a no-op (LCM(state, p) == state).
|
|
11
|
+
|
|
12
|
+
Author: ototao
|
|
13
|
+
License: Apache License 2.0
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import math
|
|
17
|
+
from typing import Callable, List, Tuple
|
|
18
|
+
|
|
19
|
+
from internal.algorithms.semantic_arithmetic import GodelStateAlgebra
|
|
20
|
+
from internal.infrastructure.akashic_ledger import AkashicLedger
|
|
21
|
+
from internal.ensemble.epistemic_arbiter import kos_telemetry
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _zig():
|
|
25
|
+
try:
|
|
26
|
+
from internal.infrastructure.zig_bridge import zig_engine
|
|
27
|
+
return zig_engine
|
|
28
|
+
except ImportError:
|
|
29
|
+
return None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class CausalTriggerMap:
    """
    Implements Yaroslavtsev's Interacting Theory (Section 5.7).

    Expands primitive axioms into ACID-compliant causal cascades of
    deductive reasoning. Rules are registered as (condition, inference)
    pairs. When an axiom matching the condition is minted, the inference
    function produces consequent axioms that are automatically locked
    into the state.
    """

    def __init__(self, algebra: GodelStateAlgebra, ledger: AkashicLedger):
        """
        Args:
            algebra: Used to mint primes (``get_or_mint_prime``) and passed
                through to every rule callable.
            ledger: Receives ``MINT`` / ``MUL`` events via ``append_event``.
        """
        self.algebra = algebra
        self.ledger = ledger
        # Registered (condition_func, inference_func) pairs, in call order.
        self.rules: List[Tuple[Callable, Callable]] = []

    def register_rule(self, condition_func: Callable, inference_func: Callable):
        """
        Registers an ontological rule (e.g., Transitivity, Syllogisms).

        Args:
            condition_func: (s, p, o, state, algebra) -> bool
            inference_func: (s, p, o, state, algebra) -> List[(s, p, o)]
        """
        self.rules.append((condition_func, inference_func))

    async def apply_cascade(
        self, current_state: int, delta_axioms: List[str]
    ) -> int:
        """
        Cascades inferences recursively until the semantic state
        reaches equilibrium.

        Idempotency is guaranteed by Gödel LCM arithmetic —
        re-minting an existing prime changes nothing.

        Axioms are processed first-in, first-out. Each axiom key is
        processed at most once, and keys that do not split into exactly
        three ``||``-separated parts are skipped.

        Args:
            current_state: The current Gödel integer.
            delta_axioms: Newly added axiom keys (``s||p||o`` format).

        Returns:
            The updated Gödel integer with all cascaded inferences.
        """
        # Local import keeps the module's import block untouched.
        from collections import deque

        new_state = current_state
        # deque gives O(1) pops from the left; list.pop(0) is O(n).
        cascade_queue = deque(delta_axioms)
        visited: set = set()

        # The optional native engine is loop-invariant; resolve it once.
        zig = _zig()

        while cascade_queue:
            current_axiom = cascade_queue.popleft()
            if current_axiom in visited:
                continue
            visited.add(current_axiom)

            parts = current_axiom.split("||")
            if len(parts) != 3:
                continue
            s, p, o = parts

            for condition, infer in self.rules:
                if condition(s, p, o, new_state, self.algebra):
                    inferred_triplets = infer(s, p, o, new_state, self.algebra)

                    for inf_s, inf_p, inf_o in inferred_triplets:
                        inf_axiom = f"{inf_s}||{inf_p}||{inf_o}"
                        inf_prime = self.algebra.get_or_mint_prime(
                            inf_s, inf_p, inf_o
                        )

                        # Novel deductive leap — not yet in state
                        if new_state % inf_prime != 0:
                            await kos_telemetry.broadcast(
                                f"🔗 Causal Inference: "
                                f"[{current_axiom}] ⟹ [{inf_axiom}]"
                            )

                            # Lock the inference into the state
                            r = (
                                zig.bigint_lcm(new_state, inf_prime)
                                if zig
                                else None
                            )
                            # Fall back to the stdlib when the engine is
                            # absent or declines.
                            new_state = (
                                r
                                if r is not None
                                else math.lcm(new_state, inf_prime)
                            )

                            # Persist to Akashic Ledger
                            await self.ledger.append_event(
                                "MINT", inf_prime, inf_axiom
                            )
                            await self.ledger.append_event("MUL", inf_prime)

                            # Push to queue for further domino effects
                            cascade_queue.append(inf_axiom)

        return new_state
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Confidence Calibrator — Multi-Signal Automatic Confidence Scoring
|
|
3
|
+
|
|
4
|
+
Phase 24: Replaces the static 0.5 default with a research-informed
|
|
5
|
+
multi-signal calibration pipeline that runs at zero cost.
|
|
6
|
+
|
|
7
|
+
Signals:
|
|
8
|
+
1. Source-type heuristic — URL domain → base confidence
|
|
9
|
+
2. Redundancy boost — same axiom from N sources → +0.05 per source
|
|
10
|
+
3. Contradiction penalty — conflicts with existing axioms → ×0.5
|
|
11
|
+
4. Linguistic certainty — hedging words in source text → multiplier
|
|
12
|
+
|
|
13
|
+
Optional:
|
|
14
|
+
    5. LLM verbalized confidence — ask model to self-rate (opt-in)
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
calibrator = ConfidenceCalibrator()
|
|
18
|
+
score = await calibrator.calibrate(
|
|
19
|
+
axiom_key="earth||orbits||sun",
|
|
20
|
+
source_url="https://nasa.gov/solar-system",
|
|
21
|
+
ledger=ledger,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
Author: ototao
|
|
25
|
+
License: Apache License 2.0
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import re
|
|
30
|
+
import logging
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Optional, Union
|
|
33
|
+
from urllib.parse import urlparse
|
|
34
|
+
|
|
35
|
+
from internal.ensemble.venn_abers import ConfidenceInterval, VennAbersCalibrator
|
|
36
|
+
|
|
37
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ─── Source-Type Confidence Map ───────────────────────────────────────
|
|
41
|
+
# Based on NAACL 2024 research on source credibility and calibration.
|
|
42
|
+
# These represent base confidence scores for different source types.
|
|
43
|
+
|
|
44
|
+
# Ordered (pattern, base_confidence, label) table; the first pattern that
# matches the source's domain wins, so specific domains precede generic TLDs.
SOURCE_PATTERNS = [
    # (compiled regex for domain, base_confidence, label)
    # ── Specific domains first (before generic TLDs) ──
    (re.compile(r"arxiv\.org"), 0.88, "arxiv"),
    (re.compile(r"pubmed|ncbi\.nlm\.nih"), 0.88, "pubmed"),
    (re.compile(r"doi\.org"), 0.87, "doi"),
    (re.compile(r"(reuters|apnews|bbc)\."), 0.75, "news_wire"),
    (re.compile(r"(en\.)?wikipedia\.org"), 0.70, "wikipedia"),
    (re.compile(r"(medium\.com|substack|blogspot|wordpress)"), 0.40, "blog"),
    # The lookbehind requires a label boundary before the domain. Without it
    # "x\.com" also matches netflix.com, fedex.com, dropbox.com, etc. and
    # would score them as social media.
    (
        re.compile(r"(?<![\w-])(reddit\.com|twitter\.com|x\.com)"),
        0.35,
        "social",
    ),
    # ── Generic TLDs last ──
    (re.compile(r"\.(edu|ac\.\w+)$"), 0.90, "academic"),
    (re.compile(r"\.(gov|mil)$"), 0.85, "government"),
    (re.compile(r"\.(org)$"), 0.65, "organization"),
    (re.compile(r"\.(com|net|io)$"), 0.50, "commercial"),
]

# Score used when the URL is empty, unparseable, or matches no pattern.
DEFAULT_CONFIDENCE = 0.50
# Additive boost per additional unique source beyond the first.
REDUNDANCY_BOOST = 0.05
# Upper bound applied to the final combined score.
REDUNDANCY_CAP = 1.0
# Multiplier applied when a conflicting axiom is already active.
CONTRADICTION_PENALTY = 0.5
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def load_venn_abers_fixture(path: Union[str, Path]) -> VennAbersCalibrator:
    """Build a ``VennAbersCalibrator`` from a JSON calibration fixture.

    The file is read as UTF-8 and must decode to an object of the form::

        {"scores": [0.10, 0.55, 0.82, ...], "labels": [0, 1, 1, ...]}

    Scores are typically the output of ``ConfidenceCalibrator.calibrate``
    on a held-out set; a label is 1 when the calibrator was right about the
    axiom (ground truth confirmed) and 0 otherwise. Regenerate fixtures
    whenever the calibrator's signals or coefficients change.

    Raises:
        KeyError: If the ``"scores"`` or ``"labels"`` key is missing.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    fixture = json.loads(raw_text)
    scores = fixture["scores"]
    labels = fixture["labels"]
    return VennAbersCalibrator(
        calibration_scores=scores,
        calibration_labels=labels,
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ConfidenceCalibrator:
    """Multi-signal automatic confidence scoring engine.

    Computes a calibrated confidence score for an axiom based on:
    1. Source URL domain classification
    2. Redundancy across independent sources
    3. Contradiction detection against existing state
    4. Linguistic certainty (hedging markers in source text)

    Optional Venn-Abers distribution-free interval via constructor injection.
    When a VennAbersCalibrator is provided, ``calibrate_interval`` returns a
    two-sided probability bound. Otherwise that method degrades to a
    zero-width interval around the scalar, making the lack of calibration
    data explicit rather than silently claiming certainty.
    """

    def __init__(
        self,
        venn_abers: Optional[VennAbersCalibrator] = None,
    ) -> None:
        """
        Args:
            venn_abers: Optional calibrator used by ``calibrate_interval``;
                ``None`` disables interval estimation.
        """
        self._venn_abers = venn_abers

    def source_type_score(self, source_url: str) -> float:
        """Score confidence based on the URL's domain type.

        Returns a base confidence in [0.0, 1.0] based on domain
        pattern matching against known source categories.

        The domain is the URL's host name with port and credentials
        removed. For scheme-less input (e.g. ``"nasa.gov/solar-system"``)
        the first path segment is used instead. Empty or unparseable
        input, and domains that match no pattern, yield
        ``DEFAULT_CONFIDENCE``.
        """
        if not source_url:
            return DEFAULT_CONFIDENCE

        try:
            parsed = urlparse(source_url)
            # ``hostname`` (unlike ``netloc``) excludes "user:pass@" and
            # ":port", which would otherwise defeat the "$"-anchored TLD
            # patterns.
            domain = parsed.hostname or ""
            if not domain:
                # No "//" in the input: urlparse leaves everything in
                # ``path``; keep only the leading host-like segment.
                domain = parsed.path.split("/", 1)[0]
            domain = domain.lower()
        except (ValueError, TypeError, AttributeError):
            # Malformed URL or non-string input: fall back to the default.
            return DEFAULT_CONFIDENCE

        for pattern, score, label in SOURCE_PATTERNS:
            if pattern.search(domain):
                logger.debug(
                    "Source %s classified as %s (confidence=%.2f)",
                    domain, label, score,
                )
                return score

        return DEFAULT_CONFIDENCE

    async def redundancy_boost(
        self, axiom_key: str, ledger
    ) -> float:
        """Boost confidence if the axiom has been ingested from multiple sources.

        Each unique source beyond the first adds ``REDUNDANCY_BOOST``; the
        boost itself is capped at 0.5.
        Returns the additive boost (not the total score).

        Returns 0.0 when no ledger is given, when at most one unique source
        is recorded, or when the provenance lookup raises.
        """
        if not ledger:
            return 0.0

        try:
            chain = await ledger.get_axiom_provenance(axiom_key)
            # Count unique sources
            unique_sources = set()
            for entry in chain:
                src = entry.get("source_url", "")
                if src:
                    unique_sources.add(src)

            n = len(unique_sources)
            if n <= 1:
                return 0.0
            # Boost for each additional source beyond the first
            return min((n - 1) * REDUNDANCY_BOOST, 0.5)
        except Exception as e:
            # Best-effort signal: a failing ledger must not block scoring.
            logger.warning("Redundancy check failed: %s", e)
            return 0.0

    def contradiction_penalty(
        self,
        axiom_key: str,
        current_state: int,
        algebra,
    ) -> float:
        """Apply a penalty if the axiom contradicts existing knowledge.

        A contradiction is an active axiom with the same subject and
        predicate but a different object.

        Returns a multiplier in (0.0, 1.0]. 1.0 = no contradiction.
        """
        if current_state <= 1 or not algebra:
            return 1.0

        # Check if axiom contains a predicate that might conflict
        parts = axiom_key.split("||")
        if len(parts) != 3:
            return 1.0

        subject, predicate, obj = parts

        # Look for conflicting axioms: same subject+predicate, different object
        try:
            active_axioms = algebra.get_active_axioms(current_state)
            for existing in active_axioms:
                ex_parts = existing.split("||")
                if len(ex_parts) == 3:
                    ex_s, ex_p, ex_o = ex_parts
                    if (ex_s == subject and ex_p == predicate
                            and ex_o != obj):
                        logger.info(
                            "Contradiction detected: '%s' vs existing '%s'",
                            axiom_key, existing,
                        )
                        return CONTRADICTION_PENALTY
        except Exception as e:
            # Best-effort signal: treat a failed lookup as "no contradiction".
            logger.warning("Contradiction check failed: %s", e)

        return 1.0

    async def calibrate(
        self,
        axiom_key: str,
        source_url: str = "",
        current_state: int = 1,
        algebra=None,
        ledger=None,
        manual_confidence: Optional[float] = None,
        linguistic_certainty: float = 1.0,
    ) -> float:
        """Compute calibrated confidence using all available signals.

        Pipeline:
        1. Start with source-type base score
        2. Add redundancy boost (if ledger available)
        3. Apply contradiction penalty (if algebra + state available)
        4. Apply linguistic certainty multiplier (if < 1.0)
        5. Clamp to [0.0, 1.0]

        If manual_confidence is provided, it is clamped to [0.0, 1.0] and
        returned without running the pipeline.

        Args:
            axiom_key: Axiom key in ``s||p||o`` format.
            source_url: URL the axiom was extracted from; may be empty.
            current_state: Current state integer; values <= 1 skip the
                contradiction check.
            algebra: Object providing ``get_active_axioms``; optional.
            ledger: Object providing ``get_axiom_provenance``; optional.
            manual_confidence: Caller-supplied override.
            linguistic_certainty: Output from detect_hedging() in
                syntactic_sieve.py. 1.0 = definite, <1.0 = hedged.
                This is a metadata-only signal.

        Returns:
            float: Calibrated confidence score in [0.0, 1.0]
        """
        if manual_confidence is not None:
            return max(0.0, min(1.0, manual_confidence))

        # Signal 1: Source type
        base = self.source_type_score(source_url)

        # Signal 2: Redundancy boost
        boost = await self.redundancy_boost(axiom_key, ledger)

        # Signal 3: Contradiction penalty
        penalty = self.contradiction_penalty(
            axiom_key, current_state, algebra
        )

        # Signal 4: Linguistic certainty
        ling = max(0.0, min(1.0, linguistic_certainty))

        # Combine: (base + redundancy_boost) × contradiction × linguistic
        score = (base + boost) * penalty * ling

        # Clamp
        return max(0.0, min(REDUNDANCY_CAP, score))

    async def calibrate_interval(
        self,
        axiom_key: str,
        source_url: str = "",
        current_state: int = 1,
        algebra=None,
        ledger=None,
        manual_confidence: Optional[float] = None,
        linguistic_certainty: float = 1.0,
    ) -> ConfidenceInterval:
        """Distribution-free calibrated-confidence interval for an axiom.

        Computes the multi-signal scalar via ``calibrate`` and then wraps it
        in a Venn-Abers interval if a calibration fixture is present.
        Absent a fixture, returns the zero-width interval ``[scalar, scalar]``
        to signal that no distribution-free coverage guarantee is available.

        This method is additive — the existing scalar ``calibrate`` remains
        as-is for downstream callers that assume a float. New callers that
        want calibrated bounds (Polytaxis §2 discipline) should prefer this.

        Arguments are forwarded unchanged to ``calibrate``.
        """
        scalar = await self.calibrate(
            axiom_key=axiom_key,
            source_url=source_url,
            current_state=current_state,
            algebra=algebra,
            ledger=ledger,
            manual_confidence=manual_confidence,
            linguistic_certainty=linguistic_certainty,
        )
        if self._venn_abers is None:
            return ConfidenceInterval(lower=scalar, upper=scalar)
        return self._venn_abers.predict_interval(scalar)
|