sum-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. internal/__init__.py +8 -0
  2. internal/algorithms/__init__.py +1 -0
  3. internal/algorithms/causal_discovery.py +96 -0
  4. internal/algorithms/predicate_canon.py +137 -0
  5. internal/algorithms/semantic_arithmetic.py +890 -0
  6. internal/algorithms/syntactic_sieve.py +452 -0
  7. internal/algorithms/zk_semantics.py +90 -0
  8. internal/ensemble/__init__.py +1 -0
  9. internal/ensemble/automated_scientist.py +138 -0
  10. internal/ensemble/autonomous_agent.py +157 -0
  11. internal/ensemble/causal_triggers.py +121 -0
  12. internal/ensemble/confidence_calibrator.py +284 -0
  13. internal/ensemble/epistemic_arbiter.py +159 -0
  14. internal/ensemble/epistemic_loop.py +136 -0
  15. internal/ensemble/extraction_validator.py +172 -0
  16. internal/ensemble/gauge_orchestrator.py +150 -0
  17. internal/ensemble/live_llm_adapter.py +183 -0
  18. internal/ensemble/llm_entailment.py +117 -0
  19. internal/ensemble/mass_semantic_engine.py +138 -0
  20. internal/ensemble/ouroboros.py +281 -0
  21. internal/ensemble/semantic_dedup.py +261 -0
  22. internal/ensemble/tome_generator.py +286 -0
  23. internal/ensemble/tome_sliders.py +104 -0
  24. internal/ensemble/vector_bridge.py +195 -0
  25. internal/ensemble/venn_abers.py +211 -0
  26. internal/infrastructure/__init__.py +1 -0
  27. internal/infrastructure/akashic_ledger.py +812 -0
  28. internal/infrastructure/canonical_codec.py +452 -0
  29. internal/infrastructure/jcs.py +115 -0
  30. internal/infrastructure/key_manager.py +239 -0
  31. internal/infrastructure/p2p_mesh.py +168 -0
  32. internal/infrastructure/prov_o.py +159 -0
  33. internal/infrastructure/provenance.py +181 -0
  34. internal/infrastructure/rate_limiter.py +81 -0
  35. internal/infrastructure/resource_guards.py +117 -0
  36. internal/infrastructure/scheme_registry.py +136 -0
  37. internal/infrastructure/state_encoding.py +94 -0
  38. internal/infrastructure/telemetry.py +91 -0
  39. internal/infrastructure/tome_parser.py +55 -0
  40. internal/infrastructure/verifiable_credential.py +412 -0
  41. internal/infrastructure/zig_bridge.py +256 -0
  42. sum_cli/__init__.py +18 -0
  43. sum_cli/main.py +688 -0
  44. sum_engine-0.1.0.dist-info/METADATA +590 -0
  45. sum_engine-0.1.0.dist-info/RECORD +49 -0
  46. sum_engine-0.1.0.dist-info/WHEEL +5 -0
  47. sum_engine-0.1.0.dist-info/entry_points.txt +2 -0
  48. sum_engine-0.1.0.dist-info/licenses/LICENSE +201 -0
  49. sum_engine-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,159 @@
1
+ """
2
+ Epistemic Arbiter & Event Broadcaster — Wave Function Collapse Engine
3
+
4
+ The Arbiter resolves Level 3 Curvature (semantic contradictions) by
5
+ invoking an LLM judge to determine which conflicting fact survives.
6
+ The EventBroadcaster streams the internal "thinking" process to the
7
+ frontend via Server-Sent Events (SSE).
8
+
9
+ Author: ototao
10
+ License: Apache License 2.0
11
+ """
12
+
13
+ import asyncio
14
+ import logging
15
+ from typing import Callable, Dict, List, Tuple
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class EventBroadcaster:
    """Streams internal mathematical thoughts to the frontend via SSE.

    Every subscriber owns one ``asyncio.Queue``. ``broadcast`` logs the
    message and puts it on each queue that is registered when the call
    starts.
    """

    def __init__(self):
        # One queue per connected subscriber, in subscription order.
        self.queues: List[asyncio.Queue] = []

    async def broadcast(self, message: str):
        """Log ``message`` and push it to all connected SSE subscribers."""
        logger.info("[KOS Telemetry] %s", message)
        # Deliver to a snapshot of the subscriber list. ``put`` is awaited,
        # so ``subscribe``/``unsubscribe`` may run while delivery is in
        # progress; mutating the list being iterated would silently skip
        # the subscriber that follows the removed one.
        for queue in tuple(self.queues):
            await queue.put(message)

    def subscribe(self) -> asyncio.Queue:
        """Create, register and return a new subscriber queue."""
        q: asyncio.Queue = asyncio.Queue()
        self.queues.append(q)
        return q

    def unsubscribe(self, queue: asyncio.Queue):
        """Remove a subscriber queue; queues that are not registered are ignored."""
        if queue in self.queues:
            self.queues.remove(queue)
42
+
43
+
44
# Global singleton — imported by quantum_router for SSE streaming
# The arbiters in this module publish their progress through this shared
# instance, so it must exist before any of their methods run.
kos_telemetry = EventBroadcaster()
46
+
47
+
48
class EpistemicArbiter:
    """
    Resolves Level 3 Curvature (Semantic Contradictions).

    For every conflict an LLM judge is shown both competing claims and
    asked to name the surviving object. The judge's answer is mapped back
    onto one of the two candidates, so the arbiter never returns a value
    that was not part of the conflict.
    """

    # Characters LLMs commonly wrap around a short answer (quotes,
    # back-ticks, a trailing period, stray whitespace).
    _ANSWER_DECORATION = "\"'`. \t\r\n"

    def __init__(self, llm_judge: Callable):
        self.judge = llm_judge  # async func(prompt: str) -> str

    @classmethod
    def _pick_winner(cls, answer, obj_a: str, obj_b: str) -> str:
        """Map the judge's raw ``answer`` onto ``obj_a`` or ``obj_b``.

        Matching ignores case and surrounding whitespace; if that fails,
        quoting/punctuation around the answer is stripped and the match is
        retried. The *original* candidate string is returned (not the
        lower-cased answer) so callers get back a value they supplied.

        Returns ``obj_a`` when the answer is not a string or matches
        neither candidate (the judge hallucinated a new value).
        """
        if not isinstance(answer, str):
            return obj_a

        candidates = (
            (obj_a.strip().lower(), obj_a),
            (obj_b.strip().lower(), obj_b),
        )
        plain = answer.strip()
        for attempt in (plain, plain.strip(cls._ANSWER_DECORATION)):
            lowered = attempt.lower()
            for normalized, original in candidates:
                if lowered == normalized:
                    return original
        return obj_a

    async def collapse_wave_function(
        self, conflicts: List[Tuple[str, str, str, str]]
    ) -> Dict[Tuple[str, str], str]:
        """
        Ask the judge to settle each conflict.

        Args:
            conflicts: ``(subject, predicate, object_a, object_b)`` tuples.

        Returns:
            ``{(subject, predicate): winning_object}`` where the winner is
            always ``object_a`` or ``object_b`` exactly as supplied. If the
            same ``(subject, predicate)`` occurs more than once, the last
            conflict in the list determines the entry.
        """
        resolutions: Dict[Tuple[str, str], str] = {}

        for subject, predicate, obj_a, obj_b in conflicts:
            await kos_telemetry.broadcast(
                f"⚠️ Level 3 Curvature Detected: "
                f"{subject} {predicate} [{obj_a} OR {obj_b}]"
            )
            await kos_telemetry.broadcast(
                "🌀 Entering Epistemic Superposition..."
            )

            prompt = (
                f"You are a strict logic arbiter. We have a contradiction "
                f"regarding '{subject}'.\n"
                f"Claim A: {subject} {predicate} {obj_a}\n"
                f"Claim B: {subject} {predicate} {obj_b}\n"
                f"Analyze standard logical precedence, general knowledge, "
                f"or temporal recency, and return ONLY the correct object "
                f"value. If tied, pick the most specific."
            )

            # Call the LLM judge, then snap its free-text reply onto one of
            # the two real candidates (falls back to obj_a).
            answer = await self.judge(prompt)
            winner = self._pick_winner(answer, obj_a, obj_b)

            resolutions[(subject, predicate)] = winner
            await kos_telemetry.broadcast(
                f"⚡ Wave Function Collapsed: "
                f"{subject} {predicate} → {winner}"
            )

        return resolutions
102
+
103
+
104
class DeterministicArbiter:
    """
    Deterministic contradiction resolution without LLM.

    Resolves Level 3 Curvature using SHA-256 lexicographic ordering:
    for each conflict (subject, predicate, obj_a, obj_b), the winner
    is whichever object has the lower SHA-256 hash of
    ``f"{subject}||{predicate}||{object}"``.

    When several conflicts in one call share the same
    ``(subject, predicate)``, the lowest hash among *all* objects seen for
    that key wins, so the outcome does not depend on the order in which
    the conflicts are listed.
    """

    @staticmethod
    def _canonical_hash(subject: str, predicate: str, obj: str) -> str:
        """SHA-256 hex digest of the canonical triplet key."""
        import hashlib
        return hashlib.sha256(
            f"{subject}||{predicate}||{obj}".encode()
        ).hexdigest()

    @classmethod
    def _lowest_hash(
        cls, subject: str, predicate: str, candidates: List[str]
    ) -> str:
        """Return the candidate whose canonical hash sorts first.

        On equal hashes the earliest candidate wins, which matches the
        ``hash_a <= hash_b`` tie-break for a plain two-object conflict.
        """
        return min(
            candidates,
            key=lambda obj: cls._canonical_hash(subject, predicate, obj),
        )

    async def collapse_wave_function(
        self, conflicts: List[Tuple[str, str, str, str]]
    ) -> Dict[Tuple[str, str], str]:
        """
        Resolve conflicts deterministically via SHA-256 ordering.

        Args:
            conflicts: ``(subject, predicate, obj_a, obj_b)`` tuples.

        Returns:
            ``{(subject, predicate): winning_object}``; the winner is the
            object with the lexicographically lowest hash among every
            object reported for that key.
        """
        resolutions: Dict[Tuple[str, str], str] = {}

        for subject, predicate, obj_a, obj_b in conflicts:
            key = (subject, predicate)
            hash_a = self._canonical_hash(subject, predicate, obj_a)
            hash_b = self._canonical_hash(subject, predicate, obj_b)

            candidates = [obj_a, obj_b]
            if key in resolutions:
                # A previous conflict already produced a winner for this
                # key; keep it in the running so that a later pair cannot
                # overwrite an object with a lower hash.
                candidates.append(resolutions[key])
            winner = self._lowest_hash(subject, predicate, candidates)

            await kos_telemetry.broadcast(
                f"⚠️ Level 3 Curvature: "
                f"{subject} {predicate} [{obj_a} OR {obj_b}]"
            )
            await kos_telemetry.broadcast(
                f"🔬 Deterministic Resolution: SHA-256({obj_a})={hash_a[:8]}… "
                f"vs SHA-256({obj_b})={hash_b[:8]}…"
            )
            await kos_telemetry.broadcast(
                f"⚡ Collapsed → {subject} {predicate} → {winner}"
            )

            resolutions[key] = winner

        return resolutions
@@ -0,0 +1,136 @@
1
+ """
2
+ Epistemic Feedback Loop — "Tags to Tomes then Back"
3
+
4
+ Governs the closed-loop extrapolation pipeline:
5
+ 1. TOMES: Generate narrative text from verified Gödel axioms.
6
+ 2. TAGS: Extract triplets from the narrative and re-encode as a
7
+ Gödel integer.
8
+ 3. VERIFY: modulo check — ``global_state % generated_state == 0``.
9
+ 4. DIAGNOSE: If verification fails, GCD-based hallucination isolation
10
+ identifies the exact fabricated claims.
11
+ 5. SELF-CORRECT: Feed hallucinated axioms back as strict negative
12
+ constraints and re-generate.
13
+
14
+ The loop refuses to return a string until it is *mathematically proven*
15
+ to be a pure subset of the global truth.
16
+
17
+ Author: ototao
18
+ License: Apache License 2.0
19
+ """
20
+
21
+ import logging
22
+ from typing import Callable, Awaitable, List, Tuple, Dict, Any
23
+
24
+ from internal.algorithms.semantic_arithmetic import GodelStateAlgebra
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class QuantumExtrapolator:
    """
    Translates Gödel Integers (Tags) into Narrative Text (Tomes) and
    verifies them by converting the text back into an integer.

    No text is returned until the algebra's ``verify_entailment`` check
    accepts the re-encoded narrative against the global state.
    """

    def __init__(
        self,
        godel_algebra: GodelStateAlgebra,
        llm_generator: Callable[
            [List[str], List[str]], Awaitable[str]
        ],
        llm_extractor: Callable[
            [str], Awaitable[List[Tuple[str, str, str]]]
        ],
        max_retries: int = 3,
    ):
        """
        Args:
            godel_algebra: A GodelStateAlgebra instance with the global
                truth already encoded.
            llm_generator: Async callable (axioms, negative_constraints) → text.
            llm_extractor: Async callable (text) → List[(subj, pred, obj)].
            max_retries: Maximum correction attempts before raising.
        """
        self.algebra = godel_algebra
        self.generate_text = llm_generator
        self.extract_triplets = llm_extractor
        self.max_retries = max_retries

    @staticmethod
    def _add_constraints(constraints: List[str], new_items) -> None:
        """Append each new item to ``constraints`` unless already present.

        Retries tend to reproduce the same failures; repeating a constraint
        only lengthens the generator's input without adding information.
        """
        for item in new_items:
            if item not in constraints:
                constraints.append(item)

    async def extrapolate_with_proof(
        self,
        global_state: int,
        target_axioms: List[str],
    ) -> str:
        """
        The Tags-to-Tomes pipeline.

        Generates a narrative, re-extracts its triplets, encodes them and
        returns the narrative only once the algebra's entailment check
        passes. After each failed attempt the isolated hallucinations are
        fed back to the generator as negative constraints.

        Args:
            global_state: The verified global Gödel integer.
            target_axioms: Axiom key strings to expand into narrative.

        Returns:
            The first narrative whose re-encoded state passes
            ``verify_entailment`` against ``global_state``.

        Raises:
            RuntimeError: If no attempt passes within ``max_retries``
                attempts.
        """
        negative_constraints: List[str] = []

        for attempt in range(self.max_retries):
            # ── 1. TOMES: Generate narrative from verified axioms ────
            narrative = await self.generate_text(
                target_axioms, negative_constraints
            )

            # ── 2. TAGS: Map narrative back to a Gödel integer ───────
            extracted_triplets = await self.extract_triplets(narrative)

            if not extracted_triplets:
                # Nothing to verify: ask for more explicit text next time.
                self._add_constraints(
                    negative_constraints,
                    [
                        "Failed to extract any verifiable axioms. "
                        "Be more explicit."
                    ],
                )
                continue

            generated_state = self.algebra.encode_chunk_state(
                extracted_triplets
            )

            # ── 3. VERIFY: The Epistemic Hardware Filter ────────────
            if self.algebra.verify_entailment(global_state, generated_state):
                logger.info(
                    "Mathematical Proof of Zero Hallucination achieved "
                    "on attempt %d.",
                    attempt + 1,
                )
                return narrative

            # ── 4. DIAGNOSE: Isolate hallucinated primes via GCD ─────
            hallucinations = self.algebra.isolate_hallucinations(
                global_state, generated_state
            )
            logger.warning(
                "Modulo check failed (attempt %d). "
                "Hallucinations detected: %s",
                attempt + 1,
                hallucinations,
            )

            # ── 5. SELF-CORRECT: Feed exact errors back ──────────────
            if hallucinations:
                self._add_constraints(negative_constraints, hallucinations)

        raise RuntimeError(
            f"Epistemic Failure: LLM failed to mathematically align "
            f"after {self.max_retries} attempts. "
            f"Residual hallucinations: {negative_constraints[-5:]}"
        )
@@ -0,0 +1,172 @@
1
+ """
2
+ Extraction Validator — Structural Gate for LLM→Algebra Boundary
3
+
4
+ Phase 19A: Validates, canonicalizes, and deduplicates extracted triplets
5
+ BEFORE they enter the Gödel State Algebra. Malformed or underspecified
6
+ outputs are rejected with audit reasons, not silently ingested.
7
+
8
+ This is the system's immune system at the NLP boundary.
9
+
10
+ Pipeline:
11
+ 1. Structural validation (non-empty, length bounds, illegal chars)
12
+ 2. Predicate canonicalization (synonym collapse)
13
+ 3. Batch deduplication (identical triplets within one extraction)
14
+ 4. Return accepted + rejected with audit trail
15
+
16
+ Author: ototao
17
+ License: Apache License 2.0
18
+ """
19
+
20
+ import re
21
+ import logging
22
+ from dataclasses import dataclass, field
23
+ from typing import List, Tuple, Optional
24
+
25
+ from internal.algorithms.predicate_canon import canonicalize
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ # ── Constraints ───────────────────────────────────────────────────────
30
+
31
# Length bounds applied to each stripped triplet field.
MIN_FIELD_LENGTH = 2      # Single-char subjects/objects are garbage
MAX_FIELD_LENGTH = 200    # Absurdly long strings indicate extraction failure
# Matches ASCII control characters (0x00-0x1F and DEL) except tab, line
# feed and carriage return, which are not in the class.
CONTROL_CHAR_PATTERN = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]')
# Matches any curly brace or square bracket.
JSON_FRAGMENT_PATTERN = re.compile(r'[{}\[\]]')
35
+
36
+
37
@dataclass
class RejectedTriplet:
    """A triplet that failed validation, with the reason why."""
    # The three triplet fields as they were when the triplet was rejected.
    subject: str
    predicate: str
    object_: str  # trailing underscore avoids shadowing the builtin ``object``
    # Human-readable explanation of the rejection, kept for the audit trail.
    reason: str
44
+
45
+
46
@dataclass
class ValidatedExtraction:
    """Outcome of one pass through the validation gate.

    Holds the triplets that were kept and the ones that were dropped,
    plus a few derived counters.
    """
    accepted: List[Tuple[str, str, str]] = field(default_factory=list)
    rejected: List[RejectedTriplet] = field(default_factory=list)

    @property
    def accepted_count(self) -> int:
        """Number of triplets that passed the gate."""
        return len(self.accepted)

    @property
    def rejected_count(self) -> int:
        """Number of triplets that were turned away."""
        return len(self.rejected)

    @property
    def valid_schema_rate(self) -> float:
        """Fraction of processed triplets that were accepted (0.0 if none)."""
        total = self.accepted_count + self.rejected_count
        if total > 0:
            return self.accepted_count / total
        return 0.0
64
+
65
+
66
class ExtractionValidator:
    """
    Structural gate between LLM extraction and Gödel algebra.

    Validates each triplet, canonicalizes predicates, deduplicates
    within a batch, and returns an auditable result. Malformed entries
    (wrong arity, non-string fields) are rejected with a reason instead
    of raising.
    """

    def validate_field(self, value: str, field_name: str) -> Optional[str]:
        """
        Validate a single triplet field. Returns rejection reason or None.

        Args:
            value: The field content to check.
            field_name: Label used in the rejection message.
        """
        if not value:
            return f"{field_name} is empty"

        # Extractor output is untrusted: a truthy non-string (number, list,
        # dict...) must be rejected here rather than crash on ``.strip()``.
        if not isinstance(value, str):
            return f"{field_name} is not a string ({type(value).__name__})"

        stripped = value.strip()
        if not stripped:
            return f"{field_name} is empty"

        if len(stripped) < MIN_FIELD_LENGTH:
            return f"{field_name} too short ({len(stripped)} chars, min {MIN_FIELD_LENGTH})"

        if len(stripped) > MAX_FIELD_LENGTH:
            return f"{field_name} too long ({len(stripped)} chars, max {MAX_FIELD_LENGTH})"

        if CONTROL_CHAR_PATTERN.search(stripped):
            return f"{field_name} contains control characters"

        # Brackets/braces are only treated as leaked JSON in short values.
        if JSON_FRAGMENT_PATTERN.search(stripped) and len(stripped) < 10:
            return f"{field_name} appears to be a JSON fragment"

        return None

    def validate_triplet(
        self,
        subject: str,
        predicate: str,
        object_: str,
    ) -> Optional[str]:
        """
        Validate a full triplet. Returns rejection reason or None if valid.

        Fields are checked in subject, predicate, object order and the
        first failure is reported.
        """
        for val, name in [
            (subject, "subject"),
            (predicate, "predicate"),
            (object_, "object"),
        ]:
            reason = self.validate_field(val, name)
            if reason:
                return reason

        return None

    def validate_batch(
        self,
        triplets: List[Tuple[str, str, str]],
        canonicalize_predicates: bool = True,
    ) -> ValidatedExtraction:
        """
        Validate, canonicalize, and deduplicate a batch of extracted triplets.

        Args:
            triplets: Raw (subject, predicate, object) tuples from LLM.
                ``None`` is treated as an empty batch.
            canonicalize_predicates: If True, run predicate canonicalization.

        Returns:
            ValidatedExtraction with accepted and rejected lists.
        """
        result = ValidatedExtraction()
        seen: set = set()

        for item in triplets or ():
            # Reject entries that are not 3-element sequences instead of
            # letting tuple unpacking raise.
            try:
                s, p, o = item
            except (TypeError, ValueError):
                result.rejected.append(
                    RejectedTriplet(
                        str(item), "", "",
                        "malformed triplet (expected 3 fields)",
                    )
                )
                continue

            # Reject non-string fields before normalisation, which calls
            # string methods on every field.
            if not all(isinstance(part, str) for part in (s, p, o)):
                reason = (
                    self.validate_triplet(s, p, o)
                    or "triplet contains a non-string field"
                )
                result.rejected.append(
                    RejectedTriplet(str(s), str(p), str(o), reason)
                )
                continue

            # Normalize
            s_clean = s.strip().lower()
            p_clean = p.strip().lower().replace(" ", "_")
            o_clean = o.strip().lower()

            # Structural validation
            reason = self.validate_triplet(s_clean, p_clean, o_clean)
            if reason:
                result.rejected.append(
                    RejectedTriplet(s_clean, p_clean, o_clean, reason)
                )
                continue

            # Predicate canonicalization
            if canonicalize_predicates:
                p_clean = canonicalize(p_clean)

            # Batch deduplication (on the canonicalized form)
            key = (s_clean, p_clean, o_clean)
            if key in seen:
                result.rejected.append(
                    RejectedTriplet(s_clean, p_clean, o_clean, "duplicate in batch")
                )
                continue

            seen.add(key)
            result.accepted.append((s_clean, p_clean, o_clean))

        if result.rejected_count > 0:
            logger.info(
                "Extraction gate: %d accepted, %d rejected (%.0f%% valid)",
                result.accepted_count,
                result.rejected_count,
                result.valid_schema_rate * 100,
            )

        return result
@@ -0,0 +1,150 @@
1
+ """
2
+ Gauge-Theoretic Orchestrator — Commutativity Hierarchy Engine
3
+
4
+ Implements Yaroslavtsev's three-level commutativity detection:
5
+ - Level 1 (Commutative): Independent facts, safe to merge in any order.
6
+ - Level 2 (Conditionally commutative): Same entity, different predicates.
7
+ - Level 3 (Curvature): Same entity, same predicate, different objects.
8
+ These MUST be serialized — triggers the EpistemicArbiter.
9
+
10
+ Author: ototao
11
+ License: Apache License 2.0
12
+ """
13
+
14
+ import logging
15
+ from enum import IntEnum
16
+ from typing import Callable, List, Tuple, Optional
17
+
18
+ import networkx as nx
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class CommutativityLevel(IntEnum):
    """Yaroslavtsev's Commutativity Hierarchy, ordered by severity."""

    # Independent facts — parallel merge safe.
    L1_COMMUTATIVE = 1
    # Same entity, different predicates.
    L2_CONDITIONAL = 2
    # Same entity, same predicate, different objects.
    L3_CURVATURE = 3
29
+
30
+
31
class GaugeTheoreticOrchestrator:
    """
    Manages the merge of knowledge graph extractions according to
    the Commutativity Hierarchy.

    When Level 3 Curvature is detected, the ``arbitrate`` callback
    (an ``EpistemicArbiter.collapse_wave_function``) is invoked to
    select the winning fact.
    """

    # Relation label assumed for edges that carry no ``relation`` attribute.
    # Detection and removal must agree on it, otherwise a conflicting edge
    # without the attribute is detected but never removed.
    _DEFAULT_RELATION = "related_to"

    def __init__(self, arbitrate_fn: Optional[Callable] = None):
        self.arbitrate = arbitrate_fn  # async func(conflicts) -> resolutions

    def detect_curvature(
        self,
        base_graph: nx.MultiDiGraph,
        new_graph: nx.MultiDiGraph,
    ) -> Tuple[CommutativityLevel, List[Tuple[str, str, str, str]]]:
        """
        Detect the commutativity level between two knowledge graphs.

        Args:
            base_graph: Existing knowledge graph.
            new_graph: Newly extracted graph.

        Returns:
            (level, conflicts) where conflicts is a list of
            (subject, predicate, old_object, new_object) tuples. A new
            edge conflicts only if the base graph has edges for its
            (subject, predicate) and none of them points at the same
            object. ``old_object`` is the last base target seen for
            that key.
        """
        conflicts: List[Tuple[str, str, str, str]] = []
        max_level = CommutativityLevel.L1_COMMUTATIVE
        default_rel = self._DEFAULT_RELATION

        # One pass over the base graph builds everything the comparison
        # needs, instead of rescanning the base edges for every new edge.
        last_target = {}    # (source, relation) -> most recently seen target
        known_targets = {}  # (source, relation) -> set of every target
        base_sources = set()
        for src, tgt, data in base_graph.edges(data=True):
            key = (src, data.get("relation", default_rel))
            last_target[key] = tgt
            known_targets.setdefault(key, set()).add(tgt)
            base_sources.add(src)

        # Check new graph edges against the base index
        for src, tgt, data in new_graph.edges(data=True):
            rel = data.get("relation", default_rel)
            key = (src, rel)

            if key in last_target:
                # A fact already present under this key is not a conflict,
                # even when the relation currently has several targets.
                if tgt not in known_targets[key]:
                    # Level 3: Same subject + predicate, different object
                    conflicts.append((src, rel, last_target[key], tgt))
                    max_level = CommutativityLevel.L3_CURVATURE
            elif src in base_sources:
                # Level 2: Same entity, different predicates
                if max_level < CommutativityLevel.L2_CONDITIONAL:
                    max_level = CommutativityLevel.L2_CONDITIONAL

        return max_level, conflicts

    async def merge_extractions(
        self,
        base_graph: nx.MultiDiGraph,
        new_graphs: List[nx.MultiDiGraph],
    ) -> nx.MultiDiGraph:
        """
        Merge multiple extraction graphs into the base graph,
        resolving Level 3 Curvature via arbitration.

        Graphs are merged one after another into a copy of ``base_graph``;
        each new graph is compared against the result of the previous
        merges.

        Args:
            base_graph: The canonical knowledge graph (not modified).
            new_graphs: List of newly extracted graphs to merge.

        Returns:
            Merged graph with contradictions resolved.
        """
        merged = base_graph.copy()
        default_rel = self._DEFAULT_RELATION

        for new_graph in new_graphs:
            level, conflicts = self.detect_curvature(merged, new_graph)

            if level == CommutativityLevel.L3_CURVATURE and conflicts:
                if self.arbitrate:
                    resolutions = await self.arbitrate(conflicts)
                else:
                    # Default: new information wins (recency bias)
                    resolutions = {
                        (subj, pred): new_obj
                        for subj, pred, _old, new_obj in conflicts
                    }

                # Apply resolutions
                for (subj, pred), winner in resolutions.items():
                    # Collect first, remove afterwards: the edge view must
                    # not be mutated while it is being iterated.
                    edges_to_remove = [
                        (u, v, key)
                        for u, v, key, data in merged.edges(
                            keys=True, data=True
                        )
                        if u == subj
                        and data.get("relation", default_rel) == pred
                    ]

                    for u, v, key in edges_to_remove:
                        merged.remove_edge(u, v, key=key)

                    # Add the winner
                    merged.add_edge(
                        subj,
                        winner,
                        relation=pred,
                        verified_curvature=True,
                    )

                    logger.info(
                        "Curvature resolved: %s %s → %s", subj, pred, winner
                    )

            # Add all non-conflicting edges from new_graph
            conflict_keys = {(s, p) for s, p, _, _ in conflicts}
            for src, tgt, data in new_graph.edges(data=True):
                rel = data.get("relation", default_rel)
                if (src, rel) not in conflict_keys:
                    merged.add_edge(src, tgt, **data)

        return merged