synap 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synap/__init__.py +69 -0
- synap/_utils.py +51 -0
- synap/backends/__init__.py +6 -0
- synap/backends/kuzu.py +438 -0
- synap/backends/postgres.py +411 -0
- synap/backends/sqlite.py +260 -0
- synap/bootstrap.py +316 -0
- synap/consolidation.py +382 -0
- synap/episodic.py +353 -0
- synap/facade.py +549 -0
- synap/graph.py +259 -0
- synap/mcp_server.py +239 -0
- synap/persistent_graph.py +292 -0
- synap/procedural.py +200 -0
- synap/protocols.py +171 -0
- synap/py.typed +0 -0
- synap/semantic.py +283 -0
- synap/tools.py +201 -0
- synap/types.py +172 -0
- synap-0.1.0.dist-info/METADATA +271 -0
- synap-0.1.0.dist-info/RECORD +23 -0
- synap-0.1.0.dist-info/WHEEL +4 -0
- synap-0.1.0.dist-info/licenses/LICENSE +21 -0
synap/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Engram — Cognitive memory architecture for LLM agents."""
|
|
2
|
+
|
|
3
|
+
from synap.types import (
|
|
4
|
+
CapacityHints,
|
|
5
|
+
ConsolidationEvent,
|
|
6
|
+
ConsolidationTrigger,
|
|
7
|
+
DomainResult,
|
|
8
|
+
Episode,
|
|
9
|
+
EpisodeOutcome,
|
|
10
|
+
MemoryEdge,
|
|
11
|
+
MemoryNode,
|
|
12
|
+
MemoryType,
|
|
13
|
+
PreparedContext,
|
|
14
|
+
Procedure,
|
|
15
|
+
ToolCall,
|
|
16
|
+
)
|
|
17
|
+
from synap.protocols import (
|
|
18
|
+
AsyncStorageBackend,
|
|
19
|
+
EmbeddingProvider,
|
|
20
|
+
GraphStore,
|
|
21
|
+
LLMProvider,
|
|
22
|
+
SemanticDomain,
|
|
23
|
+
StorageBackend,
|
|
24
|
+
)
|
|
25
|
+
from synap.graph import MemoryGraph
|
|
26
|
+
from synap.persistent_graph import PersistentGraph
|
|
27
|
+
from synap.semantic import SemanticMemory
|
|
28
|
+
from synap.procedural import ProceduralMemory
|
|
29
|
+
from synap.episodic import EpisodicMemory
|
|
30
|
+
from synap.consolidation import ConsolidationConfig, ConsolidationResult
|
|
31
|
+
from synap.episodic import EpisodicPattern
|
|
32
|
+
from synap.semantic import SemanticResult
|
|
33
|
+
from synap.bootstrap import Bootstrap, ProposedKnowledge
|
|
34
|
+
from synap.facade import CognitiveMemory, EvaluationReport, MemoryStats
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"AsyncStorageBackend",
|
|
38
|
+
"Bootstrap",
|
|
39
|
+
"CapacityHints",
|
|
40
|
+
"CognitiveMemory",
|
|
41
|
+
"ConsolidationConfig",
|
|
42
|
+
"ConsolidationEvent",
|
|
43
|
+
"ConsolidationResult",
|
|
44
|
+
"ConsolidationTrigger",
|
|
45
|
+
"DomainResult",
|
|
46
|
+
"Episode",
|
|
47
|
+
"EpisodeOutcome",
|
|
48
|
+
"EpisodicMemory",
|
|
49
|
+
"EpisodicPattern",
|
|
50
|
+
"EmbeddingProvider",
|
|
51
|
+
"EvaluationReport",
|
|
52
|
+
"GraphStore",
|
|
53
|
+
"LLMProvider",
|
|
54
|
+
"MemoryEdge",
|
|
55
|
+
"MemoryGraph",
|
|
56
|
+
"MemoryNode",
|
|
57
|
+
"MemoryStats",
|
|
58
|
+
"MemoryType",
|
|
59
|
+
"PersistentGraph",
|
|
60
|
+
"PreparedContext",
|
|
61
|
+
"Procedure",
|
|
62
|
+
"ProceduralMemory",
|
|
63
|
+
"ProposedKnowledge",
|
|
64
|
+
"SemanticDomain",
|
|
65
|
+
"SemanticMemory",
|
|
66
|
+
"SemanticResult",
|
|
67
|
+
"StorageBackend",
|
|
68
|
+
"ToolCall",
|
|
69
|
+
]
|
synap/_utils.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Shared utility functions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    The result is ``0.0`` when the vectors are empty, differ in length,
    or when either of them has zero magnitude.
    """
    size = len(a)
    if size == 0 or size != len(b):
        return 0.0

    inner_product = sum(left * right for left, right in zip(a, b))
    magnitude_a, magnitude_b = (
        sum(component * component for component in vector) ** 0.5
        for vector in (a, b)
    )

    # A zero-length vector has no direction; avoid dividing by zero.
    if not (magnitude_a and magnitude_b):
        return 0.0

    return inner_product / (magnitude_a * magnitude_b)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def safe_parse_json(text: str) -> dict[str, Any] | None:
    """Parse a JSON object from LLM output, handling common formatting issues.

    Three strategies are tried in order, and the first one that yields a
    JSON *object* wins:

    1. Parse the stripped text directly.
    2. Parse the contents of the first markdown code fence. An optional
       language tag on the opening fence line is skipped, and a fence written
       on a single line (no newline after the opening backticks) is handled.
    3. Parse the span from the first ``{`` to the last ``}``.

    Args:
        text: Raw model output that is expected to contain a JSON object.

    Returns:
        The decoded object, or ``None`` when no strategy produces one. Valid
        JSON that is not an object (a list, number, string, ...) is rejected
        so the result always matches the declared return type.
    """

    def _load_object(candidate: str) -> dict[str, Any] | None:
        # Decode one candidate; anything other than a JSON object counts as a miss.
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    text = text.strip()

    # Strategy 1: the whole payload is already JSON.
    parsed = _load_object(text)
    if parsed is not None:
        return parsed

    # Strategy 2: JSON wrapped in a markdown code fence.
    fence = "```"
    opening = text.find(fence)
    if opening >= 0:
        body_start = opening + len(fence)
        closing = text.find(fence, body_start)
        if closing > body_start:
            body = text[body_start:closing]
            candidates = []
            tag_end = body.find("\n")
            if tag_end >= 0:
                # Skip the rest of the opening fence line (e.g. "json").
                candidates.append(body[tag_end + 1:])
            # Also try the whole body, which covers single-line fences.
            candidates.append(body)
            for candidate in candidates:
                parsed = _load_object(candidate.strip())
                if parsed is not None:
                    return parsed

    # Strategy 3: widest brace-delimited span in the text.
    first_brace = text.find("{")
    last_brace = text.rfind("}")
    if first_brace >= 0 and last_brace > first_brace:
        return _load_object(text[first_brace : last_brace + 1])

    return None
|
synap/backends/kuzu.py
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
"""Kùzu graph database backend — native graph traversal + vector search."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import kuzu
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# ---------------------------------------------------------------------------
# Schema constants
# ---------------------------------------------------------------------------

# Default for KuzuBackend's ``embedding_dim`` constructor argument. The value
# is substituted into the fixed-size ``embedding`` column of the schema below.
EMBEDDING_DIM_DEFAULT = 8  # Callers pass embedding_dim to use another size

# Schema template: one node table and one self-referencing relationship table.
# ``{dim}`` is filled in with str.format(); the doubled braces in the metadata
# defaults are format escapes that render as a literal '{}' (empty JSON object).
# Statements are separated by ';' and executed one at a time by the backend.
_SCHEMA_SQL = """
CREATE NODE TABLE IF NOT EXISTS MemoryNode(
id STRING,
node_type STRING,
content STRING,
embedding DOUBLE[{dim}],
utility_score DOUBLE DEFAULT 1.0,
access_count INT64 DEFAULT 0,
created_at STRING,
last_accessed STRING,
metadata STRING DEFAULT '{{}}',
PRIMARY KEY(id)
);

CREATE REL TABLE IF NOT EXISTS MemoryEdge(
FROM MemoryNode TO MemoryNode,
id STRING,
relation_type STRING,
weight DOUBLE DEFAULT 1.0,
created_at STRING,
metadata STRING DEFAULT '{{}}'
);
"""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _now_iso() -> str:
|
|
45
|
+
return datetime.now(timezone.utc).isoformat()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class KuzuBackend:
    """Graph-native storage backend using Kùzu.

    Provides native graph traversal via Cypher, native vector
    similarity via array_cosine_similarity, and file-based
    persistence with zero server infrastructure.

    Modeled after the Synthesis/dialectical-workstation Kùzu
    integration: MERGE-based node upserts, parameterized queries,
    idempotent schema creation.

    Nodes and edges cross this API as plain dicts. Node dicts carry the keys
    ``id``, ``node_type``, ``content``, ``embedding``, ``utility_score``,
    ``access_count``, ``created_at``, ``last_accessed`` and ``metadata``;
    edge dicts carry ``id``, ``source_id``, ``target_id``, ``relation_type``,
    ``weight``, ``created_at`` and ``metadata``. Metadata is stored as a JSON
    string and decoded again on read.
    """

    def __init__(
        self,
        path: str | Path,
        embedding_dim: int = EMBEDDING_DIM_DEFAULT,
        buffer_pool_mb: int = 256,
    ) -> None:
        """Open (or create) the database at ``path`` and ensure the schema.

        Args:
            path: Location of the Kùzu database on disk.
            embedding_dim: Length of the fixed-size embedding column. Vectors
                are padded or truncated to this length before being stored
                or used as a search query.
            buffer_pool_mb: Buffer pool size in mebibytes; converted to bytes
                for ``kuzu.Database``.
        """
        self._path = str(path)
        self._embedding_dim = embedding_dim
        self._db = kuzu.Database(self._path, buffer_pool_size=buffer_pool_mb * 1024 * 1024)
        self._conn = kuzu.Connection(self._db)
        self._ensure_schema()

    def _ensure_schema(self) -> None:
        """Create the node and relationship tables if they do not exist yet.

        Raises:
            RuntimeError: If a schema statement fails for any reason other
                than the table already existing.
        """
        schema = _SCHEMA_SQL.format(dim=self._embedding_dim)
        for stmt in schema.split(";"):
            stmt = stmt.strip()
            if not stmt:
                continue
            try:
                self._conn.execute(stmt)
            except RuntimeError as exc:
                # The statements already use IF NOT EXISTS; tolerate an
                # "already exists" report, but surface genuine schema errors
                # instead of failing later with a missing table.
                if "already exists" not in str(exc).lower():
                    raise

    # --- Node operations ---

    def save_node(self, node: dict[str, Any]) -> None:
        """Upsert a node using MERGE, keyed on ``node["id"]``.

        ``id``, ``node_type`` and ``content`` are required keys. The other
        fields fall back to defaults (utility 1.0, access count 0, current
        UTC time, empty metadata). On update every field except
        ``created_at`` is overwritten. A missing or empty embedding is
        stored as null.
        """
        embedding = node.get("embedding")
        embedding_val = self._format_embedding(embedding) if embedding else None

        self._conn.execute(
            """
            MERGE (n:MemoryNode {id: $id})
            ON CREATE SET
                n.node_type = $node_type,
                n.content = $content,
                n.embedding = $embedding,
                n.utility_score = $utility_score,
                n.access_count = $access_count,
                n.created_at = $created_at,
                n.last_accessed = $last_accessed,
                n.metadata = $metadata
            ON MATCH SET
                n.node_type = $node_type,
                n.content = $content,
                n.embedding = $embedding,
                n.utility_score = $utility_score,
                n.access_count = $access_count,
                n.last_accessed = $last_accessed,
                n.metadata = $metadata
            """,
            parameters={
                "id": node["id"],
                "node_type": node["node_type"],
                "content": node["content"],
                "embedding": embedding_val,
                "utility_score": float(node.get("utility_score", 1.0)),
                "access_count": int(node.get("access_count", 0)),
                "created_at": node.get("created_at", _now_iso()),
                "last_accessed": node.get("last_accessed", _now_iso()),
                "metadata": json.dumps(node.get("metadata", {})),
            },
        )

    def load_node(self, node_id: str) -> dict[str, Any] | None:
        """Return the node with the given id as a dict, or ``None`` if absent."""
        result = self._conn.execute(
            """
            MATCH (n:MemoryNode {id: $id})
            RETURN n.id, n.node_type, n.content, n.embedding,
                   n.utility_score, n.access_count,
                   n.created_at, n.last_accessed, n.metadata
            """,
            parameters={"id": node_id},
        )
        if not result.has_next():
            return None
        return self._row_to_node(result.get_next())

    # --- Edge operations ---

    def save_edge(self, edge: dict[str, Any]) -> None:
        """Create an edge between existing nodes.

        ``source_id``, ``target_id``, ``id`` and ``relation_type`` are
        required keys. The statement is a plain CREATE (not a MERGE), so it
        does not deduplicate on the edge id. When either endpoint does not
        match a node, the MATCH yields nothing and no edge is created.
        """
        self._conn.execute(
            """
            MATCH (s:MemoryNode {id: $source_id}), (t:MemoryNode {id: $target_id})
            CREATE (s)-[:MemoryEdge {
                id: $id,
                relation_type: $relation_type,
                weight: $weight,
                created_at: $created_at,
                metadata: $metadata
            }]->(t)
            """,
            parameters={
                "source_id": edge["source_id"],
                "target_id": edge["target_id"],
                "id": edge["id"],
                "relation_type": edge["relation_type"],
                "weight": float(edge.get("weight", 1.0)),
                "created_at": edge.get("created_at", _now_iso()),
                "metadata": json.dumps(edge.get("metadata", {})),
            },
        )

    def load_edges(
        self, node_id: str, edge_type: str | None = None
    ) -> list[dict[str, Any]]:
        """Return edges that start or end at ``node_id``.

        Args:
            node_id: Node whose incoming and outgoing edges are wanted.
            edge_type: When given, only edges with this ``relation_type``.
        """
        if edge_type:
            result = self._conn.execute(
                """
                MATCH (s:MemoryNode)-[e:MemoryEdge]->(t:MemoryNode)
                WHERE (s.id = $id OR t.id = $id) AND e.relation_type = $etype
                RETURN e.id, s.id, t.id, e.relation_type, e.weight,
                       e.created_at, e.metadata
                """,
                parameters={"id": node_id, "etype": edge_type},
            )
        else:
            result = self._conn.execute(
                """
                MATCH (s:MemoryNode)-[e:MemoryEdge]->(t:MemoryNode)
                WHERE s.id = $id OR t.id = $id
                RETURN e.id, s.id, t.id, e.relation_type, e.weight,
                       e.created_at, e.metadata
                """,
                parameters={"id": node_id},
            )
        return [self._row_to_edge(r) for r in self._collect_rows(result)]

    # --- Query ---

    def query_nodes(
        self,
        node_type: str | None = None,
        filters: dict[str, Any] | None = None,
        limit: int = 100,
    ) -> list[dict[str, Any]]:
        """Return nodes ordered by descending ``utility_score``.

        Args:
            node_type: When given, only nodes of this type.
            filters: Exact-match constraints on top-level metadata keys.
            limit: Maximum number of nodes to return.

        Metadata lives in a JSON string column, so ``filters`` are evaluated
        in Python. The limit is therefore applied *after* filtering;
        otherwise matching nodes ranked beyond the first ``limit`` rows would
        be dropped before the filter ever saw them.
        """
        where_clause = "WHERE n.node_type = $ntype" if node_type else ""
        # LIMIT can only be pushed into the query when no Python-side filter follows.
        limit_clause = "" if filters else "LIMIT $lim"

        params: dict[str, Any] = {}
        if node_type:
            params["ntype"] = node_type
        if not filters:
            params["lim"] = limit

        result = self._conn.execute(
            f"""
            MATCH (n:MemoryNode)
            {where_clause}
            RETURN n.id, n.node_type, n.content, n.embedding,
                   n.utility_score, n.access_count,
                   n.created_at, n.last_accessed, n.metadata
            ORDER BY n.utility_score DESC
            {limit_clause}
            """,
            parameters=params,
        )

        nodes = [self._row_to_node(r) for r in self._collect_rows(result)]

        if filters:
            nodes = [
                n for n in nodes
                if all(
                    (n.get("metadata") or {}).get(k) == v
                    for k, v in filters.items()
                )
            ]
            nodes = nodes[: max(limit, 0)]

        return nodes

    # --- Delete ---

    def delete_node(self, node_id: str) -> None:
        """Delete a node together with all of its incoming and outgoing edges."""
        # Delete connected edges first (Kùzu requires directed deletes)
        self._conn.execute(
            """
            MATCH (n:MemoryNode {id: $id})-[e:MemoryEdge]->()
            DELETE e
            """,
            parameters={"id": node_id},
        )
        self._conn.execute(
            """
            MATCH ()-[e:MemoryEdge]->(n:MemoryNode {id: $id})
            DELETE e
            """,
            parameters={"id": node_id},
        )
        self._conn.execute(
            """
            MATCH (n:MemoryNode {id: $id})
            DELETE n
            """,
            parameters={"id": node_id},
        )

    def delete_edge(self, edge_id: str) -> None:
        """Delete every edge whose ``id`` property equals ``edge_id``."""
        self._conn.execute(
            """
            MATCH ()-[e:MemoryEdge {id: $id}]->()
            DELETE e
            """,
            parameters={"id": edge_id},
        )

    # --- Counts ---

    def node_count(self, node_type: str | None = None) -> int:
        """Count nodes, optionally restricted to one ``node_type``."""
        if node_type:
            result = self._conn.execute(
                "MATCH (n:MemoryNode) WHERE n.node_type = $ntype RETURN count(n)",
                parameters={"ntype": node_type},
            )
        else:
            result = self._conn.execute(
                "MATCH (n:MemoryNode) RETURN count(n)"
            )
        return result.get_next()[0] if result.has_next() else 0

    def edge_count(self, relation_type: str | None = None) -> int:
        """Count edges, optionally restricted to one ``relation_type``."""
        if relation_type:
            result = self._conn.execute(
                "MATCH ()-[e:MemoryEdge]->() WHERE e.relation_type = $rtype RETURN count(e)",
                parameters={"rtype": relation_type},
            )
        else:
            result = self._conn.execute(
                "MATCH ()-[e:MemoryEdge]->() RETURN count(e)"
            )
        return result.get_next()[0] if result.has_next() else 0

    # --- Vector similarity search ---

    def similarity_search(
        self,
        embedding: list[float],
        node_type: str | None = None,
        limit: int = 10,
    ) -> list[dict[str, Any]]:
        """Native cosine similarity via Kùzu's array_cosine_similarity.

        Args:
            embedding: Query vector. It is padded or truncated to the
                configured dimension, exactly as stored vectors are, so the
                fixed-size cast in the query always receives the right length.
            node_type: When given, only nodes of this type are ranked.
            limit: Maximum number of nodes to return.

        Returns:
            Node dicts ordered from most to least similar. Nodes without an
            embedding are skipped. An empty query vector yields ``[]``.
        """
        if embedding is None or len(embedding) == 0:
            return []

        cast_expr = f"cast($emb, 'DOUBLE[{self._embedding_dim}]')"
        type_predicate = "n.node_type = $ntype AND " if node_type else ""

        params: dict[str, Any] = {
            "emb": self._format_embedding(embedding),
            "lim": limit,
        }
        if node_type:
            params["ntype"] = node_type

        result = self._conn.execute(
            f"""
            MATCH (n:MemoryNode)
            WHERE {type_predicate}n.embedding IS NOT NULL
            WITH n, array_cosine_similarity(n.embedding, {cast_expr}) AS sim
            RETURN n.id, n.node_type, n.content, n.embedding,
                   n.utility_score, n.access_count,
                   n.created_at, n.last_accessed, n.metadata, sim
            ORDER BY sim DESC
            LIMIT $lim
            """,
            parameters=params,
        )

        # The trailing ``sim`` column is only used for ordering; _row_to_node
        # reads the first nine columns.
        return [self._row_to_node(r) for r in self._collect_rows(result)]

    # --- Graph-native traversal ---

    def traverse(
        self,
        start_id: str,
        edge_types: list[str] | None = None,
        max_depth: int = 2,
        max_nodes: int = 50,
        min_weight: float = 0.0,
    ) -> list[dict[str, Any]]:
        """Collect neighbours of a start node via a variable-length Cypher path.

        Returns distinct nodes reachable within ``max_depth`` hops in either
        direction, optionally filtered by edge relation_type and minimum
        weight, capped at ``max_nodes``. The start node itself is excluded.
        A ``max_depth`` below 1 yields ``[]``.
        """
        # The hop bound is part of the pattern text, so force it to an int
        # rather than interpolating an arbitrary value into the query.
        depth = int(max_depth)
        if depth < 1:
            return []

        params: dict[str, Any] = {"start": start_id, "lim": max_nodes}
        predicates = ["neighbor.id <> $start"]

        if edge_types:
            predicates.append("e.relation_type IN $etypes")
            params["etypes"] = list(edge_types)
        if min_weight > 0:
            # Bound as a parameter instead of being formatted into the query.
            predicates.append("e.weight >= $min_weight")
            params["min_weight"] = float(min_weight)

        where_clause = " AND ".join(predicates)

        # NOTE(review): ``e`` is bound to a variable-length relationship here;
        # confirm against the Kùzu docs that property predicates on it
        # (relation_type / weight) are accepted in this position.
        result = self._conn.execute(
            f"""
            MATCH (start:MemoryNode {{id: $start}})
            MATCH (start)-[e:MemoryEdge*1..{depth}]-(neighbor:MemoryNode)
            WHERE {where_clause}
            WITH DISTINCT neighbor
            RETURN neighbor.id, neighbor.node_type, neighbor.content, neighbor.embedding,
                   neighbor.utility_score, neighbor.access_count,
                   neighbor.created_at, neighbor.last_accessed, neighbor.metadata
            LIMIT $lim
            """,
            parameters=params,
        )

        return [self._row_to_node(r) for r in self._collect_rows(result)]

    # --- Helpers ---

    def _format_embedding(self, embedding: list[float] | None) -> list[float] | None:
        """Coerce a vector to floats of exactly the configured dimension.

        Shorter vectors are right-padded with zeros, longer ones are
        truncated. ``None`` is passed through unchanged.
        """
        if embedding is None:
            return None
        # Pad or truncate to configured dimension
        emb = [float(x) for x in embedding]
        if len(emb) < self._embedding_dim:
            emb.extend([0.0] * (self._embedding_dim - len(emb)))
        elif len(emb) > self._embedding_dim:
            emb = emb[: self._embedding_dim]
        return emb

    def _row_to_node(self, row: list) -> dict[str, Any]:
        """Map a result row (node columns in RETURN order) to a node dict.

        Only the first nine columns are read; string metadata is decoded
        from JSON.
        """
        return {
            "id": row[0],
            "node_type": row[1],
            "content": row[2],
            "embedding": list(row[3]) if row[3] is not None else None,
            "utility_score": row[4],
            "access_count": row[5],
            "created_at": row[6],
            "last_accessed": row[7],
            "metadata": json.loads(row[8]) if isinstance(row[8], str) else row[8],
        }

    def _row_to_edge(self, row: list) -> dict[str, Any]:
        """Map a result row (edge columns in RETURN order) to an edge dict."""
        return {
            "id": row[0],
            "source_id": row[1],
            "target_id": row[2],
            "relation_type": row[3],
            "weight": row[4],
            "created_at": row[5],
            "metadata": json.loads(row[6]) if isinstance(row[6], str) else row[6],
        }

    def _collect_rows(self, result) -> list[list]:
        """Drain a query result into a list of rows."""
        rows = []
        while result.has_next():
            rows.append(result.get_next())
        return rows

    def close(self) -> None:
        """Release the connection and database handles.

        Each handle is closed explicitly when it exposes a ``close()``
        method; otherwise cleanup is left to garbage collection. Calling
        ``close()`` more than once is safe.
        """
        try:
            # Close the connection before the database it belongs to.
            for handle in (self._conn, self._db):
                closer = getattr(handle, "close", None)
                if callable(closer):
                    closer()
        finally:
            self._conn = None
            self._db = None
|