synap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
synap/__init__.py ADDED
@@ -0,0 +1,69 @@
1
+ """Engram — Cognitive memory architecture for LLM agents."""
2
+
3
+ from synap.types import (
4
+ CapacityHints,
5
+ ConsolidationEvent,
6
+ ConsolidationTrigger,
7
+ DomainResult,
8
+ Episode,
9
+ EpisodeOutcome,
10
+ MemoryEdge,
11
+ MemoryNode,
12
+ MemoryType,
13
+ PreparedContext,
14
+ Procedure,
15
+ ToolCall,
16
+ )
17
+ from synap.protocols import (
18
+ AsyncStorageBackend,
19
+ EmbeddingProvider,
20
+ GraphStore,
21
+ LLMProvider,
22
+ SemanticDomain,
23
+ StorageBackend,
24
+ )
25
+ from synap.graph import MemoryGraph
26
+ from synap.persistent_graph import PersistentGraph
27
+ from synap.semantic import SemanticMemory
28
+ from synap.procedural import ProceduralMemory
29
+ from synap.episodic import EpisodicMemory
30
+ from synap.consolidation import ConsolidationConfig, ConsolidationResult
31
+ from synap.episodic import EpisodicPattern
32
+ from synap.semantic import SemanticResult
33
+ from synap.bootstrap import Bootstrap, ProposedKnowledge
34
+ from synap.facade import CognitiveMemory, EvaluationReport, MemoryStats
35
+
36
# Names re-exported at package level. This list is what
# ``from synap import *`` exposes; every entry is imported above.
__all__ = [
    "AsyncStorageBackend",
    "Bootstrap",
    "CapacityHints",
    "CognitiveMemory",
    "ConsolidationConfig",
    "ConsolidationEvent",
    "ConsolidationResult",
    "ConsolidationTrigger",
    "DomainResult",
    "Episode",
    "EpisodeOutcome",
    "EpisodicMemory",
    "EpisodicPattern",
    "EmbeddingProvider",
    "EvaluationReport",
    "GraphStore",
    "LLMProvider",
    "MemoryEdge",
    "MemoryGraph",
    "MemoryNode",
    "MemoryStats",
    "MemoryType",
    "PersistentGraph",
    "PreparedContext",
    "Procedure",
    "ProceduralMemory",
    "ProposedKnowledge",
    "SemanticDomain",
    "SemanticMemory",
    "SemanticResult",
    "StorageBackend",
    "ToolCall",
]
synap/_utils.py ADDED
@@ -0,0 +1,51 @@
1
+ """Shared utility functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any
7
+
8
+
9
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity between two equal-length vectors.

    Args:
        a: First vector.
        b: Second vector.

    Returns:
        The cosine of the angle between ``a`` and ``b``, clamped to
        ``[-1.0, 1.0]``. ``0.0`` is returned when the vectors are empty,
        differ in length, or either one has zero magnitude.
    """
    if len(a) != len(b) or len(a) == 0:
        return 0.0
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = sum(x * x for x in a) ** 0.5
    norm_b = sum(x * x for x in b) ** 0.5
    if norm_a == 0 or norm_b == 0:
        return 0.0
    similarity = dot / (norm_a * norm_b)
    # Floating-point rounding can push the ratio marginally outside the
    # mathematical range (e.g. 1.0000000000000002 for identical vectors).
    # Explicit comparisons are used instead of min()/max() so that NaN
    # still propagates unchanged.
    if similarity > 1.0:
        return 1.0
    if similarity < -1.0:
        return -1.0
    return similarity
19
+
20
+
21
def safe_parse_json(text: str) -> dict[str, Any] | None:
    """Parse a JSON object from LLM output, handling common formatting issues.

    Candidates are tried in order:

    1. the whole (stripped) text,
    2. the body of the first Markdown code fence,
    3. the span from the first ``{`` to the last ``}``.

    Args:
        text: Raw model output. ``None`` or an empty string yields ``None``.

    Returns:
        The first candidate that decodes to a JSON object (``dict``), or
        ``None`` when no candidate does. Valid JSON that is not an object
        (a list, number, string, ``null``) is not returned, matching the
        declared return type.
    """
    if not text:
        return None
    text = text.strip()

    candidates = [text]

    fence = text.find("```")
    if fence >= 0:
        after_fence = fence + 3
        newline = text.find("\n", after_fence)
        closing = text.find("```", after_fence)
        # A newline before the closing fence means the opening line holds an
        # optional language tag, so the payload starts on the next line.
        # Otherwise the fence is inline and the payload starts right after
        # the backticks.
        if newline >= 0 and (closing < 0 or newline < closing):
            start = newline + 1
        else:
            start = after_fence
        end = text.find("```", start)
        if end > start:
            candidates.append(text[start:end].strip())

    first_brace = text.find("{")
    last_brace = text.rfind("}")
    if first_brace >= 0 and last_brace > first_brace:
        candidates.append(text[first_brace : last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    return None
@@ -0,0 +1,6 @@
1
+ """Storage backends for synap."""
2
+
3
+ from synap.backends.sqlite import SQLiteBackend
4
+ from synap.backends.kuzu import KuzuBackend
5
+
6
# Backends re-exported from this subpackage; both are imported above.
__all__ = ["KuzuBackend", "SQLiteBackend"]
synap/backends/kuzu.py ADDED
@@ -0,0 +1,438 @@
1
+ """Kùzu graph database backend — native graph traversal + vector search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import kuzu
11
+
12
+
13
+ # ---------------------------------------------------------------------------
14
+ # Schema constants
15
+ # ---------------------------------------------------------------------------
16
+
17
# Default value for KuzuBackend's ``embedding_dim`` argument. The backend
# does not infer the width from stored vectors; callers whose embeddings
# have a different width must pass it explicitly.
EMBEDDING_DIM_DEFAULT = 8

# Schema DDL template. It is rendered with ``str.format(dim=...)``, so
# ``{dim}`` becomes the fixed embedding array width and the doubled braces
# in ``'{{}}'`` become a literal ``'{}'`` default. KuzuBackend._ensure_schema
# splits the rendered text on ';' and executes each statement separately.
_SCHEMA_SQL = """
CREATE NODE TABLE IF NOT EXISTS MemoryNode(
id STRING,
node_type STRING,
content STRING,
embedding DOUBLE[{dim}],
utility_score DOUBLE DEFAULT 1.0,
access_count INT64 DEFAULT 0,
created_at STRING,
last_accessed STRING,
metadata STRING DEFAULT '{{}}',
PRIMARY KEY(id)
);

CREATE REL TABLE IF NOT EXISTS MemoryEdge(
FROM MemoryNode TO MemoryNode,
id STRING,
relation_type STRING,
weight DOUBLE DEFAULT 1.0,
created_at STRING,
metadata STRING DEFAULT '{{}}'
);
"""
42
+
43
+
44
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
46
+
47
+
48
class KuzuBackend:
    """Graph-native storage backend using Kùzu.

    Provides native graph traversal via Cypher, native vector
    similarity via array_cosine_similarity, and file-based
    persistence with zero server infrastructure.

    Nodes are upserted with MERGE, all user-supplied values are passed as
    query parameters, and schema creation is idempotent.

    Nodes and edges are exchanged as plain dicts; see ``_row_to_node`` and
    ``_row_to_edge`` for the exact keys. The instance can be used as a
    context manager, which calls :meth:`close` on exit.
    """

    # Node properties in the positional order that ``_row_to_node`` expects.
    _NODE_FIELDS = (
        "id",
        "node_type",
        "content",
        "embedding",
        "utility_score",
        "access_count",
        "created_at",
        "last_accessed",
        "metadata",
    )

    # Edge columns in the positional order that ``_row_to_edge`` expects.
    _EDGE_COLUMNS = (
        "e.id, s.id, t.id, e.relation_type, e.weight, e.created_at, e.metadata"
    )

    def __init__(
        self,
        path: str | Path,
        embedding_dim: int = EMBEDDING_DIM_DEFAULT,
        buffer_pool_mb: int = 256,
    ) -> None:
        """Open (or create) the database at *path* and ensure the schema.

        Args:
            path: Location of the Kùzu database.
            embedding_dim: Fixed width of the ``embedding`` column. Stored
                and query vectors are padded or truncated to this width.
            buffer_pool_mb: Buffer pool size in mebibytes.
        """
        self._path = str(path)
        self._embedding_dim = embedding_dim
        self._db = kuzu.Database(
            self._path, buffer_pool_size=buffer_pool_mb * 1024 * 1024
        )
        self._conn = kuzu.Connection(self._db)
        self._ensure_schema()

    def __enter__(self) -> "KuzuBackend":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _ensure_schema(self) -> None:
        """Create the node and relationship tables if they are missing.

        Raises:
            RuntimeError: If a DDL statement fails for any reason other
                than the table already existing.
        """
        for stmt in _SCHEMA_SQL.format(dim=self._embedding_dim).split(";"):
            stmt = stmt.strip()
            if not stmt:
                continue
            try:
                self._conn.execute(stmt)
            except RuntimeError as exc:
                # The DDL already uses IF NOT EXISTS, so only the benign
                # "already exists" case is tolerated. Anything else (syntax
                # unsupported by the installed engine, invalid dimension...)
                # would leave the database without tables, so surface it.
                if "already exists" not in str(exc).lower():
                    raise

    # --- Node operations ---

    def save_node(self, node: dict[str, Any]) -> None:
        """Upsert a node using MERGE.

        Args:
            node: Must contain ``id``, ``node_type`` and ``content``.
                Optional keys: ``embedding``, ``utility_score``,
                ``access_count``, ``created_at``, ``last_accessed``,
                ``metadata``. ``created_at`` is only written when the node
                is first created.
        """
        embedding = node.get("embedding")
        embedding_val = self._format_embedding(embedding) if embedding else None

        self._conn.execute(
            """
            MERGE (n:MemoryNode {id: $id})
            ON CREATE SET
                n.node_type = $node_type,
                n.content = $content,
                n.embedding = $embedding,
                n.utility_score = $utility_score,
                n.access_count = $access_count,
                n.created_at = $created_at,
                n.last_accessed = $last_accessed,
                n.metadata = $metadata
            ON MATCH SET
                n.node_type = $node_type,
                n.content = $content,
                n.embedding = $embedding,
                n.utility_score = $utility_score,
                n.access_count = $access_count,
                n.last_accessed = $last_accessed,
                n.metadata = $metadata
            """,
            parameters={
                "id": node["id"],
                "node_type": node["node_type"],
                "content": node["content"],
                "embedding": embedding_val,
                "utility_score": float(node.get("utility_score", 1.0)),
                "access_count": int(node.get("access_count", 0)),
                "created_at": node.get("created_at", _now_iso()),
                "last_accessed": node.get("last_accessed", _now_iso()),
                "metadata": json.dumps(node.get("metadata", {})),
            },
        )

    def load_node(self, node_id: str) -> dict[str, Any] | None:
        """Return the node with the given id, or ``None`` if it is absent."""
        result = self._conn.execute(
            f"""
            MATCH (n:MemoryNode {{id: $id}})
            RETURN {self._node_columns("n")}
            """,
            parameters={"id": node_id},
        )
        if not result.has_next():
            return None
        return self._row_to_node(result.get_next())

    # --- Edge operations ---

    def save_edge(self, edge: dict[str, Any]) -> None:
        """Create an edge between existing nodes.

        Args:
            edge: Must contain ``id``, ``source_id``, ``target_id`` and
                ``relation_type``. Optional keys: ``weight``,
                ``created_at``, ``metadata``.

        If either endpoint does not match a node, the MATCH yields no rows
        and no edge is created.
        """
        # NOTE(review): this uses CREATE, not MERGE, so saving the same edge
        # id twice presumably produces two relationships - confirm whether
        # callers rely on that before switching to an upsert.
        self._conn.execute(
            """
            MATCH (s:MemoryNode {id: $source_id}), (t:MemoryNode {id: $target_id})
            CREATE (s)-[:MemoryEdge {
                id: $id,
                relation_type: $relation_type,
                weight: $weight,
                created_at: $created_at,
                metadata: $metadata
            }]->(t)
            """,
            parameters={
                "source_id": edge["source_id"],
                "target_id": edge["target_id"],
                "id": edge["id"],
                "relation_type": edge["relation_type"],
                "weight": float(edge.get("weight", 1.0)),
                "created_at": edge.get("created_at", _now_iso()),
                "metadata": json.dumps(edge.get("metadata", {})),
            },
        )

    def load_edges(
        self, node_id: str, edge_type: str | None = None
    ) -> list[dict[str, Any]]:
        """Return edges that start or end at *node_id*.

        Args:
            node_id: Id of the node whose incident edges are wanted.
            edge_type: When truthy, only edges with this ``relation_type``
                are returned.
        """
        condition = "s.id = $id OR t.id = $id"
        params: dict[str, Any] = {"id": node_id}
        if edge_type:
            condition = f"({condition}) AND e.relation_type = $etype"
            params["etype"] = edge_type

        result = self._conn.execute(
            f"""
            MATCH (s:MemoryNode)-[e:MemoryEdge]->(t:MemoryNode)
            WHERE {condition}
            RETURN {self._EDGE_COLUMNS}
            """,
            parameters=params,
        )
        return [self._row_to_edge(r) for r in self._collect_rows(result)]

    # --- Query ---

    def query_nodes(
        self,
        node_type: str | None = None,
        filters: dict[str, Any] | None = None,
        limit: int = 100,
    ) -> list[dict[str, Any]]:
        """Return nodes ordered by descending ``utility_score``.

        Args:
            node_type: When truthy, restrict to this ``node_type``.
            filters: Exact-match constraints on decoded metadata keys.
            limit: Maximum number of nodes returned.

        Returns:
            At most *limit* node dicts that satisfy every constraint.
        """
        where = "WHERE n.node_type = $ntype" if node_type else ""
        params: dict[str, Any] = {"ntype": node_type} if node_type else {}

        # Metadata filters are evaluated in Python, so a database-side LIMIT
        # is only correct when there are none. With filters, the LIMIT would
        # cut rows before filtering and silently drop matching nodes.
        if filters:
            limit_clause = ""
        else:
            limit_clause = "LIMIT $lim"
            params["lim"] = limit

        result = self._conn.execute(
            f"""
            MATCH (n:MemoryNode)
            {where}
            RETURN {self._node_columns("n")}
            ORDER BY n.utility_score DESC
            {limit_clause}
            """,
            parameters=params,
        )

        if not filters:
            return [self._row_to_node(r) for r in self._collect_rows(result)]

        # Rows arrive already ordered, so stop as soon as enough match.
        matches: list[dict[str, Any]] = []
        while len(matches) < limit and result.has_next():
            node = self._row_to_node(result.get_next())
            metadata = node.get("metadata") or {}
            if all(metadata.get(k) == v for k, v in filters.items()):
                matches.append(node)
        return matches

    # --- Delete ---

    def delete_node(self, node_id: str) -> None:
        """Delete a node together with all of its incident edges."""
        # Delete connected edges first (Kùzu requires directed deletes)
        self._conn.execute(
            """
            MATCH (n:MemoryNode {id: $id})-[e:MemoryEdge]->()
            DELETE e
            """,
            parameters={"id": node_id},
        )
        self._conn.execute(
            """
            MATCH ()-[e:MemoryEdge]->(n:MemoryNode {id: $id})
            DELETE e
            """,
            parameters={"id": node_id},
        )
        self._conn.execute(
            """
            MATCH (n:MemoryNode {id: $id})
            DELETE n
            """,
            parameters={"id": node_id},
        )

    def delete_edge(self, edge_id: str) -> None:
        """Delete every relationship whose ``id`` property equals *edge_id*."""
        self._conn.execute(
            """
            MATCH ()-[e:MemoryEdge {id: $id}]->()
            DELETE e
            """,
            parameters={"id": edge_id},
        )

    # --- Counts ---

    def node_count(self, node_type: str | None = None) -> int:
        """Count nodes, optionally restricted to one ``node_type``."""
        if node_type:
            result = self._conn.execute(
                "MATCH (n:MemoryNode) WHERE n.node_type = $ntype RETURN count(n)",
                parameters={"ntype": node_type},
            )
        else:
            result = self._conn.execute("MATCH (n:MemoryNode) RETURN count(n)")
        return result.get_next()[0] if result.has_next() else 0

    def edge_count(self, relation_type: str | None = None) -> int:
        """Count edges, optionally restricted to one ``relation_type``."""
        if relation_type:
            result = self._conn.execute(
                "MATCH ()-[e:MemoryEdge]->() WHERE e.relation_type = $rtype RETURN count(e)",
                parameters={"rtype": relation_type},
            )
        else:
            result = self._conn.execute(
                "MATCH ()-[e:MemoryEdge]->() RETURN count(e)"
            )
        return result.get_next()[0] if result.has_next() else 0

    # --- Vector similarity search ---

    def similarity_search(
        self,
        embedding: list[float],
        node_type: str | None = None,
        limit: int = 10,
    ) -> list[dict[str, Any]]:
        """Native cosine similarity via Kùzu's array_cosine_similarity.

        Args:
            embedding: Query vector. It is padded or truncated to the
                configured dimension, exactly like stored vectors.
            node_type: When truthy, restrict to this ``node_type``.
            limit: Maximum number of nodes returned.

        Returns:
            Node dicts ordered by descending similarity. The similarity
            value itself is not included in the returned dicts. An empty
            or ``None`` query vector yields an empty list.
        """
        if embedding is None or len(embedding) == 0:
            return []
        # Normalise the width the same way save_node does; otherwise the
        # cast below fails whenever the caller's vector has another length.
        query_vec = self._format_embedding(embedding)

        cast_expr = f"cast($emb, 'DOUBLE[{self._embedding_dim}]')"
        condition = "n.embedding IS NOT NULL"
        params: dict[str, Any] = {"emb": query_vec, "lim": limit}
        if node_type:
            condition = f"n.node_type = $ntype AND {condition}"
            params["ntype"] = node_type

        result = self._conn.execute(
            f"""
            MATCH (n:MemoryNode)
            WHERE {condition}
            WITH n, array_cosine_similarity(n.embedding, {cast_expr}) AS sim
            RETURN {self._node_columns("n")}, sim
            ORDER BY sim DESC
            LIMIT $lim
            """,
            parameters=params,
        )

        return [self._row_to_node(r) for r in self._collect_rows(result)]

    # --- Graph-native traversal ---

    def traverse(
        self,
        start_id: str,
        edge_types: list[str] | None = None,
        max_depth: int = 2,
        max_nodes: int = 50,
        min_weight: float = 0.0,
    ) -> list[dict[str, Any]]:
        """Traverse from a start node using a Cypher variable-length path.

        Args:
            start_id: Id of the node to start from.
            edge_types: When non-empty, restrict to these relation types.
            max_depth: Maximum number of hops; values below 1 yield ``[]``.
            max_nodes: Maximum number of nodes returned.
            min_weight: When greater than 0, minimum edge weight.

        Returns:
            Distinct nodes (excluding the start node) reachable within
            *max_depth* hops in either direction.

        Raises:
            ValueError: If *max_depth* cannot be converted to an integer.
        """
        # Path bounds cannot be bound as query parameters, so force the
        # value to an int before it is interpolated into the query text.
        depth = int(max_depth)
        if depth < 1:
            return []

        conditions = ["neighbor.id <> $start"]
        params: dict[str, Any] = {"start": start_id, "lim": max_nodes}
        if edge_types:
            conditions.append("e.relation_type IN $etypes")
            params["etypes"] = edge_types
        if min_weight > 0:
            # Bound as a parameter rather than formatted into the query.
            conditions.append("e.weight >= $min_w")
            params["min_w"] = float(min_weight)

        # NOTE(review): `e` is bound to a variable-length relationship here;
        # property predicates on it may need Kùzu's inline
        # `(r, n | WHERE ...)` filter syntax instead - confirm against the
        # installed Kùzu version.
        result = self._conn.execute(
            f"""
            MATCH (start:MemoryNode {{id: $start}})
            MATCH (start)-[e:MemoryEdge*1..{depth}]-(neighbor:MemoryNode)
            WHERE {" AND ".join(conditions)}
            WITH DISTINCT neighbor
            RETURN {self._node_columns("neighbor")}
            LIMIT $lim
            """,
            parameters=params,
        )

        return [self._row_to_node(r) for r in self._collect_rows(result)]

    # --- Helpers ---

    @classmethod
    def _node_columns(cls, alias: str) -> str:
        """Build the RETURN column list for a node bound to *alias*."""
        return ", ".join(f"{alias}.{field}" for field in cls._NODE_FIELDS)

    def _format_embedding(self, embedding: list[float] | None) -> list[float] | None:
        """Coerce *embedding* to floats of exactly the configured width.

        Shorter vectors are zero-padded and longer ones truncated;
        ``None`` is passed through.
        """
        if embedding is None:
            return None
        emb = [float(x) for x in embedding]
        if len(emb) < self._embedding_dim:
            emb.extend([0.0] * (self._embedding_dim - len(emb)))
        elif len(emb) > self._embedding_dim:
            emb = emb[: self._embedding_dim]
        return emb

    def _row_to_node(self, row: list) -> dict[str, Any]:
        """Map a result row (in ``_NODE_FIELDS`` order) to a node dict.

        Extra trailing columns, such as a similarity score, are ignored.
        """
        return {
            "id": row[0],
            "node_type": row[1],
            "content": row[2],
            "embedding": list(row[3]) if row[3] is not None else None,
            "utility_score": row[4],
            "access_count": row[5],
            "created_at": row[6],
            "last_accessed": row[7],
            # Metadata is stored as a JSON string; decode it on the way out.
            "metadata": json.loads(row[8]) if isinstance(row[8], str) else row[8],
        }

    def _row_to_edge(self, row: list) -> dict[str, Any]:
        """Map a result row (in ``_EDGE_COLUMNS`` order) to an edge dict."""
        return {
            "id": row[0],
            "source_id": row[1],
            "target_id": row[2],
            "relation_type": row[3],
            "weight": row[4],
            "created_at": row[5],
            "metadata": json.loads(row[6]) if isinstance(row[6], str) else row[6],
        }

    def _collect_rows(self, result) -> list[list]:
        """Drain a query result into a list of rows."""
        rows = []
        while result.has_next():
            rows.append(result.get_next())
        return rows

    def close(self) -> None:
        """Release the connection and database handles.

        Safe to call more than once. The handles are closed explicitly when
        the installed Kùzu version exposes ``close()``; otherwise dropping
        the references leaves cleanup to garbage collection.
        """
        # Close the connection before the database that owns it.
        for handle in (self._conn, self._db):
            closer = getattr(handle, "close", None)
            if callable(closer):
                closer()
        self._conn = None
        self._db = None