yourmemory 1.2.2__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yourmemory-1.2.2/yourmemory.egg-info → yourmemory-1.3.0}/PKG-INFO +16 -7
- {yourmemory-1.2.2 → yourmemory-1.3.0}/README.md +11 -5
- {yourmemory-1.2.2 → yourmemory-1.3.0}/memory_mcp.py +17 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/pyproject.toml +4 -2
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/db/migrate.py +8 -0
- yourmemory-1.3.0/src/graph/__init__.py +44 -0
- yourmemory-1.3.0/src/graph/backend.py +58 -0
- yourmemory-1.3.0/src/graph/graph_store.py +275 -0
- yourmemory-1.3.0/src/graph/neo4j_backend.py +195 -0
- yourmemory-1.3.0/src/graph/networkx_backend.py +161 -0
- yourmemory-1.3.0/src/graph/svo_extract.py +122 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/jobs/decay_job.py +23 -3
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/resolve.py +44 -23
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/retrieve.py +180 -9
- {yourmemory-1.2.2 → yourmemory-1.3.0/yourmemory.egg-info}/PKG-INFO +16 -7
- {yourmemory-1.2.2 → yourmemory-1.3.0}/yourmemory.egg-info/SOURCES.txt +6 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/yourmemory.egg-info/requires.txt +5 -1
- {yourmemory-1.2.2 → yourmemory-1.3.0}/LICENSE +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/setup.cfg +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/__init__.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/app.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/db/connection.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/db/duckdb_schema.sql +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/db/schema.sql +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/db/sqlite_schema.sql +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/routes/__init__.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/routes/agents.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/routes/memories.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/routes/retrieve.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/__init__.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/agent_registry.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/api_keys.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/decay.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/embed.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/extract.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/extract_fallback.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/src/services/resolve_fallback.py +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/yourmemory.egg-info/dependency_links.txt +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/yourmemory.egg-info/entry_points.txt +0 -0
- {yourmemory-1.2.2 → yourmemory-1.3.0}/yourmemory.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: yourmemory
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Persistent memory for Claude — Ebbinghaus forgetting curve, semantic deduplication, MCP-native
|
|
5
5
|
Author-email: Sachit Misra <mishrasachit1@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -182,6 +182,7 @@ Requires-Dist: python-dateutil
|
|
|
182
182
|
Requires-Dist: duckdb>=0.10.0
|
|
183
183
|
Requires-Dist: apscheduler
|
|
184
184
|
Requires-Dist: spacy<4.0,>=3.8.13
|
|
185
|
+
Requires-Dist: networkx>=3.0
|
|
185
186
|
Provides-Extra: postgres
|
|
186
187
|
Requires-Dist: psycopg2-binary; extra == "postgres"
|
|
187
188
|
Requires-Dist: pgvector; extra == "postgres"
|
|
@@ -189,8 +190,10 @@ Provides-Extra: sse
|
|
|
189
190
|
Requires-Dist: fastapi; extra == "sse"
|
|
190
191
|
Requires-Dist: uvicorn[standard]; extra == "sse"
|
|
191
192
|
Requires-Dist: httpx; extra == "sse"
|
|
193
|
+
Provides-Extra: neo4j
|
|
194
|
+
Requires-Dist: neo4j>=5.0; extra == "neo4j"
|
|
192
195
|
Provides-Extra: all
|
|
193
|
-
Requires-Dist: yourmemory[postgres,sse]; extra == "all"
|
|
196
|
+
Requires-Dist: yourmemory[neo4j,postgres,sse]; extra == "all"
|
|
194
197
|
Dynamic: license-file
|
|
195
198
|
|
|
196
199
|
# YourMemory
|
|
@@ -256,17 +259,23 @@ pip install yourmemory
|
|
|
256
259
|
|
|
257
260
|
All dependencies installed automatically. No clone, no Docker, no database setup.
|
|
258
261
|
|
|
259
|
-
### 2.
|
|
262
|
+
### 2. Run setup (once)
|
|
260
263
|
|
|
261
|
-
|
|
264
|
+
```bash
|
|
265
|
+
yourmemory-setup
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
Downloads the spaCy language model and initialises the database. Run this once after install.
|
|
269
|
+
|
|
270
|
+
### 3. Get your config
|
|
262
271
|
|
|
263
272
|
```bash
|
|
264
273
|
yourmemory-path
|
|
265
274
|
```
|
|
266
275
|
|
|
267
|
-
|
|
276
|
+
Prints your full executable path and a ready-to-paste config for any MCP client. Copy it.
|
|
268
277
|
|
|
269
|
-
###
|
|
278
|
+
### 4. Wire into your AI client
|
|
270
279
|
|
|
271
280
|
The database is created automatically at `~/.yourmemory/memories.duckdb` on first use.
|
|
272
281
|
|
|
@@ -352,7 +361,7 @@ Restart Claude Desktop.
|
|
|
352
361
|
|
|
353
362
|
YourMemory is a standard stdio MCP server. Works with Claude Code, Claude Desktop, Cline, Cursor, Windsurf, Continue, and Zed. Use the full path from `yourmemory-path` if the client doesn't inherit shell PATH.
|
|
354
363
|
|
|
355
|
-
###
|
|
364
|
+
### 5. Add memory instructions to your project
|
|
356
365
|
|
|
357
366
|
Copy `sample_CLAUDE.md` into your project root as `CLAUDE.md` and replace:
|
|
358
367
|
- `YOUR_NAME` — your name (e.g. `Alice`)
|
|
@@ -61,17 +61,23 @@ pip install yourmemory
|
|
|
61
61
|
|
|
62
62
|
All dependencies installed automatically. No clone, no Docker, no database setup.
|
|
63
63
|
|
|
64
|
-
### 2.
|
|
64
|
+
### 2. Run setup (once)
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
```bash
|
|
67
|
+
yourmemory-setup
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Downloads the spaCy language model and initialises the database. Run this once after install.
|
|
71
|
+
|
|
72
|
+
### 3. Get your config
|
|
67
73
|
|
|
68
74
|
```bash
|
|
69
75
|
yourmemory-path
|
|
70
76
|
```
|
|
71
77
|
|
|
72
|
-
|
|
78
|
+
Prints your full executable path and a ready-to-paste config for any MCP client. Copy it.
|
|
73
79
|
|
|
74
|
-
###
|
|
80
|
+
### 4. Wire into your AI client
|
|
75
81
|
|
|
76
82
|
The database is created automatically at `~/.yourmemory/memories.duckdb` on first use.
|
|
77
83
|
|
|
@@ -157,7 +163,7 @@ Restart Claude Desktop.
|
|
|
157
163
|
|
|
158
164
|
YourMemory is a standard stdio MCP server. Works with Claude Code, Claude Desktop, Cline, Cursor, Windsurf, Continue, and Zed. Use the full path from `yourmemory-path` if the client doesn't inherit shell PATH.
|
|
159
165
|
|
|
160
|
-
###
|
|
166
|
+
### 5. Add memory instructions to your project
|
|
161
167
|
|
|
162
168
|
Copy `sample_CLAUDE.md` into your project root as `CLAUDE.md` and replace:
|
|
163
169
|
- `YOUR_NAME` — your name (e.g. `Alice`)
|
|
@@ -399,6 +399,23 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
|
|
|
399
399
|
cur.close()
|
|
400
400
|
conn.close()
|
|
401
401
|
|
|
402
|
+
# Index into graph (best-effort; never blocks the response)
|
|
403
|
+
if memory_id is not None:
|
|
404
|
+
try:
|
|
405
|
+
from src.graph.graph_store import index_memory as _graph_index
|
|
406
|
+
_graph_index(
|
|
407
|
+
memory_id=memory_id,
|
|
408
|
+
user_id=user_id,
|
|
409
|
+
content=final_content,
|
|
410
|
+
strength=importance,
|
|
411
|
+
importance=importance,
|
|
412
|
+
category=category,
|
|
413
|
+
embedding=list(embedding),
|
|
414
|
+
)
|
|
415
|
+
except Exception as _ge:
|
|
416
|
+
import sys as _sys
|
|
417
|
+
print(f"[graph] index_memory failed: {_ge}", file=_sys.stderr)
|
|
418
|
+
|
|
402
419
|
return [types.TextContent(type="text", text=json.dumps(
|
|
403
420
|
{"stored": 1, "id": memory_id, "content": final_content, "category": category,
|
|
404
421
|
"importance": importance, "agent_id": agent_id, "visibility": visibility,
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "yourmemory"
|
|
7
|
-
version = "1.
|
|
7
|
+
version = "1.3.0"
|
|
8
8
|
description = "Persistent memory for Claude — Ebbinghaus forgetting curve, semantic deduplication, MCP-native"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -29,12 +29,14 @@ dependencies = [
|
|
|
29
29
|
"duckdb>=0.10.0",
|
|
30
30
|
"apscheduler",
|
|
31
31
|
"spacy>=3.8.13,<4.0",
|
|
32
|
+
"networkx>=3.0",
|
|
32
33
|
]
|
|
33
34
|
|
|
34
35
|
[project.optional-dependencies]
|
|
35
36
|
postgres = ["psycopg2-binary", "pgvector"]
|
|
36
37
|
sse = ["fastapi", "uvicorn[standard]", "httpx"]
|
|
37
|
-
|
|
38
|
+
neo4j = ["neo4j>=5.0"]
|
|
39
|
+
all = ["yourmemory[postgres,sse,neo4j]"]
|
|
38
40
|
|
|
39
41
|
[project.scripts]
|
|
40
42
|
yourmemory = "memory_mcp:run"
|
|
@@ -39,6 +39,14 @@ def migrate():
|
|
|
39
39
|
conn.close()
|
|
40
40
|
print(f"Migration complete ({backend}).", file=sys.stderr)
|
|
41
41
|
|
|
42
|
+
# Bootstrap the graph backend (creates indexes for Neo4j, touches pickle for NetworkX)
|
|
43
|
+
try:
|
|
44
|
+
from src.graph import get_graph_backend
|
|
45
|
+
get_graph_backend()
|
|
46
|
+
print("Graph backend initialised.", file=sys.stderr)
|
|
47
|
+
except Exception as exc:
|
|
48
|
+
print(f"Graph backend init skipped: {exc}", file=sys.stderr)
|
|
49
|
+
|
|
42
50
|
|
|
43
51
|
if __name__ == "__main__":
|
|
44
52
|
migrate()
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graph backend factory.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
from src.graph import get_graph_backend
|
|
6
|
+
graph = get_graph_backend() # NetworkX by default
|
|
7
|
+
|
|
8
|
+
Override via env var:
|
|
9
|
+
GRAPH_BACKEND=neo4j → Neo4jBackend
|
|
10
|
+
GRAPH_BACKEND=networkx (default)
|
|
11
|
+
|
|
12
|
+
The returned instance is a module-level singleton — import graph_store
|
|
13
|
+
instead of calling this directly in application code.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
from src.graph.backend import GraphBackend
|
|
18
|
+
|
|
19
|
+
_instance: GraphBackend | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_graph_backend() -> GraphBackend:
    """Return the module-level singleton graph backend.

    The backend is created on the first call and cached in ``_instance``;
    later calls return the same object.  The implementation is selected by
    the ``GRAPH_BACKEND`` environment variable (case-insensitive, surrounding
    whitespace ignored):

    * ``neo4j``    -> ``Neo4jBackend``
    * ``networkx`` -> ``NetworkXBackend`` (also used when the variable is unset)

    Any other value still falls back to ``NetworkXBackend``, but a warning is
    written to stderr so that a typo does not silently select the wrong
    backend.  Backend modules are imported lazily, inside the branch that
    needs them, so only the selected backend's module is loaded.
    """
    global _instance
    if _instance is None:
        backend = os.getenv("GRAPH_BACKEND", "networkx").strip().lower()
        if backend == "neo4j":
            from src.graph.neo4j_backend import Neo4jBackend
            _instance = Neo4jBackend()
        else:
            if backend not in ("", "networkx"):
                # Local import: this module only imports ``os`` at the top.
                import sys
                print(
                    f"[graph] unknown GRAPH_BACKEND={backend!r}; "
                    "falling back to networkx",
                    file=sys.stderr,
                )
            from src.graph.networkx_backend import NetworkXBackend
            _instance = NetworkXBackend()
    return _instance
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def reset_graph_backend() -> None:
    """Force re-initialisation of the singleton backend (used in tests).

    Closes the current backend, if any, and clears ``_instance`` so the next
    ``get_graph_backend()`` call builds a fresh one.  A failing ``close()`` is
    reported on stderr instead of being silently discarded, and never
    prevents the reset itself.

    ``src.graph.graph_store`` keeps its own cached reference (``_graph``) to
    the backend.  If that module has already been imported, its cache is
    cleared too, so it does not keep using the backend that was just closed.
    """
    global _instance
    import sys

    if _instance is not None:
        try:
            _instance.close()
        except Exception as exc:
            # Best-effort: log and carry on so the reset always completes.
            print(f"[graph] backend close failed during reset: {exc}",
                  file=sys.stderr)
    _instance = None

    # Only touch graph_store if it is already loaded; importing it here would
    # pull in its own dependencies as a side effect.
    # NOTE(review): assumes the module is registered under this dotted name.
    store = sys.modules.get("src.graph.graph_store")
    if store is not None:
        store._graph = None
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Abstract GraphBackend interface.
|
|
3
|
+
Implementations: NetworkXBackend (default), Neo4jBackend (opt-in via GRAPH_BACKEND=neo4j).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GraphBackend(ABC):
    """Abstract interface every graph backend must implement.

    All methods are abstract; a concrete subclass has to override each of
    them before it can be instantiated.
    """

    @abstractmethod
    def upsert_node(
        self,
        memory_id: int,
        user_id: str,
        strength: float,
        importance: float,
        category: str,
    ) -> None:
        """Insert the node for a memory, or update it if it already exists."""

    @abstractmethod
    def upsert_edge(
        self,
        source_id: int,
        target_id: int,
        relation: str,
        weight: float,
    ) -> None:
        """Insert a directed edge source -> target, or strengthen an existing one."""

    @abstractmethod
    def get_neighbors(
        self,
        memory_id: int,
        user_id: str,
        max_depth: int = 2,
    ) -> list:
        """
        Breadth-first search outward from memory_id, at most max_depth hops.

        Each result is a dict of the form
        {"memory_id": int, "distance": int, "edge_weight": float}.
        Traversal is restricted to nodes owned by user_id.
        """

    @abstractmethod
    def boost_node_and_neighbors(
        self,
        memory_id: int,
        user_id: str,
        boost: float = 0.2,
        max_depth: int = 1,
    ) -> list:
        """
        Spread a recall boost from memory_id to its depth-1 neighbours.

        Returns the memory_ids that received the boost, so the caller can bump
        recall_count for them in the vector DB.
        NOTE(review): max_depth presumably bounds how far the boost travels;
        confirm against the concrete backends.
        """

    @abstractmethod
    def get_node_strength(self, memory_id: int) -> Optional[float]:
        """Look up a node's cached strength; None when the node does not exist."""

    @abstractmethod
    def update_node_strength(self, memory_id: int, strength: float) -> None:
        """Overwrite the cached strength once the vector DB has recomputed it."""

    @abstractmethod
    def get_all_nodes_for_user(self, user_id: str) -> list:
        """List every node dict of a user (input for chain-aware pruning)."""

    @abstractmethod
    def delete_node(self, memory_id: int) -> None:
        """Delete a node together with all edges attached to it."""

    @abstractmethod
    def close(self) -> None:
        """Flush pending state and release any held resources."""
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""
|
|
2
|
+
High-level graph façade used by the rest of the application.
|
|
3
|
+
|
|
4
|
+
Four public functions:
|
|
5
|
+
index_memory(memory_id, user_id, content, strength, importance, category, embedding=None)
|
|
6
|
+
→ upsert node, then link it to its most similar existing memories (edge weight = cosine similarity × SVO verb weight)
|
|
7
|
+
|
|
8
|
+
expand_with_graph(seed_ids, user_id, top_k) → list of extra memory_ids
|
|
9
|
+
→ multi-hop BFS from each seed; deduplicated, sorted by edge_weight
|
|
10
|
+
|
|
11
|
+
propagate_recall(memory_id, user_id) → list of memory_ids that were boosted
|
|
12
|
+
→ boost recall_proxy on depth-1 neighbours (for recall_count bump in DB)
|
|
13
|
+
|
|
14
|
+
chain_safe_to_prune(memory_id, user_id, threshold) → bool
|
|
15
|
+
→ True only when ALL graph neighbours are also below the threshold
|
|
16
|
+
|
|
17
|
+
All four are silent no-ops if the graph backend is unavailable.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import sys
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
from src.graph.svo_extract import extract_triples
|
|
24
|
+
|
|
25
|
+
# Lazy: don't crash the server if networkx is missing
|
|
26
|
+
_graph = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _g():
|
|
30
|
+
global _graph
|
|
31
|
+
if _graph is None:
|
|
32
|
+
try:
|
|
33
|
+
from src.graph import get_graph_backend
|
|
34
|
+
_graph = get_graph_backend()
|
|
35
|
+
except Exception as exc:
|
|
36
|
+
print(f"[graph_store] backend unavailable: {exc}", file=sys.stderr)
|
|
37
|
+
return _graph
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ------------------------------------------------------------------ #
|
|
41
|
+
# Index a stored memory (call after INSERT)
|
|
42
|
+
# ------------------------------------------------------------------ #
|
|
43
|
+
|
|
44
|
+
def index_memory(
    memory_id: int,
    user_id: str,
    content: str,
    strength: float,
    importance: float,
    category: str,
    embedding: list | None = None,
) -> None:
    """
    Register a memory node in the graph and create semantically-weighted edges.

    Steps:
      1. Upsert the node.  If that fails the error is logged and nothing
         else happens.
      2. If `embedding` is given, look up the top-5 most similar *existing*
         memories of the same user (cosine similarity >= 0.4).
      3. For each of them create an edge whose weight is
         ``similarity × verb_weight`` (rounded to 4 decimals).  The verb
         weight and relation label both come from the highest-weight SVO
         triple extracted from `content`; when no triple is extracted the
         weight is 0.5 and the relation is "related".

    Without an embedding, or when nothing is similar enough, the node is
    left isolated: there is no recency-based fallback.

    The function never raises for backend errors; failures are printed to
    stderr.  It is a no-op when the graph backend is unavailable.
    """
    g = _g()
    if g is None:
        return

    try:
        g.upsert_node(memory_id, user_id, strength, importance, category)
    except Exception as exc:
        print(f"[graph_store] upsert_node failed: {exc}", file=sys.stderr)
        return

    # ── Find semantically similar neighbours via the vector DB ──────────
    if embedding is None:
        return  # nothing to compare against: keep the node isolated
    similar = _similar_nodes(memory_id, user_id, embedding, top_k=5, min_sim=0.4)
    if not similar:
        return  # no semantically related neighbours — isolated node is correct

    # ── Pick the strongest SVO triple for both weight and label ─────────
    # Taking both values from the same triple keeps the edge label consistent
    # with the weight applied to it.
    triples = extract_triples(content)
    if triples:
        best = max(triples, key=lambda t: t["weight"])
        verb_weight = best["weight"]
        relation = best["predicate"]
    else:
        verb_weight = 0.5
        relation = "related"

    # ── Create edges: weight = cosine_similarity × verb_weight ──────────
    for nbr in similar:
        edge_weight = round(nbr["similarity"] * verb_weight, 4)
        try:
            g.upsert_edge(memory_id, nbr["memory_id"], relation, edge_weight)
        except Exception as exc:
            # One failed edge must not stop the remaining ones.
            print(f"[graph_store] upsert_edge failed: {exc}", file=sys.stderr)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _similar_nodes(
|
|
102
|
+
memory_id: int,
|
|
103
|
+
user_id: str,
|
|
104
|
+
embedding: list,
|
|
105
|
+
top_k: int = 5,
|
|
106
|
+
min_sim: float = 0.3,
|
|
107
|
+
) -> list:
|
|
108
|
+
"""
|
|
109
|
+
Query the vector DB for the top-k most similar existing memories
|
|
110
|
+
(excluding memory_id itself). Returns [{memory_id, similarity}].
|
|
111
|
+
"""
|
|
112
|
+
try:
|
|
113
|
+
from src.db.connection import get_backend, get_conn
|
|
114
|
+
from src.db.connection import duckdb_rows
|
|
115
|
+
backend = get_backend()
|
|
116
|
+
conn = get_conn()
|
|
117
|
+
|
|
118
|
+
if backend == "duckdb":
|
|
119
|
+
result = conn.execute("""
|
|
120
|
+
SELECT id,
|
|
121
|
+
array_cosine_similarity(embedding, ?::FLOAT[768]) AS sim
|
|
122
|
+
FROM memories
|
|
123
|
+
WHERE user_id = ? AND id != ?
|
|
124
|
+
AND array_cosine_similarity(embedding, ?::FLOAT[768]) >= ?
|
|
125
|
+
ORDER BY sim DESC
|
|
126
|
+
LIMIT ?
|
|
127
|
+
""", [embedding, user_id, memory_id, embedding, min_sim, top_k])
|
|
128
|
+
rows = duckdb_rows(result)
|
|
129
|
+
conn.close()
|
|
130
|
+
return [{"memory_id": r["id"], "similarity": r["sim"]} for r in rows]
|
|
131
|
+
|
|
132
|
+
elif backend == "postgres":
|
|
133
|
+
emb_str = f"[{','.join(str(x) for x in embedding)}]"
|
|
134
|
+
cur = conn.cursor()
|
|
135
|
+
cur.execute("""
|
|
136
|
+
SELECT id,
|
|
137
|
+
1 - (embedding <=> %s::vector) AS sim
|
|
138
|
+
FROM memories
|
|
139
|
+
WHERE user_id = %s AND id != %s
|
|
140
|
+
AND 1 - (embedding <=> %s::vector) >= %s
|
|
141
|
+
ORDER BY sim DESC
|
|
142
|
+
LIMIT %s
|
|
143
|
+
""", (emb_str, user_id, memory_id, emb_str, min_sim, top_k))
|
|
144
|
+
rows = cur.fetchall()
|
|
145
|
+
cur.close()
|
|
146
|
+
conn.close()
|
|
147
|
+
return [{"memory_id": r[0], "similarity": r[1]} for r in rows]
|
|
148
|
+
|
|
149
|
+
else: # sqlite — compute cosine in Python
|
|
150
|
+
import json
|
|
151
|
+
import numpy as np
|
|
152
|
+
cur = conn.cursor()
|
|
153
|
+
cur.execute(
|
|
154
|
+
"SELECT id, embedding FROM memories WHERE user_id = ? AND id != ?",
|
|
155
|
+
(user_id, memory_id),
|
|
156
|
+
)
|
|
157
|
+
va = np.array(embedding, dtype=float)
|
|
158
|
+
results = []
|
|
159
|
+
for row in cur.fetchall():
|
|
160
|
+
if row[1] is None:
|
|
161
|
+
continue
|
|
162
|
+
vb = np.array(json.loads(row[1]), dtype=float)
|
|
163
|
+
den = np.linalg.norm(va) * np.linalg.norm(vb)
|
|
164
|
+
sim = float(np.dot(va, vb) / den) if den else 0.0
|
|
165
|
+
if sim >= min_sim:
|
|
166
|
+
results.append({"memory_id": row[0], "similarity": sim})
|
|
167
|
+
results.sort(key=lambda x: x["similarity"], reverse=True)
|
|
168
|
+
cur.close()
|
|
169
|
+
conn.close()
|
|
170
|
+
return results[:top_k]
|
|
171
|
+
|
|
172
|
+
except Exception as exc:
|
|
173
|
+
print(f"[graph_store] _similar_nodes failed: {exc}", file=sys.stderr)
|
|
174
|
+
return []
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ------------------------------------------------------------------ #
|
|
178
|
+
# Expand vector search results with graph neighbours
|
|
179
|
+
# ------------------------------------------------------------------ #
|
|
180
|
+
|
|
181
|
+
def expand_with_graph(
    seed_ids: list[int],
    user_id: str,
    top_k: int = 5,
) -> list[int]:
    """
    BFS (up to 2 hops) from each seed memory_id; return up to top_k extra ids
    that are not themselves seeds.

    When a candidate is reachable from several seeds, the *largest*
    edge_weight reported for it is kept (weights are not summed).  The
    result is sorted by that weight, strongest first.

    Returns an empty list when `seed_ids` is empty or the graph backend is
    unavailable.  A failing neighbour lookup for one seed is logged to
    stderr and that seed is skipped.
    """
    if not seed_ids:
        return []  # nothing to expand; no need to resolve the backend
    g = _g()
    if g is None:
        return []

    seen_seeds = set(seed_ids)
    candidates: dict[int, float] = {}   # memory_id → best edge_weight

    # dict.fromkeys drops duplicate seeds while keeping first-seen order, so
    # each seed triggers exactly one traversal.
    for seed in dict.fromkeys(seed_ids):
        try:
            neighbours = g.get_neighbors(seed, user_id, max_depth=2)
        except Exception as exc:
            print(f"[graph_store] get_neighbors failed: {exc}", file=sys.stderr)
            continue
        for nbr in neighbours:
            nid = nbr["memory_id"]
            if nid in seen_seeds:
                continue
            ew = nbr["edge_weight"]
            if nid not in candidates or candidates[nid] < ew:
                candidates[nid] = ew

    ranked = sorted(candidates, key=lambda k: candidates[k], reverse=True)
    return ranked[:top_k]
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# ------------------------------------------------------------------ #
|
|
217
|
+
# Propagate recall boost through graph edges
|
|
218
|
+
# ------------------------------------------------------------------ #
|
|
219
|
+
|
|
220
|
+
def propagate_recall(memory_id: int, user_id: str) -> list[int]:
    """
    Ask the graph backend to boost the depth-1 neighbours of a recalled memory.

    The backend's return value (the boosted memory_ids) is handed back
    unchanged so the caller can increment recall_count in the vector DB.
    An unavailable backend or a backend error yields an empty list; errors
    are reported on stderr.
    """
    backend = _g()
    if backend is None:
        return []
    try:
        boosted = backend.boost_node_and_neighbors(
            memory_id,
            user_id,
            boost=0.2,
            max_depth=1,
        )
    except Exception as err:
        print(f"[graph_store] propagate_recall failed: {err}", file=sys.stderr)
        return []
    return boosted
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# ------------------------------------------------------------------ #
|
|
239
|
+
# Chain-aware pruning gate
|
|
240
|
+
# ------------------------------------------------------------------ #
|
|
241
|
+
|
|
242
|
+
def chain_safe_to_prune(
    memory_id: int,
    user_id: str,
    threshold: float,
) -> bool:
    """
    Return True if it is safe to prune this memory.

    A memory is safe to prune only when ALL of its direct (depth-1) graph
    neighbours are below `threshold`.  If any neighbour's cached strength is
    at or above the threshold, the memory is kept alive (chain integrity).

    Returns True (prune normally) when the graph backend is unavailable,
    when the neighbour lookup fails, or when the node has no neighbours.
    A neighbour whose strength is unknown (None) or cannot be read does not
    block pruning; read failures are logged to stderr instead of being
    raised to the caller.
    """
    g = _g()
    if g is None:
        return True

    try:
        neighbours = g.get_neighbors(memory_id, user_id, max_depth=1)
    except Exception as exc:
        print(f"[graph_store] chain_safe_to_prune failed: {exc}", file=sys.stderr)
        return True

    if not neighbours:
        return True     # isolated node — prune normally

    for nbr in neighbours:
        nid = nbr["memory_id"]
        try:
            strength = g.get_node_strength(nid)
        except Exception as exc:
            # Treat an unreadable neighbour like a missing one and keep
            # checking the rest, rather than crashing the caller.
            print(f"[graph_store] chain_safe_to_prune failed: {exc}",
                  file=sys.stderr)
            continue
        if strength is not None and strength >= threshold:
            return False    # at least one strong neighbour → keep alive

    return True
|