yourmemory 1.2.3__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yourmemory-1.2.3/yourmemory.egg-info → yourmemory-1.3.0}/PKG-INFO +5 -2
- {yourmemory-1.2.3 → yourmemory-1.3.0}/memory_mcp.py +17 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/pyproject.toml +4 -2
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/db/migrate.py +8 -0
- yourmemory-1.3.0/src/graph/__init__.py +44 -0
- yourmemory-1.3.0/src/graph/backend.py +58 -0
- yourmemory-1.3.0/src/graph/graph_store.py +275 -0
- yourmemory-1.3.0/src/graph/neo4j_backend.py +195 -0
- yourmemory-1.3.0/src/graph/networkx_backend.py +161 -0
- yourmemory-1.3.0/src/graph/svo_extract.py +122 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/jobs/decay_job.py +23 -3
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/retrieve.py +180 -9
- {yourmemory-1.2.3 → yourmemory-1.3.0/yourmemory.egg-info}/PKG-INFO +5 -2
- {yourmemory-1.2.3 → yourmemory-1.3.0}/yourmemory.egg-info/SOURCES.txt +6 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/yourmemory.egg-info/requires.txt +5 -1
- {yourmemory-1.2.3 → yourmemory-1.3.0}/LICENSE +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/README.md +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/setup.cfg +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/__init__.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/app.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/db/connection.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/db/duckdb_schema.sql +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/db/schema.sql +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/db/sqlite_schema.sql +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/routes/__init__.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/routes/agents.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/routes/memories.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/routes/retrieve.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/__init__.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/agent_registry.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/api_keys.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/decay.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/embed.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/extract.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/extract_fallback.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/resolve.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/src/services/resolve_fallback.py +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/yourmemory.egg-info/dependency_links.txt +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/yourmemory.egg-info/entry_points.txt +0 -0
- {yourmemory-1.2.3 → yourmemory-1.3.0}/yourmemory.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: yourmemory
|
|
3
|
-
Version: 1.2.3
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Persistent memory for Claude — Ebbinghaus forgetting curve, semantic deduplication, MCP-native
|
|
5
5
|
Author-email: Sachit Misra <mishrasachit1@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -182,6 +182,7 @@ Requires-Dist: python-dateutil
|
|
|
182
182
|
Requires-Dist: duckdb>=0.10.0
|
|
183
183
|
Requires-Dist: apscheduler
|
|
184
184
|
Requires-Dist: spacy<4.0,>=3.8.13
|
|
185
|
+
Requires-Dist: networkx>=3.0
|
|
185
186
|
Provides-Extra: postgres
|
|
186
187
|
Requires-Dist: psycopg2-binary; extra == "postgres"
|
|
187
188
|
Requires-Dist: pgvector; extra == "postgres"
|
|
@@ -189,8 +190,10 @@ Provides-Extra: sse
|
|
|
189
190
|
Requires-Dist: fastapi; extra == "sse"
|
|
190
191
|
Requires-Dist: uvicorn[standard]; extra == "sse"
|
|
191
192
|
Requires-Dist: httpx; extra == "sse"
|
|
193
|
+
Provides-Extra: neo4j
|
|
194
|
+
Requires-Dist: neo4j>=5.0; extra == "neo4j"
|
|
192
195
|
Provides-Extra: all
|
|
193
|
-
Requires-Dist: yourmemory[postgres,sse]; extra == "all"
|
|
196
|
+
Requires-Dist: yourmemory[neo4j,postgres,sse]; extra == "all"
|
|
194
197
|
Dynamic: license-file
|
|
195
198
|
|
|
196
199
|
# YourMemory
|
|
@@ -399,6 +399,23 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
|
|
|
399
399
|
cur.close()
|
|
400
400
|
conn.close()
|
|
401
401
|
|
|
402
|
+
# Index into graph (best-effort; never blocks the response)
|
|
403
|
+
if memory_id is not None:
|
|
404
|
+
try:
|
|
405
|
+
from src.graph.graph_store import index_memory as _graph_index
|
|
406
|
+
_graph_index(
|
|
407
|
+
memory_id=memory_id,
|
|
408
|
+
user_id=user_id,
|
|
409
|
+
content=final_content,
|
|
410
|
+
strength=importance,
|
|
411
|
+
importance=importance,
|
|
412
|
+
category=category,
|
|
413
|
+
embedding=list(embedding),
|
|
414
|
+
)
|
|
415
|
+
except Exception as _ge:
|
|
416
|
+
import sys as _sys
|
|
417
|
+
print(f"[graph] index_memory failed: {_ge}", file=_sys.stderr)
|
|
418
|
+
|
|
402
419
|
return [types.TextContent(type="text", text=json.dumps(
|
|
403
420
|
{"stored": 1, "id": memory_id, "content": final_content, "category": category,
|
|
404
421
|
"importance": importance, "agent_id": agent_id, "visibility": visibility,
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "yourmemory"
|
|
7
|
-
version = "1.2.3"
|
|
7
|
+
version = "1.3.0"
|
|
8
8
|
description = "Persistent memory for Claude — Ebbinghaus forgetting curve, semantic deduplication, MCP-native"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -29,12 +29,14 @@ dependencies = [
|
|
|
29
29
|
"duckdb>=0.10.0",
|
|
30
30
|
"apscheduler",
|
|
31
31
|
"spacy>=3.8.13,<4.0",
|
|
32
|
+
"networkx>=3.0",
|
|
32
33
|
]
|
|
33
34
|
|
|
34
35
|
[project.optional-dependencies]
|
|
35
36
|
postgres = ["psycopg2-binary", "pgvector"]
|
|
36
37
|
sse = ["fastapi", "uvicorn[standard]", "httpx"]
|
|
37
|
-
|
|
38
|
+
neo4j = ["neo4j>=5.0"]
|
|
39
|
+
all = ["yourmemory[postgres,sse,neo4j]"]
|
|
38
40
|
|
|
39
41
|
[project.scripts]
|
|
40
42
|
yourmemory = "memory_mcp:run"
|
|
@@ -39,6 +39,14 @@ def migrate():
|
|
|
39
39
|
conn.close()
|
|
40
40
|
print(f"Migration complete ({backend}).", file=sys.stderr)
|
|
41
41
|
|
|
42
|
+
# Bootstrap the graph backend (creates indexes for Neo4j, touches pickle for NetworkX)
|
|
43
|
+
try:
|
|
44
|
+
from src.graph import get_graph_backend
|
|
45
|
+
get_graph_backend()
|
|
46
|
+
print("Graph backend initialised.", file=sys.stderr)
|
|
47
|
+
except Exception as exc:
|
|
48
|
+
print(f"Graph backend init skipped: {exc}", file=sys.stderr)
|
|
49
|
+
|
|
42
50
|
|
|
43
51
|
if __name__ == "__main__":
|
|
44
52
|
migrate()
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graph backend factory.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
from src.graph import get_graph_backend
|
|
6
|
+
graph = get_graph_backend() # NetworkX by default
|
|
7
|
+
|
|
8
|
+
Override via env var:
|
|
9
|
+
GRAPH_BACKEND=neo4j → Neo4jBackend
|
|
10
|
+
GRAPH_BACKEND=networkx (default)
|
|
11
|
+
|
|
12
|
+
The returned instance is a module-level singleton — import graph_store
|
|
13
|
+
instead of calling this directly in application code.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
from src.graph.backend import GraphBackend
|
|
18
|
+
|
|
19
|
+
_instance: GraphBackend | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_graph_backend() -> GraphBackend:
    """
    Return the shared graph backend, creating it on first use.

    The backend is chosen from the GRAPH_BACKEND environment variable
    (case-insensitive): "neo4j" selects Neo4jBackend, any other value
    (including the default "networkx") selects NetworkXBackend. Backend
    modules are imported lazily so only the selected one has to be importable.
    """
    global _instance

    # Fast path: the singleton already exists.
    if _instance is not None:
        return _instance

    choice = os.getenv("GRAPH_BACKEND", "networkx").lower()
    if choice == "neo4j":
        from src.graph.neo4j_backend import Neo4jBackend as backend_cls
    else:
        from src.graph.networkx_backend import NetworkXBackend as backend_cls

    _instance = backend_cls()
    return _instance
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def reset_graph_backend() -> None:
    """
    Drop the cached backend so the next get_graph_backend() call rebuilds it.

    Intended for tests. The current backend (if any) is closed on a
    best-effort basis: errors raised by close() are deliberately ignored so
    the reset always completes.
    """
    global _instance

    current = _instance
    if current is not None:
        try:
            current.close()
        except Exception:
            # Best-effort teardown: a failing close must not block the reset.
            pass

    _instance = None
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Abstract GraphBackend interface.
|
|
3
|
+
Implementations: NetworkXBackend (default), Neo4jBackend (opt-in via GRAPH_BACKEND=neo4j).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GraphBackend(ABC):
    """
    Storage-agnostic contract for the memory graph.

    Nodes represent memories (keyed by memory_id); directed, weighted edges
    represent relations between memories. Every method is abstract, so a
    concrete backend must implement all of them before it can be instantiated.
    """

    @abstractmethod
    def upsert_node(
        self,
        memory_id: int,
        user_id: str,
        strength: float,
        importance: float,
        category: str,
    ) -> None:
        """Insert the node for `memory_id`, or update it if it already exists."""

    @abstractmethod
    def upsert_edge(
        self,
        source_id: int,
        target_id: int,
        relation: str,
        weight: float,
    ) -> None:
        """Insert a directed edge source_id -> target_id, or strengthen an existing one."""

    @abstractmethod
    def get_neighbors(
        self,
        memory_id: int,
        user_id: str,
        max_depth: int = 2,
    ) -> list:
        """
        Breadth-first search outward from `memory_id`, at most `max_depth` hops.

        Returns a list of dicts shaped like
        {"memory_id": int, "distance": int, "edge_weight": float}.
        Traversal is limited to nodes owned by `user_id`.
        """

    @abstractmethod
    def boost_node_and_neighbors(
        self,
        memory_id: int,
        user_id: str,
        boost: float = 0.2,
        max_depth: int = 1,
    ) -> list:
        """
        Spread a recall boost from `memory_id` to its neighbours.

        Neighbours up to `max_depth` hops away are considered (depth 1 by
        default). Returns the memory_ids that were boosted so the caller can
        bump recall_count in the vector DB.
        """

    @abstractmethod
    def get_node_strength(self, memory_id: int) -> Optional[float]:
        """Return the strength cached on the node, or None when the node is absent."""

    @abstractmethod
    def update_node_strength(self, memory_id: int, strength: float) -> None:
        """Overwrite the cached strength once the vector DB has recomputed it."""

    @abstractmethod
    def get_all_nodes_for_user(self, user_id: str) -> list:
        """Return every node dict owned by `user_id` (used by chain-aware pruning)."""

    @abstractmethod
    def delete_node(self, memory_id: int) -> None:
        """Delete the node together with all edges attached to it."""

    @abstractmethod
    def close(self) -> None:
        """Flush pending state and release any held resources."""
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""
|
|
2
|
+
High-level graph façade used by the rest of the application.
|
|
3
|
+
|
|
4
|
+
Four public functions:
|
|
5
|
+
index_memory(memory_id, user_id, content, strength, importance, category, embedding=None)
|
|
6
|
+
→ extract SVO triples, upsert node, upsert edges
|
|
7
|
+
|
|
8
|
+
expand_with_graph(seed_ids, user_id, top_k) → list of extra memory_ids
|
|
9
|
+
→ multi-hop BFS from each seed; deduplicated, sorted by edge_weight
|
|
10
|
+
|
|
11
|
+
propagate_recall(memory_id, user_id) → list of memory_ids that were boosted
|
|
12
|
+
→ boost recall_proxy on depth-1 neighbours (for recall_count bump in DB)
|
|
13
|
+
|
|
14
|
+
chain_safe_to_prune(memory_id, user_id, threshold) → bool
|
|
15
|
+
→ True only when ALL graph neighbours are also below the threshold
|
|
16
|
+
|
|
17
|
+
All four are silent no-ops if the graph backend is unavailable.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import sys
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
from src.graph.svo_extract import extract_triples
|
|
24
|
+
|
|
25
|
+
# Lazy: don't crash the server if networkx is missing
|
|
26
|
+
_graph = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _g():
|
|
30
|
+
global _graph
|
|
31
|
+
if _graph is None:
|
|
32
|
+
try:
|
|
33
|
+
from src.graph import get_graph_backend
|
|
34
|
+
_graph = get_graph_backend()
|
|
35
|
+
except Exception as exc:
|
|
36
|
+
print(f"[graph_store] backend unavailable: {exc}", file=sys.stderr)
|
|
37
|
+
return _graph
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ------------------------------------------------------------------ #
|
|
41
|
+
# Index a stored memory (call after INSERT)
|
|
42
|
+
# ------------------------------------------------------------------ #
|
|
43
|
+
|
|
44
|
+
def index_memory(
    memory_id: int,
    user_id: str,
    content: str,
    strength: float,
    importance: float,
    category: str,
    embedding: list | None = None,
) -> None:
    """
    Register a memory node in the graph and create semantically-weighted edges.

    Steps:
      1. Upsert the node. If that fails, the error is logged and nothing
         else is attempted.
      2. If `embedding` is provided, look up the top-5 most similar existing
         memories of the same user (cosine similarity >= 0.4). Without an
         embedding, or when nothing is similar enough, the node is left
         isolated — there is no recency-based fallback.
      3. Extract SVO triples from `content` and pick the highest-weight one.
         Its predicate becomes the edge relation and its weight the verb
         weight; with no triples the relation is "related" and the verb
         weight is 0.5.
      4. Create one edge per similar memory with
         weight = round(similarity * verb_weight, 4).

    Args:
        memory_id: Id of the memory that was just stored.
        user_id: Owner of the memory; similarity search is limited to this user.
        content: Memory text, used for SVO extraction.
        strength: Initial strength cached on the node.
        importance: Importance cached on the node.
        category: Category cached on the node.
        embedding: Embedding vector of `content`, or None to skip edge creation.

    Backend failures while upserting the node or an edge are logged to stderr
    and never raised. Does nothing if the graph backend is unavailable.
    """
    g = _g()
    if g is None:
        return

    try:
        g.upsert_node(memory_id, user_id, strength, importance, category)
    except Exception as exc:
        print(f"[graph_store] upsert_node failed: {exc}", file=sys.stderr)
        return

    # ── Find semantically similar neighbours via the vector DB ──────────
    if embedding is None:
        return  # nothing to compare against — isolated node is correct

    similar = _similar_nodes(memory_id, user_id, embedding, top_k=5, min_sim=0.4)
    if not similar:
        return  # no semantically related neighbours — isolated node is correct

    # ── Pick the strongest SVO triple ───────────────────────────────────
    # Relation and weight are taken from the SAME triple so the edge label
    # always matches the weight applied to it. max() returns the first
    # maximal element, so ties resolve to the earliest triple.
    triples = extract_triples(content)
    best = max(triples, key=lambda t: t["weight"], default=None)
    if best is None:
        verb_weight = 0.5
        relation = "related"
    else:
        verb_weight = best["weight"]
        relation = best["predicate"]

    # ── Create edges: weight = cosine_similarity × verb_weight ──────────
    for nbr in similar:
        edge_weight = round(nbr["similarity"] * verb_weight, 4)
        try:
            g.upsert_edge(memory_id, nbr["memory_id"], relation, edge_weight)
        except Exception as exc:
            # One failed edge must not prevent the remaining edges.
            print(f"[graph_store] upsert_edge failed: {exc}", file=sys.stderr)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _similar_nodes(
|
|
102
|
+
memory_id: int,
|
|
103
|
+
user_id: str,
|
|
104
|
+
embedding: list,
|
|
105
|
+
top_k: int = 5,
|
|
106
|
+
min_sim: float = 0.3,
|
|
107
|
+
) -> list:
|
|
108
|
+
"""
|
|
109
|
+
Query the vector DB for the top-k most similar existing memories
|
|
110
|
+
(excluding memory_id itself). Returns [{memory_id, similarity}].
|
|
111
|
+
"""
|
|
112
|
+
try:
|
|
113
|
+
from src.db.connection import get_backend, get_conn
|
|
114
|
+
from src.db.connection import duckdb_rows
|
|
115
|
+
backend = get_backend()
|
|
116
|
+
conn = get_conn()
|
|
117
|
+
|
|
118
|
+
if backend == "duckdb":
|
|
119
|
+
result = conn.execute("""
|
|
120
|
+
SELECT id,
|
|
121
|
+
array_cosine_similarity(embedding, ?::FLOAT[768]) AS sim
|
|
122
|
+
FROM memories
|
|
123
|
+
WHERE user_id = ? AND id != ?
|
|
124
|
+
AND array_cosine_similarity(embedding, ?::FLOAT[768]) >= ?
|
|
125
|
+
ORDER BY sim DESC
|
|
126
|
+
LIMIT ?
|
|
127
|
+
""", [embedding, user_id, memory_id, embedding, min_sim, top_k])
|
|
128
|
+
rows = duckdb_rows(result)
|
|
129
|
+
conn.close()
|
|
130
|
+
return [{"memory_id": r["id"], "similarity": r["sim"]} for r in rows]
|
|
131
|
+
|
|
132
|
+
elif backend == "postgres":
|
|
133
|
+
emb_str = f"[{','.join(str(x) for x in embedding)}]"
|
|
134
|
+
cur = conn.cursor()
|
|
135
|
+
cur.execute("""
|
|
136
|
+
SELECT id,
|
|
137
|
+
1 - (embedding <=> %s::vector) AS sim
|
|
138
|
+
FROM memories
|
|
139
|
+
WHERE user_id = %s AND id != %s
|
|
140
|
+
AND 1 - (embedding <=> %s::vector) >= %s
|
|
141
|
+
ORDER BY sim DESC
|
|
142
|
+
LIMIT %s
|
|
143
|
+
""", (emb_str, user_id, memory_id, emb_str, min_sim, top_k))
|
|
144
|
+
rows = cur.fetchall()
|
|
145
|
+
cur.close()
|
|
146
|
+
conn.close()
|
|
147
|
+
return [{"memory_id": r[0], "similarity": r[1]} for r in rows]
|
|
148
|
+
|
|
149
|
+
else: # sqlite — compute cosine in Python
|
|
150
|
+
import json
|
|
151
|
+
import numpy as np
|
|
152
|
+
cur = conn.cursor()
|
|
153
|
+
cur.execute(
|
|
154
|
+
"SELECT id, embedding FROM memories WHERE user_id = ? AND id != ?",
|
|
155
|
+
(user_id, memory_id),
|
|
156
|
+
)
|
|
157
|
+
va = np.array(embedding, dtype=float)
|
|
158
|
+
results = []
|
|
159
|
+
for row in cur.fetchall():
|
|
160
|
+
if row[1] is None:
|
|
161
|
+
continue
|
|
162
|
+
vb = np.array(json.loads(row[1]), dtype=float)
|
|
163
|
+
den = np.linalg.norm(va) * np.linalg.norm(vb)
|
|
164
|
+
sim = float(np.dot(va, vb) / den) if den else 0.0
|
|
165
|
+
if sim >= min_sim:
|
|
166
|
+
results.append({"memory_id": row[0], "similarity": sim})
|
|
167
|
+
results.sort(key=lambda x: x["similarity"], reverse=True)
|
|
168
|
+
cur.close()
|
|
169
|
+
conn.close()
|
|
170
|
+
return results[:top_k]
|
|
171
|
+
|
|
172
|
+
except Exception as exc:
|
|
173
|
+
print(f"[graph_store] _similar_nodes failed: {exc}", file=sys.stderr)
|
|
174
|
+
return []
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ------------------------------------------------------------------ #
|
|
178
|
+
# Expand vector search results with graph neighbours
|
|
179
|
+
# ------------------------------------------------------------------ #
|
|
180
|
+
|
|
181
|
+
def expand_with_graph(
    seed_ids: list[int],
    user_id: str,
    top_k: int = 5,
) -> list[int]:
    """
    Collect graph neighbours of the seed memories that are not seeds themselves.

    Each seed is expanded up to 2 hops. For every neighbour reached, the
    largest edge_weight reported across all seeds is kept. The result holds
    at most `top_k` memory_ids ordered by that weight, strongest first.

    Returns [] when the backend is unavailable or `seed_ids` is empty. A
    seed whose expansion raises is logged to stderr and skipped.
    """
    g = _g()
    if g is None or not seed_ids:
        return []

    excluded = set(seed_ids)
    best_weight: dict[int, float] = {}  # memory_id → strongest edge_weight seen

    for seed in seed_ids:
        try:
            reached = g.get_neighbors(seed, user_id, max_depth=2)
        except Exception as exc:
            print(f"[graph_store] get_neighbors failed: {exc}", file=sys.stderr)
            continue

        for hop in reached:
            nid = hop["memory_id"]
            if nid in excluded:
                continue
            ew = hop["edge_weight"]
            # Keep the existing entry unless this path is strictly stronger.
            if nid in best_weight and not (best_weight[nid] < ew):
                continue
            best_weight[nid] = ew

    ordered = sorted(best_weight, key=lambda k: best_weight[k], reverse=True)
    return ordered[:top_k]
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# ------------------------------------------------------------------ #
|
|
217
|
+
# Propagate recall boost through graph edges
|
|
218
|
+
# ------------------------------------------------------------------ #
|
|
219
|
+
|
|
220
|
+
def propagate_recall(memory_id: int, user_id: str) -> list[int]:
    """
    Spread a recall boost (0.2) from a recalled memory to its depth-1 neighbours.

    Returns the memory_ids the backend reports as boosted, so the caller can
    increment recall_count in the vector DB. Returns [] when the backend is
    unavailable or the boost fails (the failure is logged to stderr).
    """
    backend = _g()
    if backend is not None:
        try:
            return backend.boost_node_and_neighbors(
                memory_id, user_id, boost=0.2, max_depth=1
            )
        except Exception as exc:
            print(f"[graph_store] propagate_recall failed: {exc}", file=sys.stderr)
    return []
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# ------------------------------------------------------------------ #
|
|
239
|
+
# Chain-aware pruning gate
|
|
240
|
+
# ------------------------------------------------------------------ #
|
|
241
|
+
|
|
242
|
+
def chain_safe_to_prune(
    memory_id: int,
    user_id: str,
    threshold: float,
) -> bool:
    """
    Return True if it is safe to prune this memory.

    A memory is safe to prune only when ALL of its depth-1 graph neighbours
    are below `threshold`. If any neighbour has a cached strength >=
    `threshold`, the memory is kept alive (chain integrity). Neighbours whose
    strength is unknown (None) do not block pruning.

    Falls back to True (prune normally) if the graph backend is unavailable
    or if any backend call fails — both the neighbour lookup and the
    per-neighbour strength lookups are covered; failures are logged to stderr.
    """
    g = _g()
    if g is None:
        return True

    try:
        neighbours = g.get_neighbors(memory_id, user_id, max_depth=1)
        if not neighbours:
            return True  # isolated node — prune normally

        for nbr in neighbours:
            # May hit the backend; kept inside the try so a failing lookup
            # follows the documented fallback instead of escaping.
            strength = g.get_node_strength(nbr["memory_id"])
            if strength is not None and strength >= threshold:
                return False  # at least one strong neighbour → keep alive
    except Exception as exc:
        print(f"[graph_store] chain_safe_to_prune failed: {exc}", file=sys.stderr)
        return True

    return True
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Neo4j graph backend — opt-in via GRAPH_BACKEND=neo4j.
|
|
3
|
+
Requires: pip install yourmemory[neo4j] (neo4j driver)
|
|
4
|
+
|
|
5
|
+
Connection: set NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD env vars.
|
|
6
|
+
Defaults: bolt://localhost:7687 / neo4j / neo4j
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from src.graph.backend import GraphBackend
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _get_driver():
|
|
16
|
+
try:
|
|
17
|
+
from neo4j import GraphDatabase # type: ignore
|
|
18
|
+
except ImportError:
|
|
19
|
+
raise ImportError(
|
|
20
|
+
"neo4j driver not installed. Run: pip install 'yourmemory[neo4j]'"
|
|
21
|
+
)
|
|
22
|
+
uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
|
|
23
|
+
user = os.getenv("NEO4J_USER", "neo4j")
|
|
24
|
+
pw = os.getenv("NEO4J_PASSWORD", "neo4j")
|
|
25
|
+
return GraphDatabase.driver(uri, auth=(user, pw))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Neo4jBackend(GraphBackend):
    """
    Graph backend that persists nodes/edges in Neo4j.

    Schema:
        (:Memory {memory_id, user_id, strength, importance, category, recall_proxy})
            -[:RELATES {relation, weight}]->

    Every operation opens a short-lived session on the shared driver.
    """

    def __init__(self):
        """Create the driver from env configuration and ensure indexes exist."""
        self._driver = _get_driver()
        self._ensure_indexes()

    def _ensure_indexes(self):
        """Create the memory_id and user_id indexes on :Memory if missing."""
        with self._driver.session() as s:
            s.run(
                "CREATE INDEX memory_id_idx IF NOT EXISTS "
                "FOR (m:Memory) ON (m.memory_id)"
            )
            s.run(
                "CREATE INDEX memory_user_idx IF NOT EXISTS "
                "FOR (m:Memory) ON (m.user_id)"
            )

    # ------------------------------------------------------------------ #
    # Node operations
    # ------------------------------------------------------------------ #

    def upsert_node(self, memory_id: int, user_id: str, strength: float,
                    importance: float, category: str) -> None:
        """
        Create or update the :Memory node for `memory_id`.

        recall_proxy is initialised to 0.0 on first insert and left untouched
        on later updates (coalesce keeps the existing value).
        """
        with self._driver.session() as s:
            s.run(
                """
                MERGE (m:Memory {memory_id: $mid})
                SET m.user_id    = $uid,
                    m.strength   = $strength,
                    m.importance = $importance,
                    m.category   = $category,
                    m.recall_proxy = coalesce(m.recall_proxy, 0.0)
                """,
                mid=memory_id, uid=user_id,
                strength=strength, importance=importance, category=category,
            )

    def update_node_strength(self, memory_id: int, strength: float) -> None:
        """Overwrite the cached strength of an existing node (no-op if absent)."""
        with self._driver.session() as s:
            s.run(
                "MATCH (m:Memory {memory_id: $mid}) SET m.strength = $strength",
                mid=memory_id, strength=strength,
            )

    def get_node_strength(self, memory_id: int) -> Optional[float]:
        """Return the cached strength of a node, or None if no node matches."""
        with self._driver.session() as s:
            result = s.run(
                "MATCH (m:Memory {memory_id: $mid}) RETURN m.strength AS strength",
                mid=memory_id,
            ).single()
            return result["strength"] if result else None

    def get_all_nodes_for_user(self, user_id: str) -> list:
        """Return one dict per :Memory node owned by `user_id`."""
        with self._driver.session() as s:
            records = s.run(
                """
                MATCH (m:Memory {user_id: $uid})
                RETURN m.memory_id    AS memory_id,
                       m.strength     AS strength,
                       m.importance   AS importance,
                       m.category     AS category,
                       m.recall_proxy AS recall_proxy
                """,
                uid=user_id,
            )
            # Materialise inside the session: the result is consumed here.
            return [
                {
                    "memory_id": r["memory_id"],
                    "user_id": user_id,
                    "strength": r["strength"],
                    "importance": r["importance"],
                    "category": r["category"],
                    "recall_proxy": r["recall_proxy"],
                }
                for r in records
            ]

    def delete_node(self, memory_id: int) -> None:
        """Remove a node and all relationships attached to it."""
        with self._driver.session() as s:
            s.run(
                "MATCH (m:Memory {memory_id: $mid}) DETACH DELETE m",
                mid=memory_id,
            )

    # ------------------------------------------------------------------ #
    # Edge operations
    # ------------------------------------------------------------------ #

    def upsert_edge(self, source_id: int, target_id: int,
                    relation: str, weight: float) -> None:
        """
        Create a RELATES edge source -> target, or strengthen an existing one.

        On create the edge gets `relation` and `weight`. On match the stored
        weight grows by 10% of `weight`, capped at 1.0; the relation is kept.
        Nothing happens if either endpoint node does not exist.
        """
        with self._driver.session() as s:
            s.run(
                """
                MATCH (a:Memory {memory_id: $src}), (b:Memory {memory_id: $tgt})
                MERGE (a)-[r:RELATES]->(b)
                ON CREATE SET r.relation = $relation, r.weight = $weight
                ON MATCH  SET r.weight = CASE
                    WHEN r.weight + $weight * 0.1 < 1.0
                    THEN r.weight + $weight * 0.1
                    ELSE 1.0
                END
                """,
                src=source_id, tgt=target_id,
                relation=relation, weight=weight,
            )

    # ------------------------------------------------------------------ #
    # Traversal
    # ------------------------------------------------------------------ #

    def get_neighbors(self, memory_id: int, user_id: str,
                      max_depth: int = 2) -> list:
        """
        BFS up to max_depth hops (undirected) — only returns user_id nodes.

        Returns a list of {"memory_id", "distance", "edge_weight"} dicts, one
        per neighbour. edge_weight is the product of the relationship weights
        along a path; when several paths reach the same neighbour, the path
        with the highest product is kept. A max_depth below 1 yields [].
        """
        depth = int(max_depth)
        if depth < 1:
            return []

        # Cypher does not accept a parameter as a variable-length bound
        # (`*1..$depth` is rejected), so the bound is inlined. `depth` has
        # been coerced to int above, so no untrusted text reaches the query.
        query = (
            "MATCH path = (start:Memory {memory_id: $mid})"
            "-[:RELATES*1.." + str(depth) + "]-(nbr:Memory)\n"
            "WHERE nbr.user_id = $uid AND nbr.memory_id <> $mid\n"
            "WITH nbr,\n"
            "     length(path) AS distance,\n"
            "     reduce(w = 1.0, r IN relationships(path) | w * r.weight) AS edge_weight\n"
            "RETURN nbr.memory_id AS memory_id, distance, edge_weight\n"
            "ORDER BY edge_weight DESC\n"
        )

        # Deduplicate: keep highest edge_weight per neighbor
        seen: dict[int, dict] = {}
        with self._driver.session() as s:
            for r in s.run(query, mid=memory_id, uid=user_id):
                nid = r["memory_id"]
                if nid not in seen or r["edge_weight"] > seen[nid]["edge_weight"]:
                    seen[nid] = {
                        "memory_id": nid,
                        "distance": r["distance"],
                        "edge_weight": r["edge_weight"],
                    }
        return list(seen.values())

    def boost_node_and_neighbors(self, memory_id: int, user_id: str,
                                 boost: float = 0.2,
                                 max_depth: int = 1) -> list:
        """
        Raise recall_proxy on the neighbours of `memory_id`.

        Each neighbour within `max_depth` hops gains boost * edge_weight.
        Returns the memory_ids of the boosted neighbours ([] if none).
        """
        neighbors = self.get_neighbors(memory_id, user_id, max_depth=max_depth)
        if not neighbors:
            return []
        boosted = []
        with self._driver.session() as s:
            for nbr in neighbors:
                nid = nbr["memory_id"]
                edge_w = nbr["edge_weight"]
                s.run(
                    """
                    MATCH (m:Memory {memory_id: $mid})
                    SET m.recall_proxy = coalesce(m.recall_proxy, 0.0) + $delta
                    """,
                    mid=nid, delta=boost * edge_w,
                )
                boosted.append(nid)
        return boosted

    def close(self) -> None:
        """Close the underlying Neo4j driver."""
        self._driver.close()
|