tribalmemory 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tribalmemory/interfaces.py +22 -3
- tribalmemory/mcp/server.py +112 -0
- tribalmemory/services/graph_store.py +627 -0
- tribalmemory/services/memory.py +285 -11
- {tribalmemory-0.2.0.dist-info → tribalmemory-0.3.0.dist-info}/METADATA +1 -1
- {tribalmemory-0.2.0.dist-info → tribalmemory-0.3.0.dist-info}/RECORD +10 -9
- {tribalmemory-0.2.0.dist-info → tribalmemory-0.3.0.dist-info}/WHEEL +0 -0
- {tribalmemory-0.2.0.dist-info → tribalmemory-0.3.0.dist-info}/entry_points.txt +0 -0
- {tribalmemory-0.2.0.dist-info → tribalmemory-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {tribalmemory-0.2.0.dist-info → tribalmemory-0.3.0.dist-info}/top_level.txt +0 -0
tribalmemory/interfaces.py
CHANGED
|
@@ -7,9 +7,12 @@ from abc import ABC, abstractmethod
|
|
|
7
7
|
from dataclasses import dataclass, field
|
|
8
8
|
from datetime import datetime
|
|
9
9
|
from enum import Enum
|
|
10
|
-
from typing import Optional
|
|
10
|
+
from typing import Literal, Optional
|
|
11
11
|
import uuid
|
|
12
12
|
|
|
13
|
+
# Valid retrieval methods for RecallResult
|
|
14
|
+
RetrievalMethod = Literal["vector", "graph", "hybrid", "entity"]
|
|
15
|
+
|
|
13
16
|
|
|
14
17
|
class MemorySource(Enum):
|
|
15
18
|
"""Source of a memory entry."""
|
|
@@ -69,13 +72,21 @@ class MemoryEntry:
|
|
|
69
72
|
|
|
70
73
|
@dataclass
class RecallResult:
    """One hit returned by a memory recall query.

    Attributes:
        memory: The recalled memory entry.
        similarity_score: Relevance score (0.0-1.0 for vector, 1.0 for exact entity match).
        retrieval_time_ms: Time taken for retrieval.
        retrieval_method: How this result was found (see RetrievalMethod type).
    """
    memory: MemoryEntry
    similarity_score: float
    retrieval_time_ms: float
    retrieval_method: RetrievalMethod = "vector"

    def __repr__(self) -> str:
        # Shorten the memory id to its first 8 chars for readable logs.
        short_id = self.memory.id[:8]
        return (
            f"RecallResult(score={self.similarity_score:.3f}, "
            f"method={self.retrieval_method}, memory_id={short_id}...)"
        )
|
|
79
90
|
|
|
80
91
|
|
|
81
92
|
@dataclass
|
|
@@ -315,6 +326,7 @@ class IMemoryService(ABC):
|
|
|
315
326
|
limit: int = 5,
|
|
316
327
|
min_relevance: float = 0.7,
|
|
317
328
|
tags: Optional[list[str]] = None,
|
|
329
|
+
graph_expansion: bool = True,
|
|
318
330
|
) -> list[RecallResult]:
|
|
319
331
|
"""Recall relevant memories for a query.
|
|
320
332
|
|
|
@@ -323,6 +335,13 @@ class IMemoryService(ABC):
|
|
|
323
335
|
limit: Maximum results
|
|
324
336
|
min_relevance: Minimum similarity score
|
|
325
337
|
tags: Filter by tags (e.g., ["work", "preferences"])
|
|
338
|
+
graph_expansion: Expand candidates via entity graph (default True)
|
|
339
|
+
|
|
340
|
+
Returns:
|
|
341
|
+
List of RecallResult objects with retrieval_method indicating source:
|
|
342
|
+
- "vector": Vector similarity search
|
|
343
|
+
- "hybrid": Vector + BM25 merge
|
|
344
|
+
- "graph": Entity graph traversal
|
|
326
345
|
"""
|
|
327
346
|
pass
|
|
328
347
|
|
tribalmemory/mcp/server.py
CHANGED
|
@@ -412,6 +412,118 @@ def create_server() -> FastMCP:
|
|
|
412
412
|
|
|
413
413
|
return json.dumps(stats)
|
|
414
414
|
|
|
415
|
+
@mcp.tool()
async def tribal_recall_entity(
    entity_name: str,
    hops: int = 1,
    limit: int = 10,
) -> str:
    """Recall memories associated with an entity and its connections.

    Enables entity-centric queries like:
    - "Tell me everything about auth-service"
    - "What do we know about PostgreSQL?"
    - "What services connect to the user database?"

    Args:
        entity_name: Name of the entity to query (required).
            Examples: "auth-service", "PostgreSQL", "user-db"
        hops: Number of relationship hops to traverse (default 1).
            1 = direct connections only
            2 = connections of connections
        limit: Maximum number of results (1-50, default 10)

    Returns:
        JSON with: results (list of memories), entity, hops, count
    """
    def error_response(message: str) -> str:
        # Error payloads mirror the success shape so callers parse uniformly.
        # `hops` is read at call time: raw before clamping, clamped after.
        return json.dumps({
            "results": [],
            "entity": entity_name,
            "hops": hops,
            "count": 0,
            "error": message,
        })

    if not entity_name or not entity_name.strip():
        return error_response("Entity name cannot be empty")

    # Clamp inputs to reasonable ranges.
    hops = min(max(hops, 1), 10)
    limit = min(max(limit, 1), 50)

    service = await get_memory_service()

    if not service.graph_enabled:
        return error_response(
            "Graph search not enabled. Requires db_path for persistent storage."
        )

    matches = await service.recall_entity(
        entity_name=entity_name,
        hops=hops,
        limit=limit,
    )

    serialized = [
        {
            "memory_id": hit.memory.id,
            "content": hit.memory.content,
            "source_type": hit.memory.source_type.value,
            "source_instance": hit.memory.source_instance,
            "tags": hit.memory.tags,
            "created_at": hit.memory.created_at.isoformat(),
        }
        for hit in matches
    ]

    return json.dumps({
        "results": serialized,
        "entity": entity_name,
        "hops": hops,
        "count": len(serialized),
    })
|
|
484
|
+
|
|
485
|
+
@mcp.tool()
async def tribal_entity_graph(
    entity_name: str,
    hops: int = 2,
) -> str:
    """Get the relationship graph around an entity.

    Useful for understanding how concepts/services/technologies
    are connected in your project knowledge base.

    Args:
        entity_name: Name of the entity to explore (required)
        hops: How many relationship hops to include (default 2)

    Returns:
        JSON with: entities (list with name/type), relationships (list with source/target/type)
    """
    def error_response(message: str) -> str:
        # Error payloads keep the same top-level keys as success responses.
        return json.dumps({
            "entities": [],
            "relationships": [],
            "error": message,
        })

    if not entity_name or not entity_name.strip():
        return error_response("Entity name cannot be empty")

    # Clamp traversal depth to a reasonable range.
    hops = min(max(hops, 1), 5)

    service = await get_memory_service()

    if not service.graph_enabled:
        return error_response(
            "Graph search not enabled. Requires db_path for persistent storage."
        )

    # get_entity_graph is synchronous; it returns a plain JSON-serializable dict.
    return json.dumps(service.get_entity_graph(entity_name=entity_name, hops=hops))
|
|
526
|
+
|
|
415
527
|
@mcp.tool()
|
|
416
528
|
async def tribal_export(
|
|
417
529
|
tags: Optional[list[str]] = None,
|
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
"""Graph-enriched memory storage for entity and relationship tracking.
|
|
2
|
+
|
|
3
|
+
Provides a lightweight graph layer alongside vector search to enable:
|
|
4
|
+
- Entity-centric queries ("tell me everything about auth-service")
|
|
5
|
+
- Relationship traversal ("what does auth-service connect to?")
|
|
6
|
+
- Multi-hop reasoning ("what framework does the service that handles auth use?")
|
|
7
|
+
|
|
8
|
+
Uses SQLite for local-first, zero-cloud constraint.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import re
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
# Constants
|
|
17
|
+
MIN_ENTITY_NAME_LENGTH = 3
|
|
18
|
+
MAX_HOP_ITERATIONS = 100 # Safety limit for graph traversal
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class Entity:
    """A single named entity extracted from memory text."""

    # Display name as seen in the text, e.g. "auth-service" or "PostgreSQL".
    name: str
    # Broad category: service, technology, data, concept, person, etc.
    entity_type: str
    # Free-form extra attributes; a fresh empty dict per instance.
    metadata: dict = field(default_factory=dict)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Relationship:
    """A directed relationship between two named entities."""

    # Entity name at the tail of the edge.
    source: str
    # Entity name at the head of the edge.
    target: str
    # Edge label: uses, stores, connects_to, depends_on, etc.
    relation_type: str
    # Free-form extra attributes; a fresh empty dict per instance.
    metadata: dict = field(default_factory=dict)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class EntityExtractor:
    """Extract entities and relationships from free-form memory text.

    Extraction is purely pattern-based (regexes plus a known-technology
    vocabulary), which keeps it dependency-free. It can be swapped for
    spaCy NER or LLM-based extraction later without changing callers.

    Attributes:
        SERVICE_PATTERN: Regex for service-like names (kebab-case with suffix or 8+ chars).
        TECHNOLOGIES: Set of known technology names for exact matching.
        RELATIONSHIP_PATTERNS: List of (pattern, relation_type) tuples for extraction.
    """

    # Patterns for common entity types.
    # Matches kebab-case identifiers that look like service/component names:
    # - must contain at least one hyphen (kebab-case)
    # - either ends with a known suffix, OR has 3+ segments, OR is long enough
    # - short false positives are filtered via MIN_ENTITY_NAME_LENGTH
    SERVICE_PATTERN = re.compile(
        r'\b('
        r'[a-z][a-z0-9]*-(?:[a-z0-9]+-)*(?:service|api|worker|db|cache|server|client|gateway|proxy|database)'  # Known suffix
        r'|'
        r'[a-z][a-z0-9]*(?:-[a-z0-9]+){2,}'  # 3+ segments
        r'|'
        r'[a-z][a-z0-9]*-[a-z0-9]{4,}'  # 2 segments, second is 4+ chars
        r')\b',
        re.IGNORECASE
    )

    # Known technology names (matched case-insensitively against word tokens).
    TECHNOLOGIES = {
        'postgresql', 'postgres', 'mysql', 'mongodb', 'redis', 'memcached',
        'elasticsearch', 'kafka', 'rabbitmq', 'nginx', 'docker', 'kubernetes',
        'aws', 'gcp', 'azure', 'terraform', 'ansible', 'jenkins', 'github',
        'python', 'javascript', 'typescript', 'rust', 'go', 'java', 'node',
        'react', 'vue', 'angular', 'django', 'flask', 'fastapi', 'express',
        'graphql', 'rest', 'grpc', 'websocket', 'http', 'https',
        'sqlite', 'lancedb', 'chromadb', 'pinecone', 'weaviate',
        'openai', 'anthropic', 'ollama', 'huggingface',
        'pgbouncer', 'haproxy', 'traefik', 'envoy',
    }

    # Relationship patterns: (compiled pattern, relation_type).
    RELATIONSHIP_PATTERNS = [
        (re.compile(r'(\S+)\s+uses\s+(\S+)', re.IGNORECASE), 'uses'),
        (re.compile(r'(\S+)\s+connects?\s+to\s+(\S+)', re.IGNORECASE), 'connects_to'),
        (re.compile(r'(\S+)\s+stores?\s+(?:data\s+)?in\s+(\S+)', re.IGNORECASE), 'stores_in'),
        (re.compile(r'(\S+)\s+depends?\s+on\s+(\S+)', re.IGNORECASE), 'depends_on'),
        (re.compile(r'(\S+)\s+talks?\s+to\s+(\S+)', re.IGNORECASE), 'connects_to'),
        (re.compile(r'(\S+)\s+calls?\s+(\S+)', re.IGNORECASE), 'calls'),
        (re.compile(r'(\S+)\s+handles?\s+(\S+)', re.IGNORECASE), 'handles'),
        (re.compile(r'(\S+)\s+for\s+(?:the\s+)?(\S+)', re.IGNORECASE), 'serves'),
    ]

    def extract(self, text: str) -> list[Entity]:
        """Extract entities from text.

        Args:
            text: Input text to extract entities from.

        Returns:
            List of extracted Entity objects (deduplicated case-insensitively,
            original casing preserved).
        """
        if not text or not text.strip():
            return []

        found: list[Entity] = []
        taken: set[str] = set()

        # Pass 1: service-like kebab-case identifiers.
        for hit in self.SERVICE_PATTERN.finditer(text):
            candidate = hit.group(1)
            if not candidate:
                continue
            key = candidate.lower()
            if key in taken or len(candidate) < MIN_ENTITY_NAME_LENGTH:
                continue
            taken.add(key)
            found.append(Entity(
                name=candidate,
                entity_type=self._infer_service_type(candidate),
            ))

        # Pass 2: known technology names among word tokens.
        for token in re.findall(r'\b\w+\b', text):
            key = token.lower()
            if key in self.TECHNOLOGIES and key not in taken:
                taken.add(key)
                found.append(Entity(
                    name=token,  # Preserve original case
                    entity_type='technology',
                ))

        return found

    def extract_with_relationships(
        self, text: str
    ) -> tuple[list[Entity], list[Relationship]]:
        """Extract both entities and relationships from text."""
        entities = self.extract(text)
        known = {e.name.lower() for e in entities}
        relationships: list[Relationship] = []

        for pattern, rel_type in self.RELATIONSHIP_PATTERNS:
            for hit in pattern.finditer(text):
                src = hit.group(1).strip('.,;:')
                dst = hit.group(2).strip('.,;:')

                # Keep the pair only when both ends were already extracted or
                # at least look like plausible entity names.
                src_ok = src.lower() in known or self._looks_like_entity(src)
                dst_ok = dst.lower() in known or self._looks_like_entity(dst)
                if not (src_ok and dst_ok):
                    continue

                relationships.append(Relationship(
                    source=src,
                    target=dst,
                    relation_type=rel_type,
                ))

                # Register endpoints that extract() did not already produce.
                for endpoint in (src, dst):
                    if endpoint.lower() not in known:
                        known.add(endpoint.lower())
                        entities.append(Entity(
                            name=endpoint,
                            entity_type=self._infer_type(endpoint),
                        ))

        return entities, relationships

    def _infer_service_type(self, name: str) -> str:
        """Infer entity type from a service-like name.

        Args:
            name: Service name to analyze.

        Returns:
            Entity type string (e.g., 'service', 'database', 'worker').
        """
        lowered = name.lower()
        # Ordered marker table: first matching suffix family wins.
        marker_table = (
            (('-db', '-database'), 'database'),
            (('-api', '-service'), 'service'),
            (('-worker', '-job'), 'worker'),
            (('-cache',), 'cache'),
            (('-gateway', '-proxy'), 'gateway'),
            (('-server',), 'server'),
            (('-client',), 'client'),
        )
        for markers, inferred in marker_table:
            if any(marker in lowered for marker in markers):
                return inferred
        return 'service'

    def _infer_type(self, name: str) -> str:
        """Infer entity type from name.

        Args:
            name: Entity name to analyze.

        Returns:
            Entity type string ('technology', a service subtype, or 'concept').
        """
        if name.lower() in self.TECHNOLOGIES:
            return 'technology'
        if self.SERVICE_PATTERN.match(name):
            return self._infer_service_type(name)
        return 'concept'

    def _looks_like_entity(self, name: str) -> bool:
        """Heuristic filter: does this string plausibly name an entity?

        Args:
            name: String to check.

        Returns:
            True if the string looks like an entity name.
        """
        if not name or len(name) < MIN_ENTITY_NAME_LENGTH:
            return False
        if name.lower() in self.TECHNOLOGIES or self.SERVICE_PATTERN.match(name):
            return True
        # Capitalized alphanumeric tokens are treated as proper nouns.
        return name[0].isupper() and name.isalnum()
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class GraphStore:
    """SQLite-backed graph storage for entities and relationships.

    Schema:
        - entities: (id, name, entity_type, metadata_json); name is UNIQUE
        - entity_memories: (entity_id, memory_id) - many-to-many
        - relationships: (id, source_entity_id, target_entity_id, relation_type, metadata_json)
        - relationship_memories: (relationship_id, memory_id) - many-to-many

    Note on connection management:
        Each operation opens a short-lived connection and closes it when done.
        For high-throughput scenarios, consider connection pooling. SQLite's
        file locking handles cross-process concurrency.
    """

    # Known technology names for type inference (shared with EntityExtractor).
    KNOWN_TECHNOLOGIES = EntityExtractor.TECHNOLOGIES

    def __init__(self, db_path: str | Path):
        """Initialize graph store with SQLite database.

        Args:
            db_path: Path to the SQLite database file. Parent directories are
                created if they do not exist.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_schema()

    def _get_connection(self) -> sqlite3.Connection:
        """Open a new database connection.

        Returns:
            SQLite connection with Row factory and foreign-key enforcement on.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        # FIX: SQLite leaves foreign-key enforcement OFF by default, so the
        # ON DELETE CASCADE clauses declared in the schema never fired. The
        # pragma is per-connection, so it must be set on every connection.
        conn.execute("PRAGMA foreign_keys = ON")
        return conn

    def _infer_entity_type(self, name: str) -> str:
        """Infer entity type from name when creating from relationships.

        Args:
            name: Entity name to analyze.

        Returns:
            Inferred entity type string.
        """
        name_lower = name.lower()
        if name_lower in self.KNOWN_TECHNOLOGIES:
            return 'technology'
        # Check for service-like suffix patterns.
        if '-db' in name_lower or '-database' in name_lower:
            return 'database'
        if '-api' in name_lower or '-service' in name_lower:
            return 'service'
        if '-worker' in name_lower or '-job' in name_lower:
            return 'worker'
        if '-cache' in name_lower:
            return 'cache'
        if '-gateway' in name_lower or '-proxy' in name_lower:
            return 'gateway'
        if '-' in name:  # Generic kebab-case, probably a service
            return 'service'
        return 'concept'

    def _init_schema(self) -> None:
        """Initialize database schema (idempotent)."""
        # `with conn` scopes the transaction; `closing` releases the
        # connection itself (Connection.__exit__ commits but does not close).
        with closing(self._get_connection()) as conn, conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS entities (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL,
                    entity_type TEXT NOT NULL,
                    metadata_json TEXT DEFAULT '{}',
                    UNIQUE(name)
                );

                CREATE TABLE IF NOT EXISTS entity_memories (
                    entity_id INTEGER NOT NULL,
                    memory_id TEXT NOT NULL,
                    FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE,
                    UNIQUE(entity_id, memory_id)
                );

                CREATE TABLE IF NOT EXISTS relationships (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_entity_id INTEGER NOT NULL,
                    target_entity_id INTEGER NOT NULL,
                    relation_type TEXT NOT NULL,
                    metadata_json TEXT DEFAULT '{}',
                    FOREIGN KEY (source_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
                    FOREIGN KEY (target_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
                    UNIQUE(source_entity_id, target_entity_id, relation_type)
                );

                CREATE TABLE IF NOT EXISTS relationship_memories (
                    relationship_id INTEGER NOT NULL,
                    memory_id TEXT NOT NULL,
                    FOREIGN KEY (relationship_id) REFERENCES relationships(id) ON DELETE CASCADE,
                    UNIQUE(relationship_id, memory_id)
                );

                CREATE INDEX IF NOT EXISTS idx_entity_name ON entities(name);
                CREATE INDEX IF NOT EXISTS idx_entity_memories_memory ON entity_memories(memory_id);
                CREATE INDEX IF NOT EXISTS idx_rel_source ON relationships(source_entity_id);
                CREATE INDEX IF NOT EXISTS idx_rel_target ON relationships(target_entity_id);
            """)

    def _get_or_create_entity_id(self, conn: sqlite3.Connection, name: str) -> int:
        """Return the id of the named entity, inserting it (with an inferred type) if absent."""
        row = conn.execute(
            "SELECT id FROM entities WHERE name = ?",
            (name,)
        ).fetchone()
        if row:
            return row[0]
        cursor = conn.execute(
            "INSERT INTO entities (name, entity_type) VALUES (?, ?) RETURNING id",
            (name, self._infer_entity_type(name))
        )
        return cursor.fetchone()[0]

    def add_entity(self, entity: Entity, memory_id: str) -> int:
        """Add (upsert) an entity and associate it with a memory.

        Args:
            entity: The entity to store. An existing entity with the same
                name has its type refreshed.
            memory_id: ID of the memory the entity was extracted from.

        Returns:
            The entity ID.
        """
        with closing(self._get_connection()) as conn, conn:
            # Upsert entity.
            # NOTE(review): entity.metadata is not persisted ('{}' is always
            # written); serializing it would require json — confirm intent.
            cursor = conn.execute(
                """
                INSERT INTO entities (name, entity_type, metadata_json)
                VALUES (?, ?, ?)
                ON CONFLICT(name) DO UPDATE SET
                    entity_type = COALESCE(excluded.entity_type, entities.entity_type)
                RETURNING id
                """,
                (entity.name, entity.entity_type, '{}')
            )
            entity_id = cursor.fetchone()[0]

            # Associate with memory (idempotent on duplicates).
            conn.execute(
                """
                INSERT OR IGNORE INTO entity_memories (entity_id, memory_id)
                VALUES (?, ?)
                """,
                (entity_id, memory_id)
            )

            return entity_id

    def add_relationship(self, relationship: Relationship, memory_id: str) -> int:
        """Add a relationship and associate it with a memory.

        Missing endpoint entities are created with inferred types.

        Args:
            relationship: The relationship to store.
            memory_id: ID of the memory this relationship was extracted from.

        Returns:
            The relationship ID.
        """
        with closing(self._get_connection()) as conn, conn:
            source_id = self._get_or_create_entity_id(conn, relationship.source)
            target_id = self._get_or_create_entity_id(conn, relationship.target)

            # Upsert relationship. RETURNING yields no row when the conflict
            # clause suppresses the insert, so fall back to a lookup.
            cursor = conn.execute(
                """
                INSERT INTO relationships (source_entity_id, target_entity_id, relation_type)
                VALUES (?, ?, ?)
                ON CONFLICT(source_entity_id, target_entity_id, relation_type) DO NOTHING
                RETURNING id
                """,
                (source_id, target_id, relationship.relation_type)
            )
            row = cursor.fetchone()
            if row:
                rel_id = row[0]
            else:
                # Relationship already exists, get its ID.
                rel_id = conn.execute(
                    """
                    SELECT id FROM relationships
                    WHERE source_entity_id = ? AND target_entity_id = ? AND relation_type = ?
                    """,
                    (source_id, target_id, relationship.relation_type)
                ).fetchone()[0]

            # Associate with memory (idempotent on duplicates).
            conn.execute(
                """
                INSERT OR IGNORE INTO relationship_memories (relationship_id, memory_id)
                VALUES (?, ?)
                """,
                (rel_id, memory_id)
            )

            return rel_id

    def get_entities_for_memory(self, memory_id: str) -> list[Entity]:
        """Get all entities associated with a memory."""
        with closing(self._get_connection()) as conn, conn:
            rows = conn.execute(
                """
                SELECT e.name, e.entity_type, e.metadata_json
                FROM entities e
                JOIN entity_memories em ON e.id = em.entity_id
                WHERE em.memory_id = ?
                """,
                (memory_id,)
            ).fetchall()

            return [
                Entity(name=row['name'], entity_type=row['entity_type'])
                for row in rows
            ]

    def get_relationships_for_entity(self, entity_name: str) -> list[Relationship]:
        """Get all relationships where entity is the source."""
        with closing(self._get_connection()) as conn, conn:
            rows = conn.execute(
                """
                SELECT e_source.name as source, e_target.name as target, r.relation_type
                FROM relationships r
                JOIN entities e_source ON r.source_entity_id = e_source.id
                JOIN entities e_target ON r.target_entity_id = e_target.id
                WHERE e_source.name = ?
                """,
                (entity_name,)
            ).fetchall()

            return [
                Relationship(
                    source=row['source'],
                    target=row['target'],
                    relation_type=row['relation_type']
                )
                for row in rows
            ]

    def get_memories_for_entity(self, entity_name: str) -> list[str]:
        """Get all memory IDs associated with an entity."""
        with closing(self._get_connection()) as conn, conn:
            rows = conn.execute(
                """
                SELECT DISTINCT em.memory_id
                FROM entity_memories em
                JOIN entities e ON em.entity_id = e.id
                WHERE e.name = ?
                """,
                (entity_name,)
            ).fetchall()

            return [row['memory_id'] for row in rows]

    def find_connected(
        self,
        entity_name: str,
        hops: int = 1,
        include_source: bool = False
    ) -> list[Entity]:
        """Find entities connected to the given entity within N hops.

        Performs an undirected breadth-first traversal: edges are followed
        from both the source and target side.

        Args:
            entity_name: Starting entity name.
            hops: Maximum number of relationship hops (1 = direct connections).
                Capped at MAX_HOP_ITERATIONS for safety.
            include_source: Whether to include the source entity in results.

        Returns:
            List of connected entities (empty if the entity is unknown).
        """
        # Safety: cap hops to prevent runaway traversal.
        safe_hops = min(hops, MAX_HOP_ITERATIONS)

        with closing(self._get_connection()) as conn, conn:
            # Resolve the starting entity.
            source = conn.execute(
                "SELECT id, name, entity_type FROM entities WHERE name = ?",
                (entity_name,)
            ).fetchone()

            if not source:
                return []

            visited: set[int] = {source['id']}
            current_frontier: set[int] = {source['id']}
            result_ids: set[int] = set()

            for _ in range(safe_hops):
                if not current_frontier:
                    break

                # Find all entities connected to the current frontier.
                # SECURITY NOTE: `placeholders` is safe because it is derived
                # from len(current_frontier) (an integer), not user input. The
                # actual values are passed as parameters, not interpolated.
                placeholders = ','.join('?' * len(current_frontier))
                rows = conn.execute(
                    f"""
                    SELECT DISTINCT e.id, e.name, e.entity_type
                    FROM entities e
                    JOIN relationships r ON (
                        (r.source_entity_id IN ({placeholders}) AND r.target_entity_id = e.id)
                        OR
                        (r.target_entity_id IN ({placeholders}) AND r.source_entity_id = e.id)
                    )
                    """,
                    list(current_frontier) + list(current_frontier)
                ).fetchall()

                next_frontier: set[int] = set()
                for row in rows:
                    if row['id'] not in visited:
                        visited.add(row['id'])
                        next_frontier.add(row['id'])
                        result_ids.add(row['id'])

                current_frontier = next_frontier

            # Fetch full entity info for results.
            if not result_ids:
                return []

            placeholders = ','.join('?' * len(result_ids))
            rows = conn.execute(
                f"SELECT name, entity_type FROM entities WHERE id IN ({placeholders})",
                list(result_ids)
            ).fetchall()

            result = [
                Entity(name=row['name'], entity_type=row['entity_type'])
                for row in rows
            ]

            if include_source:
                result.insert(0, Entity(
                    name=source['name'],
                    entity_type=source['entity_type']
                ))

            return result

    def delete_memory(self, memory_id: str) -> None:
        """Delete all entity and relationship associations for a memory.

        Note: Entities themselves are preserved (they may be referenced by
        other memories). Only the associations are removed, after which
        orphaned relationships/entities are garbage-collected.
        """
        with closing(self._get_connection()) as conn, conn:
            # Delete relationship associations.
            conn.execute(
                "DELETE FROM relationship_memories WHERE memory_id = ?",
                (memory_id,)
            )

            # Delete entity associations.
            conn.execute(
                "DELETE FROM entity_memories WHERE memory_id = ?",
                (memory_id,)
            )

            # Clean up orphaned relationships (no memory references).
            conn.execute("""
                DELETE FROM relationships
                WHERE id NOT IN (SELECT relationship_id FROM relationship_memories)
            """)

            # Clean up orphaned entities (no memory or relationship references).
            conn.execute("""
                DELETE FROM entities
                WHERE id NOT IN (SELECT entity_id FROM entity_memories)
                AND id NOT IN (SELECT source_entity_id FROM relationships)
                AND id NOT IN (SELECT target_entity_id FROM relationships)
            """)
|
tribalmemory/services/memory.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Tribal Memory Service - Main API for agents."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import logging
|
|
4
5
|
import os
|
|
5
6
|
from datetime import datetime
|
|
@@ -18,6 +19,7 @@ from ..interfaces import (
|
|
|
18
19
|
)
|
|
19
20
|
from .deduplication import SemanticDeduplicationService
|
|
20
21
|
from .fts_store import FTSStore, hybrid_merge
|
|
22
|
+
from .graph_store import GraphStore, EntityExtractor
|
|
21
23
|
from .reranker import IReranker, NoopReranker, create_reranker
|
|
22
24
|
|
|
23
25
|
logger = logging.getLogger(__name__)
|
|
@@ -37,6 +39,11 @@ class TribalMemoryService(IMemoryService):
|
|
|
37
39
|
results = await service.recall("What language for Wally?")
|
|
38
40
|
"""
|
|
39
41
|
|
|
42
|
+
# Graph expansion scoring constants
|
|
43
|
+
GRAPH_1HOP_SCORE = 0.85 # Score for direct entity mentions
|
|
44
|
+
GRAPH_2HOP_SCORE = 0.70 # Score for connected entity mentions
|
|
45
|
+
GRAPH_EXPANSION_BUFFER = 2 # Multiplier for candidate pool before fetching
|
|
46
|
+
|
|
40
47
|
def __init__(
|
|
41
48
|
self,
|
|
42
49
|
instance_id: str,
|
|
@@ -52,6 +59,8 @@ class TribalMemoryService(IMemoryService):
|
|
|
52
59
|
hybrid_candidate_multiplier: int = 4,
|
|
53
60
|
reranker: Optional[IReranker] = None,
|
|
54
61
|
rerank_pool_multiplier: int = 2,
|
|
62
|
+
graph_store: Optional[GraphStore] = None,
|
|
63
|
+
graph_enabled: bool = True,
|
|
55
64
|
):
|
|
56
65
|
self.instance_id = instance_id
|
|
57
66
|
self.embedding_service = embedding_service
|
|
@@ -64,6 +73,9 @@ class TribalMemoryService(IMemoryService):
|
|
|
64
73
|
self.hybrid_candidate_multiplier = hybrid_candidate_multiplier
|
|
65
74
|
self.reranker = reranker or NoopReranker()
|
|
66
75
|
self.rerank_pool_multiplier = rerank_pool_multiplier
|
|
76
|
+
self.graph_store = graph_store
|
|
77
|
+
self.graph_enabled = graph_enabled and graph_store is not None
|
|
78
|
+
self.entity_extractor = EntityExtractor() if self.graph_enabled else None
|
|
67
79
|
|
|
68
80
|
self.dedup_service = SemanticDeduplicationService(
|
|
69
81
|
vector_store=vector_store,
|
|
@@ -118,6 +130,26 @@ class TribalMemoryService(IMemoryService):
|
|
|
118
130
|
except Exception as e:
|
|
119
131
|
logger.warning("FTS indexing failed for %s: %s", entry.id, e)
|
|
120
132
|
|
|
133
|
+
# Extract and store entities for graph-enriched search
|
|
134
|
+
if result.success and self.graph_enabled and self.entity_extractor:
|
|
135
|
+
try:
|
|
136
|
+
entities, relationships = self.entity_extractor.extract_with_relationships(
|
|
137
|
+
content
|
|
138
|
+
)
|
|
139
|
+
for entity in entities:
|
|
140
|
+
self.graph_store.add_entity(entity, memory_id=entry.id)
|
|
141
|
+
for rel in relationships:
|
|
142
|
+
self.graph_store.add_relationship(rel, memory_id=entry.id)
|
|
143
|
+
if entities:
|
|
144
|
+
logger.debug(
|
|
145
|
+
"Extracted entities: %s, relationships: %s from %s",
|
|
146
|
+
[e.name for e in entities],
|
|
147
|
+
[(r.source, r.relation_type, r.target) for r in relationships],
|
|
148
|
+
entry.id
|
|
149
|
+
)
|
|
150
|
+
except Exception as e:
|
|
151
|
+
logger.warning("Graph indexing failed for %s: %s", entry.id, e)
|
|
152
|
+
|
|
121
153
|
return result
|
|
122
154
|
|
|
123
155
|
async def recall(
|
|
@@ -126,18 +158,29 @@ class TribalMemoryService(IMemoryService):
|
|
|
126
158
|
limit: int = 5,
|
|
127
159
|
min_relevance: float = 0.7,
|
|
128
160
|
tags: Optional[list[str]] = None,
|
|
161
|
+
graph_expansion: bool = True,
|
|
129
162
|
) -> list[RecallResult]:
|
|
130
|
-
"""Recall relevant memories using hybrid search.
|
|
163
|
+
"""Recall relevant memories using hybrid search with optional graph expansion.
|
|
131
164
|
|
|
132
165
|
When hybrid search is enabled (FTS store available), combines
|
|
133
166
|
vector similarity with BM25 keyword matching for better results.
|
|
134
167
|
Falls back to vector-only search when FTS is unavailable.
|
|
135
168
|
|
|
169
|
+
When graph expansion is enabled, entities are extracted from the query
|
|
170
|
+
and the candidate pool is expanded via entity graph traversal.
|
|
171
|
+
|
|
136
172
|
Args:
|
|
137
173
|
query: Natural language query
|
|
138
174
|
limit: Maximum results
|
|
139
175
|
min_relevance: Minimum similarity score
|
|
140
176
|
tags: Filter by tags (e.g., ["work", "preferences"])
|
|
177
|
+
graph_expansion: Expand candidates via entity graph (default True)
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
List of RecallResult objects with retrieval_method indicating source:
|
|
181
|
+
- "vector": Pure vector similarity search
|
|
182
|
+
- "hybrid": Vector + BM25 merge
|
|
183
|
+
- "graph": Entity graph traversal (1-hop or 2-hop)
|
|
141
184
|
"""
|
|
142
185
|
try:
|
|
143
186
|
query_embedding = await self.embedding_service.embed(query)
|
|
@@ -147,17 +190,33 @@ class TribalMemoryService(IMemoryService):
|
|
|
147
190
|
filters = {"tags": tags} if tags else None
|
|
148
191
|
|
|
149
192
|
if self.hybrid_search and self.fts_store:
|
|
150
|
-
|
|
193
|
+
results = await self._hybrid_recall(
|
|
151
194
|
query, query_embedding, limit, min_relevance, filters
|
|
152
195
|
)
|
|
196
|
+
else:
|
|
197
|
+
# Vector-only fallback
|
|
198
|
+
vector_results = await self.vector_store.recall(
|
|
199
|
+
query_embedding,
|
|
200
|
+
limit=limit,
|
|
201
|
+
min_similarity=min_relevance,
|
|
202
|
+
filters=filters,
|
|
203
|
+
)
|
|
204
|
+
# Mark as vector retrieval
|
|
205
|
+
results = [
|
|
206
|
+
RecallResult(
|
|
207
|
+
memory=r.memory,
|
|
208
|
+
similarity_score=r.similarity_score,
|
|
209
|
+
retrieval_time_ms=r.retrieval_time_ms,
|
|
210
|
+
retrieval_method="vector",
|
|
211
|
+
)
|
|
212
|
+
for r in vector_results
|
|
213
|
+
]
|
|
153
214
|
|
|
154
|
-
#
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
filters=filters,
|
|
160
|
-
)
|
|
215
|
+
# Graph expansion: find additional memories via entity connections
|
|
216
|
+
if graph_expansion and self.graph_enabled and self.entity_extractor:
|
|
217
|
+
results = await self._expand_via_graph(
|
|
218
|
+
query, results, limit, min_relevance
|
|
219
|
+
)
|
|
161
220
|
|
|
162
221
|
return self._filter_superseded(results)
|
|
163
222
|
|
|
@@ -215,7 +274,6 @@ class TribalMemoryService(IMemoryService):
|
|
|
215
274
|
bm25_only_ids.append(m)
|
|
216
275
|
|
|
217
276
|
# Batch-fetch BM25-only hits concurrently
|
|
218
|
-
import asyncio
|
|
219
277
|
fetched_entries = await asyncio.gather(
|
|
220
278
|
*(self.vector_store.get(m["id"]) for m in bm25_only_ids)
|
|
221
279
|
) if bm25_only_ids else []
|
|
@@ -223,12 +281,13 @@ class TribalMemoryService(IMemoryService):
|
|
|
223
281
|
# Build candidate list
|
|
224
282
|
candidates: list[RecallResult] = []
|
|
225
283
|
|
|
226
|
-
# Add cached vector hits
|
|
284
|
+
# Add cached vector hits (mark as hybrid since we used BM25 merge)
|
|
227
285
|
for m, recall_result in cached_hits:
|
|
228
286
|
candidates.append(RecallResult(
|
|
229
287
|
memory=recall_result.memory,
|
|
230
288
|
similarity_score=m["final_score"],
|
|
231
289
|
retrieval_time_ms=recall_result.retrieval_time_ms,
|
|
290
|
+
retrieval_method="hybrid",
|
|
232
291
|
))
|
|
233
292
|
|
|
234
293
|
# Add fetched BM25-only hits
|
|
@@ -238,12 +297,116 @@ class TribalMemoryService(IMemoryService):
|
|
|
238
297
|
memory=entry,
|
|
239
298
|
similarity_score=m["final_score"],
|
|
240
299
|
retrieval_time_ms=0,
|
|
300
|
+
retrieval_method="hybrid",
|
|
241
301
|
))
|
|
242
302
|
|
|
243
303
|
# 6. Rerank candidates
|
|
244
304
|
reranked = self.reranker.rerank(query, candidates, top_k=limit)
|
|
245
305
|
|
|
246
306
|
return self._filter_superseded(reranked)
|
|
307
|
+
|
|
308
|
+
async def _expand_via_graph(
|
|
309
|
+
self,
|
|
310
|
+
query: str,
|
|
311
|
+
existing_results: list[RecallResult],
|
|
312
|
+
limit: int,
|
|
313
|
+
min_relevance: float,
|
|
314
|
+
) -> list[RecallResult]:
|
|
315
|
+
"""Expand recall candidates via entity graph traversal.
|
|
316
|
+
|
|
317
|
+
Extracts entities from the query, finds memories connected to those
|
|
318
|
+
entities via the graph, and merges them with existing results.
|
|
319
|
+
|
|
320
|
+
Args:
|
|
321
|
+
query: The original query string.
|
|
322
|
+
existing_results: Results from vector/hybrid search.
|
|
323
|
+
limit: Maximum total results.
|
|
324
|
+
min_relevance: Minimum relevance threshold (filters graph results too).
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
Combined results with graph-expanded memories, sorted by score.
|
|
328
|
+
"""
|
|
329
|
+
# Extract entities from query
|
|
330
|
+
query_entities = self.entity_extractor.extract(query)
|
|
331
|
+
if not query_entities:
|
|
332
|
+
return existing_results
|
|
333
|
+
|
|
334
|
+
# Collect memory IDs from existing results to avoid duplicates
|
|
335
|
+
existing_ids = {r.memory.id for r in existing_results}
|
|
336
|
+
|
|
337
|
+
# Find memories connected to query entities via graph
|
|
338
|
+
graph_memory_ids: set[str] = set()
|
|
339
|
+
entity_to_hops: dict[str, int] = {} # Track hop distance for scoring
|
|
340
|
+
|
|
341
|
+
for entity in query_entities:
|
|
342
|
+
# Direct mentions (1 hop)
|
|
343
|
+
direct_ids = self.graph_store.get_memories_for_entity(entity.name)
|
|
344
|
+
for mid in direct_ids:
|
|
345
|
+
if mid not in existing_ids:
|
|
346
|
+
graph_memory_ids.add(mid)
|
|
347
|
+
# Use setdefault to preserve shortest path (1-hop takes precedence)
|
|
348
|
+
entity_to_hops.setdefault(mid, 1)
|
|
349
|
+
|
|
350
|
+
# Connected entities (2 hops)
|
|
351
|
+
connected = self.graph_store.find_connected(entity.name, hops=1)
|
|
352
|
+
for connected_entity in connected:
|
|
353
|
+
connected_ids = self.graph_store.get_memories_for_entity(
|
|
354
|
+
connected_entity.name
|
|
355
|
+
)
|
|
356
|
+
for mid in connected_ids:
|
|
357
|
+
if mid not in existing_ids:
|
|
358
|
+
graph_memory_ids.add(mid)
|
|
359
|
+
# Use setdefault to preserve shortest path
|
|
360
|
+
entity_to_hops.setdefault(mid, 2)
|
|
361
|
+
|
|
362
|
+
if not graph_memory_ids:
|
|
363
|
+
return existing_results
|
|
364
|
+
|
|
365
|
+
# Cap graph candidates to prevent memory leak (#2)
|
|
366
|
+
max_graph_candidates = limit * self.GRAPH_EXPANSION_BUFFER
|
|
367
|
+
if len(graph_memory_ids) > max_graph_candidates:
|
|
368
|
+
# Prioritize 1-hop over 2-hop when capping
|
|
369
|
+
one_hop_ids = [mid for mid in graph_memory_ids if entity_to_hops[mid] == 1]
|
|
370
|
+
two_hop_ids = [mid for mid in graph_memory_ids if entity_to_hops[mid] == 2]
|
|
371
|
+
|
|
372
|
+
capped_ids: list[str] = []
|
|
373
|
+
capped_ids.extend(one_hop_ids[:max_graph_candidates])
|
|
374
|
+
remaining = max_graph_candidates - len(capped_ids)
|
|
375
|
+
if remaining > 0:
|
|
376
|
+
capped_ids.extend(two_hop_ids[:remaining])
|
|
377
|
+
|
|
378
|
+
graph_memory_ids = set(capped_ids)
|
|
379
|
+
|
|
380
|
+
# Batch-fetch graph-connected memories concurrently for performance
|
|
381
|
+
graph_memory_list = list(graph_memory_ids)
|
|
382
|
+
fetched_entries = await asyncio.gather(
|
|
383
|
+
*(self.vector_store.get(mid) for mid in graph_memory_list)
|
|
384
|
+
) if graph_memory_list else []
|
|
385
|
+
|
|
386
|
+
graph_results: list[RecallResult] = []
|
|
387
|
+
for memory_id, entry in zip(graph_memory_list, fetched_entries):
|
|
388
|
+
if entry:
|
|
389
|
+
# Score based on hop distance using class constants
|
|
390
|
+
hops = entity_to_hops[memory_id] # Fail fast if logic is wrong
|
|
391
|
+
graph_score = (
|
|
392
|
+
self.GRAPH_1HOP_SCORE if hops == 1
|
|
393
|
+
else self.GRAPH_2HOP_SCORE
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
# Filter by min_relevance
|
|
397
|
+
if graph_score >= min_relevance:
|
|
398
|
+
graph_results.append(RecallResult(
|
|
399
|
+
memory=entry,
|
|
400
|
+
similarity_score=graph_score,
|
|
401
|
+
retrieval_time_ms=0,
|
|
402
|
+
retrieval_method="graph",
|
|
403
|
+
))
|
|
404
|
+
|
|
405
|
+
# Combine existing + graph results (#10: single sort, no redundant pre-sort)
|
|
406
|
+
combined = existing_results + graph_results
|
|
407
|
+
combined.sort(key=lambda r: r.similarity_score, reverse=True)
|
|
408
|
+
|
|
409
|
+
return combined[:limit]
|
|
247
410
|
|
|
248
411
|
async def correct(
|
|
249
412
|
self,
|
|
@@ -286,6 +449,11 @@ class TribalMemoryService(IMemoryService):
|
|
|
286
449
|
self.fts_store.delete(memory_id)
|
|
287
450
|
except Exception as e:
|
|
288
451
|
logger.warning("FTS cleanup failed for %s: %s", memory_id, e)
|
|
452
|
+
if result and self.graph_store:
|
|
453
|
+
try:
|
|
454
|
+
self.graph_store.delete_memory(memory_id)
|
|
455
|
+
except Exception as e:
|
|
456
|
+
logger.warning("Graph cleanup failed for %s: %s", memory_id, e)
|
|
289
457
|
return result
|
|
290
458
|
|
|
291
459
|
async def get(self, memory_id: str) -> Optional[MemoryEntry]:
|
|
@@ -311,6 +479,99 @@ class TribalMemoryService(IMemoryService):
|
|
|
311
479
|
return results
|
|
312
480
|
return [r for r in results if r.memory.id not in superseded_ids]
|
|
313
481
|
|
|
482
|
+
async def recall_entity(
|
|
483
|
+
self,
|
|
484
|
+
entity_name: str,
|
|
485
|
+
hops: int = 1,
|
|
486
|
+
limit: int = 10,
|
|
487
|
+
) -> list[RecallResult]:
|
|
488
|
+
"""Recall all memories associated with an entity and its connections.
|
|
489
|
+
|
|
490
|
+
This enables entity-centric queries like:
|
|
491
|
+
- "Tell me everything about auth-service"
|
|
492
|
+
- "What do we know about PostgreSQL?"
|
|
493
|
+
|
|
494
|
+
Args:
|
|
495
|
+
entity_name: Name of the entity to query
|
|
496
|
+
hops: Number of relationship hops to traverse (1 = direct only)
|
|
497
|
+
limit: Maximum results to return
|
|
498
|
+
|
|
499
|
+
Returns:
|
|
500
|
+
List of recall results for memories mentioning the entity or connected entities
|
|
501
|
+
"""
|
|
502
|
+
if not self.graph_enabled:
|
|
503
|
+
logger.warning("Graph search not enabled, returning empty results")
|
|
504
|
+
return []
|
|
505
|
+
|
|
506
|
+
# Get memories directly mentioning the entity
|
|
507
|
+
direct_memories = set(self.graph_store.get_memories_for_entity(entity_name))
|
|
508
|
+
|
|
509
|
+
# Get memories for connected entities (if hops > 0)
|
|
510
|
+
if hops > 0:
|
|
511
|
+
connected = self.graph_store.find_connected(entity_name, hops=hops)
|
|
512
|
+
for entity in connected:
|
|
513
|
+
direct_memories.update(
|
|
514
|
+
self.graph_store.get_memories_for_entity(entity.name)
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
if not direct_memories:
|
|
518
|
+
return []
|
|
519
|
+
|
|
520
|
+
# Fetch full memory entries
|
|
521
|
+
results: list[RecallResult] = []
|
|
522
|
+
for memory_id in list(direct_memories)[:limit]:
|
|
523
|
+
entry = await self.vector_store.get(memory_id)
|
|
524
|
+
if entry:
|
|
525
|
+
results.append(RecallResult(
|
|
526
|
+
memory=entry,
|
|
527
|
+
similarity_score=1.0, # Entity match confidence (exact)
|
|
528
|
+
retrieval_time_ms=0,
|
|
529
|
+
retrieval_method="entity",
|
|
530
|
+
))
|
|
531
|
+
|
|
532
|
+
return results
|
|
533
|
+
|
|
534
|
+
def get_entity_graph(
|
|
535
|
+
self,
|
|
536
|
+
entity_name: str,
|
|
537
|
+
hops: int = 2,
|
|
538
|
+
) -> dict:
|
|
539
|
+
"""Get the relationship graph around an entity.
|
|
540
|
+
|
|
541
|
+
Returns a dict with:
|
|
542
|
+
- entities: list of connected entities with types
|
|
543
|
+
- relationships: list of relationships
|
|
544
|
+
|
|
545
|
+
Useful for visualization and debugging.
|
|
546
|
+
"""
|
|
547
|
+
if not self.graph_enabled:
|
|
548
|
+
return {"entities": [], "relationships": []}
|
|
549
|
+
|
|
550
|
+
connected = self.graph_store.find_connected(
|
|
551
|
+
entity_name, hops=hops, include_source=True
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
entities = [
|
|
555
|
+
{"name": e.name, "type": e.entity_type}
|
|
556
|
+
for e in connected
|
|
557
|
+
]
|
|
558
|
+
|
|
559
|
+
# Get relationships for all entities
|
|
560
|
+
relationships = []
|
|
561
|
+
seen_rels = set()
|
|
562
|
+
for entity in connected:
|
|
563
|
+
for rel in self.graph_store.get_relationships_for_entity(entity.name):
|
|
564
|
+
rel_key = (rel.source, rel.target, rel.relation_type)
|
|
565
|
+
if rel_key not in seen_rels:
|
|
566
|
+
seen_rels.add(rel_key)
|
|
567
|
+
relationships.append({
|
|
568
|
+
"source": rel.source,
|
|
569
|
+
"target": rel.target,
|
|
570
|
+
"type": rel.relation_type,
|
|
571
|
+
})
|
|
572
|
+
|
|
573
|
+
return {"entities": entities, "relationships": relationships}
|
|
574
|
+
|
|
314
575
|
|
|
315
576
|
def create_memory_service(
|
|
316
577
|
instance_id: Optional[str] = None,
|
|
@@ -421,6 +682,17 @@ def create_memory_service(
|
|
|
421
682
|
)
|
|
422
683
|
reranker = create_reranker(search_config)
|
|
423
684
|
|
|
685
|
+
# Create graph store for entity-enriched search (co-located with LanceDB)
|
|
686
|
+
graph_store = None
|
|
687
|
+
if db_path:
|
|
688
|
+
try:
|
|
689
|
+
graph_db_path = str(Path(db_path) / "graph.db")
|
|
690
|
+
graph_store = GraphStore(graph_db_path)
|
|
691
|
+
logger.info("Graph store enabled (SQLite)")
|
|
692
|
+
except Exception as e:
|
|
693
|
+
logger.warning(f"Graph store init failed: {e}. Graph search disabled.")
|
|
694
|
+
graph_store = None
|
|
695
|
+
|
|
424
696
|
return TribalMemoryService(
|
|
425
697
|
instance_id=instance_id,
|
|
426
698
|
embedding_service=embedding_service,
|
|
@@ -432,4 +704,6 @@ def create_memory_service(
|
|
|
432
704
|
hybrid_candidate_multiplier=hybrid_candidate_multiplier,
|
|
433
705
|
reranker=reranker,
|
|
434
706
|
rerank_pool_multiplier=rerank_pool_multiplier,
|
|
707
|
+
graph_store=graph_store,
|
|
708
|
+
graph_enabled=graph_store is not None,
|
|
435
709
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
tribalmemory/__init__.py,sha256=DNgC_ZT0lrhxsPdhXu4oeG_UdrLstYQHeKwR-U2toeY,104
|
|
2
2
|
tribalmemory/cli.py,sha256=wbVsgMst4NEuWkwA2dily_qC8AP6jvAHKwHgE3B-TG8,15960
|
|
3
|
-
tribalmemory/interfaces.py,sha256=
|
|
3
|
+
tribalmemory/interfaces.py,sha256=hs32jxImd3AXa0EfysGhP-goDAMQ90fqVQV4PbTpgQQ,12287
|
|
4
4
|
tribalmemory/utils.py,sha256=aei7xR6OVMGkllPySA2boeHyI3D1JsHTUX1YeaZdkMY,696
|
|
5
5
|
tribalmemory/a21/__init__.py,sha256=u1793uKzbWGKwiAVmCxEO9_3rdTL7QKTLhQQB8Umsl4,1181
|
|
6
6
|
tribalmemory/a21/system.py,sha256=gGFVWBTckSTFv8ZciEUc05HYjwxZP22UpIqbxXD6giM,9185
|
|
@@ -19,7 +19,7 @@ tribalmemory/a21/providers/openai.py,sha256=MxFJXph8rVFAqkVMCS3vxdqwBB8_MhoqqGg6
|
|
|
19
19
|
tribalmemory/a21/providers/timestamp.py,sha256=T1sJkaQSixRip2C0AUPnljWP0u5w4iHoIcVRmG8FgPo,2994
|
|
20
20
|
tribalmemory/mcp/__init__.py,sha256=r7CzTwzAnSgox8_cBNZgtXDjMGxC7o8polptSvB-AvY,262
|
|
21
21
|
tribalmemory/mcp/__main__.py,sha256=GX6mNmM_Lpq4EyuTr__awVTf1btCi7TpEkNtMTkgas0,115
|
|
22
|
-
tribalmemory/mcp/server.py,sha256=
|
|
22
|
+
tribalmemory/mcp/server.py,sha256=URosl0xbxqn0m57bkO52y-pKIBzgkP-f8wTm_MRWzd8,24708
|
|
23
23
|
tribalmemory/performance/__init__.py,sha256=truSiOqk2WuzatOrzVQICwF8pxLxMlCD8R9O-uqiX3I,55
|
|
24
24
|
tribalmemory/performance/benchmarks.py,sha256=2MVfML04Y-05YpmHCsU-SLtS05-H38oJ7a6DCk2qGIc,8985
|
|
25
25
|
tribalmemory/performance/corpus_generator.py,sha256=ovln1d-7JGd5fJbdSRsdxlA0uaqLCVM3Lo_1SDGRkA0,5894
|
|
@@ -35,8 +35,9 @@ tribalmemory/services/__init__.py,sha256=htv8HuG_r_lYJwP5Q1bwO-oir436U0NfJrOxk9m
|
|
|
35
35
|
tribalmemory/services/deduplication.py,sha256=E8PaIDB6g24H28tKHrB5rMBJaKGGT3pFTDepXQThvcc,3679
|
|
36
36
|
tribalmemory/services/embeddings.py,sha256=0kY1uPyCg81AlRTNg5QhXbRLDv8hN9khKR4JDGF2sik,10005
|
|
37
37
|
tribalmemory/services/fts_store.py,sha256=5-SBGmzDeQR0-8aDMMO-zZuo8M7VK_tlKYUVDNAitV4,8424
|
|
38
|
+
tribalmemory/services/graph_store.py,sha256=zekWCdmqvZR9qH09FFqp6L7QKSI1z368CKLUDH1osys,24376
|
|
38
39
|
tribalmemory/services/import_export.py,sha256=KfEl5EXAFcuyDmhOYJZfjiIRtJYY4qQlvxv2ePJQHaA,15095
|
|
39
|
-
tribalmemory/services/memory.py,sha256=
|
|
40
|
+
tribalmemory/services/memory.py,sha256=uWw-Eg8cJYmzwMsXBxmkGzPRM66Ew13OuRDKBAI2lng,27472
|
|
40
41
|
tribalmemory/services/reranker.py,sha256=0RSvQFHB609aWcxBl92fiwxIbAYILTqNL9MaAp0lQ74,8720
|
|
41
42
|
tribalmemory/services/session_store.py,sha256=wkVF9pNJOqkVXIYOkyvABSaRxaLoHDldT8KTZGThDU0,13818
|
|
42
43
|
tribalmemory/services/vector_store.py,sha256=fL8YgnHiCLPqxqV64pQ_rMLIzdJc6ohN2c4wgGz0buw,15364
|
|
@@ -46,9 +47,9 @@ tribalmemory/testing/fixtures.py,sha256=_zDyUVm6CQqXK1Us8CN6A95tJcmo1D7LFDktIvjO
|
|
|
46
47
|
tribalmemory/testing/metrics.py,sha256=X1n84dJDNQXsfGV-i-MzhsWKnFgqHWIcIaQB-BUp0e0,8711
|
|
47
48
|
tribalmemory/testing/mocks.py,sha256=sjLy-pq3D_T21rEvWWKM_bqw7xnchRPGLARZNfKkpGU,19788
|
|
48
49
|
tribalmemory/testing/semantic_expansions.py,sha256=AbbJIXYN4EJT8WKJ7UmIlNRlv63VejbcnbzBy2z2Ofk,2953
|
|
49
|
-
tribalmemory-0.
|
|
50
|
-
tribalmemory-0.
|
|
51
|
-
tribalmemory-0.
|
|
52
|
-
tribalmemory-0.
|
|
53
|
-
tribalmemory-0.
|
|
54
|
-
tribalmemory-0.
|
|
50
|
+
tribalmemory-0.3.0.dist-info/licenses/LICENSE,sha256=M8D9Xf3B6C6DFiCgAAhKcXeTscaC4cw1fhr3LHUrALU,10774
|
|
51
|
+
tribalmemory-0.3.0.dist-info/METADATA,sha256=05lUAOlIf1Zei-I2USroVXkuq2gCijCrCn52B_IqR6Y,9239
|
|
52
|
+
tribalmemory-0.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
53
|
+
tribalmemory-0.3.0.dist-info/entry_points.txt,sha256=9Pep7JNCk9ifdFP4WbeCugDOjMrLVegGJ5iuvbcZ9e8,103
|
|
54
|
+
tribalmemory-0.3.0.dist-info/top_level.txt,sha256=kX36ZpH4W7EWcInV4MrIudicusdz5hfkezKMZ3HCMQs,13
|
|
55
|
+
tribalmemory-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|