tribalmemory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. tribalmemory/__init__.py +3 -0
  2. tribalmemory/a21/__init__.py +38 -0
  3. tribalmemory/a21/config/__init__.py +20 -0
  4. tribalmemory/a21/config/providers.py +104 -0
  5. tribalmemory/a21/config/system.py +184 -0
  6. tribalmemory/a21/container/__init__.py +8 -0
  7. tribalmemory/a21/container/container.py +212 -0
  8. tribalmemory/a21/providers/__init__.py +32 -0
  9. tribalmemory/a21/providers/base.py +241 -0
  10. tribalmemory/a21/providers/deduplication.py +99 -0
  11. tribalmemory/a21/providers/lancedb.py +232 -0
  12. tribalmemory/a21/providers/memory.py +128 -0
  13. tribalmemory/a21/providers/mock.py +54 -0
  14. tribalmemory/a21/providers/openai.py +151 -0
  15. tribalmemory/a21/providers/timestamp.py +88 -0
  16. tribalmemory/a21/system.py +293 -0
  17. tribalmemory/cli.py +298 -0
  18. tribalmemory/interfaces.py +306 -0
  19. tribalmemory/mcp/__init__.py +9 -0
  20. tribalmemory/mcp/__main__.py +6 -0
  21. tribalmemory/mcp/server.py +484 -0
  22. tribalmemory/performance/__init__.py +1 -0
  23. tribalmemory/performance/benchmarks.py +285 -0
  24. tribalmemory/performance/corpus_generator.py +171 -0
  25. tribalmemory/portability/__init__.py +1 -0
  26. tribalmemory/portability/embedding_metadata.py +320 -0
  27. tribalmemory/server/__init__.py +9 -0
  28. tribalmemory/server/__main__.py +6 -0
  29. tribalmemory/server/app.py +187 -0
  30. tribalmemory/server/config.py +115 -0
  31. tribalmemory/server/models.py +206 -0
  32. tribalmemory/server/routes.py +378 -0
  33. tribalmemory/services/__init__.py +15 -0
  34. tribalmemory/services/deduplication.py +115 -0
  35. tribalmemory/services/embeddings.py +273 -0
  36. tribalmemory/services/import_export.py +506 -0
  37. tribalmemory/services/memory.py +275 -0
  38. tribalmemory/services/vector_store.py +360 -0
  39. tribalmemory/testing/__init__.py +22 -0
  40. tribalmemory/testing/embedding_utils.py +110 -0
  41. tribalmemory/testing/fixtures.py +123 -0
  42. tribalmemory/testing/metrics.py +256 -0
  43. tribalmemory/testing/mocks.py +560 -0
  44. tribalmemory/testing/semantic_expansions.py +91 -0
  45. tribalmemory/utils.py +23 -0
  46. tribalmemory-0.1.0.dist-info/METADATA +275 -0
  47. tribalmemory-0.1.0.dist-info/RECORD +51 -0
  48. tribalmemory-0.1.0.dist-info/WHEEL +5 -0
  49. tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
  50. tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
  51. tribalmemory-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,241 @@
1
+ """Abstract base classes for all providers.
2
+
3
+ These define the contracts that provider implementations must satisfy.
4
+ Designed for extensibility and forward compatibility.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from dataclasses import dataclass
9
+ from datetime import datetime
10
+ from typing import Optional, Any, TypeVar, Generic
11
+ from enum import Enum
12
+
13
+ from ...interfaces import MemoryEntry, RecallResult, StoreResult, MemorySource
14
+
15
+
16
# Type variable for provider-specific configuration
TConfig = TypeVar('TConfig')


class ProviderStatus(Enum):
    """Provider health status."""
    HEALTHY = "healthy"
    DEGRADED = "degraded"
    UNAVAILABLE = "unavailable"
    INITIALIZING = "initializing"


@dataclass
class ProviderHealth:
    """Health check result for a provider.

    Attributes:
        status: Overall health classification.
        latency_ms: Round-trip latency of the probe in milliseconds, if measured.
        message: Optional human-readable detail.
        last_check: When the check ran; defaults to the current UTC time.
    """
    status: ProviderStatus
    latency_ms: Optional[float] = None
    message: Optional[str] = None
    # Annotated Optional because None is a valid (and the default) value;
    # __post_init__ backfills it with the current time.
    last_check: Optional[datetime] = None

    def __post_init__(self):
        if self.last_check is None:
            # NOTE: naive UTC timestamp (datetime.utcnow()); kept as-is for
            # consistency with the other utcnow() call sites in this package.
            self.last_check = datetime.utcnow()
39
+
40
+
41
class Provider(ABC, Generic[TConfig]):
    """Shared foundation for every provider implementation.

    Centralizes the cross-cutting concerns:
    - holding the typed configuration object
    - tracking initialization state
    - an async health probe
    - lifecycle management via ``async with``
    """

    def __init__(self, config: TConfig):
        # Subclasses pin TConfig to their concrete configuration type.
        self.config = config
        self._initialized = False

    @abstractmethod
    async def initialize(self) -> None:
        """Prepare the provider for use; invoked once before first use."""
        ...

    @abstractmethod
    async def shutdown(self) -> None:
        """Release resources and stop the provider gracefully."""
        ...

    @abstractmethod
    async def health_check(self) -> ProviderHealth:
        """Probe the provider's health and connectivity."""
        ...

    @property
    def is_initialized(self) -> bool:
        """True once initialization has completed (flag set by subclasses)."""
        return self._initialized

    async def __aenter__(self):
        # Initialize lazily: only on the first context entry.
        if self._initialized:
            return self
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Always shut down on exit, regardless of any in-flight exception.
        await self.shutdown()
81
+
82
+
83
class EmbeddingProvider(Provider[TConfig]):
    """Abstract embedding provider.

    Turns text into dense vector embeddings. Concrete implementations may
    call OpenAI, run a local model, or use any other embedding service.
    """

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """Size of the vectors produced by this provider."""
        ...

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Identifier of the underlying embedding model."""
        ...

    @abstractmethod
    async def embed(self, text: str) -> list[float]:
        """Embed a single piece of text."""
        ...

    @abstractmethod
    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed several texts in one call, for efficiency."""
        ...

    def similarity(self, a: list[float], b: list[float]) -> float:
        """Cosine similarity of two embedding vectors.

        Computes (a . b) / (||a|| * ||b||); returns 0.0 when either
        vector has zero magnitude.

        Args:
            a: First embedding vector
            b: Second embedding vector

        Returns:
            Score between -1.0 and 1.0
        """
        import math

        dot_product = sum(lhs * rhs for lhs, rhs in zip(a, b))
        magnitude_a = math.sqrt(sum(c * c for c in a))
        magnitude_b = math.sqrt(sum(c * c for c in b))
        if magnitude_a == 0 or magnitude_b == 0:
            return 0.0
        return dot_product / (magnitude_a * magnitude_b)
131
+
132
+
133
class StorageProvider(Provider[TConfig]):
    """Abstract storage provider.

    Responsible for persisting and retrieving memory entries.
    Implementations may use LanceDB, Pinecone, Postgres+pgvector, etc.
    """

    @abstractmethod
    async def store(self, entry: MemoryEntry) -> StoreResult:
        """Store a memory entry.

        Args:
            entry: The memory entry to persist.

        Returns:
            StoreResult describing success or failure of the write.
        """
        pass

    @abstractmethod
    async def recall(
        self,
        query_embedding: list[float],
        limit: int = 10,
        min_similarity: float = 0.7,
        filters: Optional[dict[str, Any]] = None,
    ) -> list[RecallResult]:
        """Recall memories similar to query embedding.

        Args:
            query_embedding: Vector to search for
            limit: Maximum results
            min_similarity: Minimum similarity threshold
            filters: Optional metadata filters (e.g., tags, source_instance)

        Returns:
            Matching results; the concrete providers in this package return
            them sorted by descending similarity.
        """
        pass

    @abstractmethod
    async def get(self, memory_id: str) -> Optional[MemoryEntry]:
        """Get a specific memory by ID.

        Returns:
            The entry, or None when the ID is unknown (or soft-deleted).
        """
        pass

    @abstractmethod
    async def delete(self, memory_id: str) -> bool:
        """Soft delete a memory.

        Returns:
            True when the entry was marked deleted, False otherwise.
        """
        pass

    # Intentionally named `list` to mirror the storage API; shadows the
    # builtin only inside this class namespace.
    @abstractmethod
    async def list(
        self,
        limit: int = 100,
        offset: int = 0,
        filters: Optional[dict[str, Any]] = None,
    ) -> list[MemoryEntry]:
        """List memories with pagination and filtering.

        Args:
            limit: Maximum number of entries to return.
            offset: Number of matching entries to skip.
            filters: Optional metadata filters.
        """
        pass

    @abstractmethod
    async def count(self, filters: Optional[dict[str, Any]] = None) -> int:
        """Count memories matching filters."""
        pass
187
+
188
+
189
class TimestampProvider(Provider[TConfig]):
    """Abstract timestamp provider.

    Responsible for generating cryptographic timestamps (RFC 3161).
    Used for provenance verification.
    """

    @abstractmethod
    async def timestamp(self, data: bytes) -> bytes:
        """Generate a timestamp token for data.

        Args:
            data: Raw bytes to be timestamped.

        Returns:
            An opaque timestamp token, checkable later via verify().
        """
        pass

    @abstractmethod
    async def verify(self, data: bytes, token: bytes) -> tuple[bool, Optional[datetime]]:
        """Verify a timestamp token.

        Args:
            data: The original bytes the token was issued for.
            token: The token previously returned by timestamp().

        Returns:
            Tuple of (is_valid, timestamp); the datetime is presumably the
            attested time when valid and None otherwise — implementations
            define the exact semantics.
        """
        pass
205
+
206
+
207
class DeduplicationProvider(Provider[TConfig]):
    """Abstract deduplication provider.

    Responsible for detecting duplicate or near-duplicate memories.
    May use embedding similarity, hashing, or hybrid approaches.
    """

    @abstractmethod
    async def is_duplicate(
        self,
        content: str,
        embedding: list[float],
    ) -> tuple[bool, Optional[str]]:
        """Check if content is duplicate.

        Args:
            content: Text content to check.
            embedding: Pre-computed embedding of the content.

        Returns:
            Tuple of (is_duplicate, duplicate_id).
            Use find_similar() or get_duplicate_report() if similarity score needed.
        """
        # NOTE(review): get_duplicate_report() mentioned above is not declared
        # on this interface — confirm where it lives or update the docstring.
        pass

    @abstractmethod
    async def find_similar(
        self,
        content: str,
        embedding: list[float],
        threshold: float = 0.85,
        limit: int = 10,
    ) -> list[tuple[str, float]]:
        """Find similar memories.

        Args:
            content: Text content to compare.
            embedding: Pre-computed embedding of the content.
            threshold: Minimum similarity score to include a match.
            limit: Maximum number of matches to return.

        Returns:
            List of (memory_id, similarity_score)
        """
        pass
@@ -0,0 +1,99 @@
1
+ """Deduplication provider."""
2
+
3
+ from typing import Optional
4
+
5
+ from .base import DeduplicationProvider, StorageProvider, EmbeddingProvider, ProviderHealth, ProviderStatus
6
+ from ..config.providers import DeduplicationConfig
7
+
8
+
9
class EmbeddingDeduplicationProvider(DeduplicationProvider[DeduplicationConfig]):
    """Deduplication using embedding similarity.

    Detects duplicates by comparing embeddings against stored memories.
    Uses configurable thresholds for exact and near-duplicate detection.
    All I/O is delegated to the injected storage provider.
    """

    def __init__(
        self,
        config: DeduplicationConfig,
        storage_provider: StorageProvider,
        embedding_provider: EmbeddingProvider,
    ):
        """Initialize deduplication provider.

        Args:
            config: Deduplication configuration
            storage_provider: Initialized storage provider for recall queries
            embedding_provider: Initialized embedding provider for similarity calculations
        """
        super().__init__(config)
        self._storage = storage_provider
        self._embedding = embedding_provider

    async def initialize(self) -> None:
        """Initialize provider. Storage and embedding must already be initialized."""
        self._initialized = True

    async def shutdown(self) -> None:
        """Shutdown provider."""
        self._initialized = False

    async def health_check(self) -> ProviderHealth:
        """Check provider health.

        Always healthy: this provider holds no connections of its own.
        """
        return ProviderHealth(
            status=ProviderStatus.HEALTHY,
            message="Deduplication ready"
        )

    async def is_duplicate(
        self,
        content: str,
        embedding: list[float],
    ) -> tuple[bool, Optional[str]]:
        """Check if content is a duplicate.

        Args:
            content: Text content to check (currently unused; matching is
                done purely on the embedding)
            embedding: Pre-computed embedding for the content

        Returns:
            Tuple of (is_duplicate, duplicate_id)
        """
        # A single nearest neighbor at/above the exact threshold is enough
        # to declare a duplicate.
        results = await self._storage.recall(
            embedding,
            limit=1,
            min_similarity=self.config.exact_threshold
        )

        if results and results[0].similarity_score >= self.config.exact_threshold:
            return True, results[0].memory.id

        return False, None

    async def find_similar(
        self,
        content: str,
        embedding: list[float],
        threshold: Optional[float] = None,
        limit: int = 10,
    ) -> list[tuple[str, float]]:
        """Find similar memories.

        Args:
            content: Text content to search for (currently unused; matching
                is done purely on the embedding)
            embedding: Pre-computed embedding for the content
            threshold: Minimum similarity (defaults to config.near_threshold)
            limit: Maximum results

        Returns:
            List of (memory_id, similarity_score) tuples
        """
        # Explicit None check: the previous `threshold or default` silently
        # replaced a caller-supplied 0.0 with the configured default.
        if threshold is None:
            threshold = self.config.near_threshold

        results = await self._storage.recall(
            embedding,
            limit=limit,
            min_similarity=threshold
        )

        return [(r.memory.id, r.similarity_score) for r in results]
@@ -0,0 +1,232 @@
1
+ """LanceDB storage provider."""
2
+
3
+ import json
4
+ import logging
5
+ import re
6
+ import time
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Optional, Any
10
+
11
+ from .base import StorageProvider, EmbeddingProvider, ProviderHealth, ProviderStatus
12
+ from ..config.providers import StorageConfig
13
+ from ...interfaces import MemoryEntry, MemorySource, RecallResult, StoreResult
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class LanceDBStorageProvider(StorageProvider[StorageConfig]):
    """LanceDB-backed storage provider.

    Rows are soft-deleted via a ``deleted`` flag and reads filter on
    ``deleted = false``. List-valued fields (tags, related_to) are stored
    as JSON-encoded strings; timestamps as ISO-8601 strings.
    """

    def __init__(
        self,
        config: StorageConfig,
        embedding_provider: EmbeddingProvider,
    ):
        # No connection is made here; initialize() does the I/O.
        super().__init__(config)
        self._embedding = embedding_provider  # backfills missing embeddings in store()
        self._db = None  # lancedb connection; set by initialize()
        self._table = None  # lancedb table handle; set by initialize()

    async def initialize(self) -> None:
        """Connect to LanceDB and open (or create) the configured table.

        Raises:
            ImportError: the lancedb package is not installed.
            ValueError: neither ``path`` nor ``uri`` is configured.
        """
        try:
            import lancedb
        except ImportError:
            raise ImportError("LanceDB not installed. Run: pip install lancedb")

        # Remote/cloud connection takes precedence over a local path.
        if self.config.uri:
            self._db = lancedb.connect(self.config.uri, api_key=self.config.api_key)
        elif self.config.path:
            # Ensure the local database directory exists before connecting.
            Path(self.config.path).mkdir(parents=True, exist_ok=True)
            self._db = lancedb.connect(self.config.path)
        else:
            raise ValueError("LanceDB requires path or uri")

        if self.config.table_name in self._db.table_names():
            self._table = self._db.open_table(self.config.table_name)
        else:
            self._table = self._create_table()

        self._initialized = True

    async def shutdown(self) -> None:
        """Drop the handles; no explicit close call is made."""
        self._db = None
        self._table = None
        self._initialized = False

    async def health_check(self) -> ProviderHealth:
        """Probe the table by running a full count and timing it."""
        if not self._table:
            return ProviderHealth(
                status=ProviderStatus.UNAVAILABLE,
                message="Table not initialized"
            )

        try:
            # Wall-clock timing via datetime — coarse, but only used for a
            # rough latency figure in the health report.
            start = datetime.utcnow()
            count = await self.count()
            latency = (datetime.utcnow() - start).total_seconds() * 1000
            return ProviderHealth(
                status=ProviderStatus.HEALTHY,
                latency_ms=latency,
                message=f"LanceDB with {count} entries"
            )
        except Exception as e:
            # The handle exists but querying failed: degraded, not down.
            return ProviderHealth(
                status=ProviderStatus.DEGRADED,
                message=str(e)
            )

    def _create_table(self):
        """Create the table with the fixed Arrow schema used by this provider."""
        # Local import: pyarrow is only needed on the table-creation path.
        import pyarrow as pa

        schema = pa.schema([
            pa.field("id", pa.string()),
            pa.field("content", pa.string()),
            # Fixed-size vector column sized from configuration.
            pa.field("vector", pa.list_(pa.float32(), self.config.embedding_dimensions)),
            pa.field("source_instance", pa.string()),
            pa.field("source_type", pa.string()),
            pa.field("created_at", pa.string()),
            pa.field("updated_at", pa.string()),
            pa.field("tags", pa.string()),  # JSON-encoded list
            pa.field("context", pa.string()),
            pa.field("confidence", pa.float32()),
            pa.field("supersedes", pa.string()),
            pa.field("related_to", pa.string()),  # JSON-encoded list
            pa.field("deleted", pa.bool_()),  # soft-delete flag
        ])

        return self._db.create_table(self.config.table_name, schema=schema)

    async def store(self, entry: MemoryEntry) -> StoreResult:
        """Persist one entry, embedding its content first when needed.

        Note: mutates ``entry`` in place when it arrives without an embedding.
        Failures are reported in StoreResult.error rather than raised.
        """
        if entry.embedding is None:
            entry.embedding = await self._embedding.embed(entry.content)

        # Validate dimensions
        if len(entry.embedding) != self.config.embedding_dimensions:
            return StoreResult(
                success=False,
                error=f"Invalid embedding dimension: {len(entry.embedding)}"
            )

        # Flatten to the table's column layout: optional fields become empty
        # strings, list fields become JSON strings (see _row_to_entry for the
        # inverse mapping).
        row = {
            "id": entry.id,
            "content": entry.content,
            "vector": entry.embedding,
            "source_instance": entry.source_instance,
            "source_type": entry.source_type.value,
            "created_at": entry.created_at.isoformat(),
            "updated_at": entry.updated_at.isoformat(),
            "tags": json.dumps(entry.tags),
            "context": entry.context or "",
            "confidence": entry.confidence,
            "supersedes": entry.supersedes or "",
            "related_to": json.dumps(entry.related_to),
            "deleted": False,
        }

        try:
            self._table.add([row])
            return StoreResult(success=True, memory_id=entry.id)
        except Exception as e:
            logger.error(f"Failed to store memory {entry.id}: {e}")
            return StoreResult(success=False, error=str(e))

    async def recall(
        self,
        query_embedding: list[float],
        limit: int = 10,
        min_similarity: float = 0.7,
        filters: Optional[dict[str, Any]] = None,
    ) -> list[RecallResult]:
        """Vector search over non-deleted rows, post-filtered by similarity.

        Only the ``source_instance`` filter key is honored; other keys are
        silently ignored.
        """
        start = time.perf_counter()

        query = self._table.search(query_embedding).where("deleted = false")

        if filters:
            for key, value in filters.items():
                if key == "source_instance":
                    # Allowlist-validated before interpolation into the
                    # SQL-like predicate (guards against injection).
                    safe_val = self._sanitize(value)
                    # NOTE(review): chaining .where() twice — confirm the
                    # LanceDB query builder ANDs predicates rather than
                    # replacing the earlier "deleted = false" clause.
                    query = query.where(f"source_instance = '{safe_val}'")

        # Over-fetch so the similarity cutoff below still leaves enough rows.
        results = query.limit(limit * 2).to_list()
        elapsed_ms = (time.perf_counter() - start) * 1000

        recall_results = []
        for row in results:
            distance = row.get("_distance", 0)
            # Distance -> similarity using cos = 1 - d^2/2, which holds for
            # the Euclidean distance between unit-norm vectors.
            # NOTE(review): if LanceDB's `_distance` is already *squared* L2
            # (its default for the l2 metric), squaring again here is wrong —
            # confirm the metric before trusting absolute scores.
            similarity = max(0, 1 - (distance * distance / 2))

            if similarity < min_similarity:
                continue

            entry = self._row_to_entry(row)
            recall_results.append(RecallResult(
                memory=entry,
                similarity_score=similarity,
                retrieval_time_ms=elapsed_ms
            ))

        # Highest similarity first, then trim to the requested page size.
        recall_results.sort(key=lambda x: x.similarity_score, reverse=True)
        return recall_results[:limit]

    async def get(self, memory_id: str) -> Optional[MemoryEntry]:
        """Fetch one non-deleted entry by id, or None when absent."""
        safe_id = self._sanitize(memory_id)

        results = (
            self._table.search()
            .where(f"id = '{safe_id}' AND deleted = false")
            .limit(1)
            .to_list()
        )

        if not results:
            return None
        return self._row_to_entry(results[0])

    async def delete(self, memory_id: str) -> bool:
        """Soft-delete by flipping the row's ``deleted`` flag.

        Returns True even when no row matched (the update is a no-op then);
        False only when the update itself raised.
        """
        safe_id = self._sanitize(memory_id)

        try:
            self._table.update(
                where=f"id = '{safe_id}'",
                values={"deleted": True, "updated_at": datetime.utcnow().isoformat()}
            )
            return True
        except Exception as e:
            logger.error(f"Failed to delete memory {memory_id}: {e}")
            return False

    async def list(
        self,
        limit: int = 100,
        offset: int = 0,
        filters: Optional[dict[str, Any]] = None,
    ) -> list[MemoryEntry]:
        """Page through non-deleted entries.

        NOTE(review): ``filters`` is accepted but not applied here, and
        pagination fetches offset+limit rows then slices — both worth
        revisiting for large tables.
        """
        query = self._table.search().where("deleted = false")
        results = query.limit(limit + offset).to_list()
        return [self._row_to_entry(r) for r in results[offset:offset + limit]]

    async def count(self, filters: Optional[dict[str, Any]] = None) -> int:
        """Count non-deleted rows.

        NOTE(review): materializes every matching row just to count them,
        and ignores ``filters`` — O(n) and incomplete vs the interface.
        """
        results = self._table.search().where("deleted = false").to_list()
        return len(results)

    def _sanitize(self, value: str) -> str:
        """Allowlist-validate a value destined for a string predicate.

        Only [a-zA-Z0-9_-] is accepted, which excludes quote characters and
        thus SQL-style injection into the where clauses above.

        Raises:
            ValueError: value contains any other character.
        """
        if not re.match(r'^[a-zA-Z0-9\-_]+$', value):
            raise ValueError(f"Invalid value format: {value[:20]}...")
        return value

    def _row_to_entry(self, row: dict) -> MemoryEntry:
        """Rebuild a MemoryEntry from a table row.

        Inverse of the flattening in store(): JSON fields are decoded, empty
        strings map back to None, and missing timestamps default to now.
        """
        return MemoryEntry(
            id=row["id"],
            content=row["content"],
            embedding=row.get("vector"),
            source_instance=row.get("source_instance", "unknown"),
            source_type=MemorySource(row.get("source_type", "unknown")),
            created_at=datetime.fromisoformat(row["created_at"]) if row.get("created_at") else datetime.utcnow(),
            updated_at=datetime.fromisoformat(row["updated_at"]) if row.get("updated_at") else datetime.utcnow(),
            tags=json.loads(row.get("tags", "[]")),
            context=row.get("context") or None,
            confidence=row.get("confidence", 1.0),
            supersedes=row.get("supersedes") or None,
            related_to=json.loads(row.get("related_to", "[]")),
        )
@@ -0,0 +1,128 @@
1
+ """In-memory storage provider."""
2
+
3
+ import time
4
+ from datetime import datetime
5
+ from typing import Optional, Any, Callable
6
+
7
+ from .base import StorageProvider, EmbeddingProvider, ProviderHealth, ProviderStatus
8
+ from ..config.providers import StorageConfig
9
+ from ...interfaces import MemoryEntry, RecallResult, StoreResult
10
+
11
+
12
class InMemoryStorageProvider(StorageProvider[StorageConfig]):
    """In-memory storage for testing and development.

    Entries live in a dict keyed by memory id. Deletes are soft: deleted
    ids go into a tombstone set and are hidden from all reads.
    """

    def __init__(
        self,
        config: StorageConfig,
        embedding_provider: EmbeddingProvider,
    ):
        """Args:
            config: Storage configuration; embedding_dimensions is enforced.
            embedding_provider: Used to embed content and score similarity.
        """
        super().__init__(config)
        self._embedding = embedding_provider
        self._store: dict[str, MemoryEntry] = {}
        self._deleted: set[str] = set()

    async def initialize(self) -> None:
        """No external resources to acquire; just mark ready."""
        self._initialized = True

    async def shutdown(self) -> None:
        """Discard all state."""
        self._store.clear()
        self._deleted.clear()
        self._initialized = False

    async def health_check(self) -> ProviderHealth:
        """Always healthy; reports the raw (including soft-deleted) entry count."""
        return ProviderHealth(
            status=ProviderStatus.HEALTHY,
            latency_ms=0.1,
            message=f"In-memory store with {len(self._store)} entries"
        )

    async def store(self, entry: MemoryEntry) -> StoreResult:
        """Store an entry, embedding its content first when needed.

        Mutates ``entry`` in place when it has no embedding; overwrites any
        existing entry with the same id.
        """
        if entry.embedding is None:
            entry.embedding = await self._embedding.embed(entry.content)

        # Validate embedding dimensions
        if len(entry.embedding) != self.config.embedding_dimensions:
            return StoreResult(
                success=False,
                error=f"Invalid embedding dimension: expected {self.config.embedding_dimensions}, got {len(entry.embedding)}"
            )

        self._store[entry.id] = entry
        # Re-storing an id revives it: clear any tombstone so the new entry
        # is visible again (previously a re-stored id stayed hidden forever).
        self._deleted.discard(entry.id)
        return StoreResult(success=True, memory_id=entry.id)

    async def recall(
        self,
        query_embedding: list[float],
        limit: int = 10,
        min_similarity: float = 0.7,
        filters: Optional[dict[str, Any]] = None,
    ) -> list[RecallResult]:
        """Brute-force similarity scan over all live entries.

        Args:
            query_embedding: Vector to compare against stored embeddings.
            limit: Maximum number of results.
            min_similarity: Drop results scoring below this.
            filters: Optional metadata filters (tags/source_instance/source_type).

        Returns:
            Results sorted by descending similarity.
        """
        start = time.perf_counter()

        results = []
        for entry in self._store.values():
            if entry.id in self._deleted:
                continue
            if entry.embedding is None:
                continue

            # Apply filters
            if filters and not self._matches_filters(entry, filters):
                continue

            sim = self._embedding.similarity(query_embedding, entry.embedding)
            if sim >= min_similarity:
                results.append((entry, sim))

        results.sort(key=lambda x: x[1], reverse=True)
        elapsed_ms = (time.perf_counter() - start) * 1000

        return [
            RecallResult(memory=e, similarity_score=s, retrieval_time_ms=elapsed_ms)
            for e, s in results[:limit]
        ]

    async def get(self, memory_id: str) -> Optional[MemoryEntry]:
        """Return a live entry by id, or None when unknown or soft-deleted."""
        if memory_id in self._deleted:
            return None
        return self._store.get(memory_id)

    async def delete(self, memory_id: str) -> bool:
        """Soft-delete an entry; returns False when the id is unknown."""
        if memory_id in self._store:
            self._deleted.add(memory_id)
            return True
        return False

    async def list(
        self,
        limit: int = 100,
        offset: int = 0,
        filters: Optional[dict[str, Any]] = None,
    ) -> list[MemoryEntry]:
        """List live entries with pagination and optional filtering."""
        entries = [
            e for e in self._store.values()
            if e.id not in self._deleted
        ]

        if filters:
            entries = [e for e in entries if self._matches_filters(e, filters)]

        return entries[offset:offset + limit]

    async def count(self, filters: Optional[dict[str, Any]] = None) -> int:
        """Count live entries matching filters.

        Counts directly over the store instead of paginating through list();
        the previous implementation silently capped the count at 100000.
        """
        return sum(
            1 for e in self._store.values()
            if e.id not in self._deleted
            and (not filters or self._matches_filters(e, filters))
        )

    def _matches_filters(self, entry: MemoryEntry, filters: dict[str, Any]) -> bool:
        """True when the entry satisfies every provided filter key.

        Supported keys: ``tags`` (any-overlap), ``source_instance`` (exact),
        ``source_type`` (enum value match). Unknown keys are ignored.
        """
        for key, value in filters.items():
            if key == "tags":
                if not any(t in entry.tags for t in value):
                    return False
            elif key == "source_instance":
                if entry.source_instance != value:
                    return False
            elif key == "source_type":
                if entry.source_type.value != value:
                    return False
        return True