spatial-memory-mcp 1.6.1 (spatial_memory_mcp-1.6.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spatial-memory-mcp might be problematic.
- spatial_memory/__init__.py +97 -0
- spatial_memory/__main__.py +270 -0
- spatial_memory/adapters/__init__.py +7 -0
- spatial_memory/adapters/lancedb_repository.py +878 -0
- spatial_memory/config.py +728 -0
- spatial_memory/core/__init__.py +118 -0
- spatial_memory/core/cache.py +317 -0
- spatial_memory/core/circuit_breaker.py +297 -0
- spatial_memory/core/connection_pool.py +220 -0
- spatial_memory/core/consolidation_strategies.py +402 -0
- spatial_memory/core/database.py +3069 -0
- spatial_memory/core/db_idempotency.py +242 -0
- spatial_memory/core/db_indexes.py +575 -0
- spatial_memory/core/db_migrations.py +584 -0
- spatial_memory/core/db_search.py +509 -0
- spatial_memory/core/db_versioning.py +177 -0
- spatial_memory/core/embeddings.py +557 -0
- spatial_memory/core/errors.py +317 -0
- spatial_memory/core/file_security.py +702 -0
- spatial_memory/core/filesystem.py +178 -0
- spatial_memory/core/health.py +289 -0
- spatial_memory/core/helpers.py +79 -0
- spatial_memory/core/import_security.py +432 -0
- spatial_memory/core/lifecycle_ops.py +1067 -0
- spatial_memory/core/logging.py +194 -0
- spatial_memory/core/metrics.py +192 -0
- spatial_memory/core/models.py +628 -0
- spatial_memory/core/rate_limiter.py +326 -0
- spatial_memory/core/response_types.py +497 -0
- spatial_memory/core/security.py +588 -0
- spatial_memory/core/spatial_ops.py +426 -0
- spatial_memory/core/tracing.py +300 -0
- spatial_memory/core/utils.py +110 -0
- spatial_memory/core/validation.py +403 -0
- spatial_memory/factory.py +407 -0
- spatial_memory/migrations/__init__.py +40 -0
- spatial_memory/ports/__init__.py +11 -0
- spatial_memory/ports/repositories.py +631 -0
- spatial_memory/py.typed +0 -0
- spatial_memory/server.py +1141 -0
- spatial_memory/services/__init__.py +70 -0
- spatial_memory/services/export_import.py +1023 -0
- spatial_memory/services/lifecycle.py +1120 -0
- spatial_memory/services/memory.py +412 -0
- spatial_memory/services/spatial.py +1147 -0
- spatial_memory/services/utility.py +409 -0
- spatial_memory/tools/__init__.py +5 -0
- spatial_memory/tools/definitions.py +695 -0
- spatial_memory/verify.py +140 -0
- spatial_memory_mcp-1.6.1.dist-info/METADATA +499 -0
- spatial_memory_mcp-1.6.1.dist-info/RECORD +54 -0
- spatial_memory_mcp-1.6.1.dist-info/WHEEL +4 -0
- spatial_memory_mcp-1.6.1.dist-info/entry_points.txt +2 -0
- spatial_memory_mcp-1.6.1.dist-info/licenses/LICENSE +21 -0
spatial_memory/adapters/lancedb_repository.py
@@ -0,0 +1,878 @@
"""LanceDB repository adapter implementing MemoryRepositoryProtocol.

This adapter wraps the Database class to provide a clean interface
for the service layer, following Clean Architecture principles.
"""

from __future__ import annotations

import logging
from collections.abc import Iterator
from dataclasses import asdict
from pathlib import Path
from typing import TYPE_CHECKING, Any

import numpy as np

from spatial_memory.core.errors import MemoryNotFoundError, StorageError, ValidationError
from spatial_memory.core.models import Memory, MemoryResult, MemorySource

if TYPE_CHECKING:
    from spatial_memory.core.database import Database

logger = logging.getLogger(__name__)


class LanceDBMemoryRepository:
    """Repository implementation using LanceDB.

    Implements MemoryRepositoryProtocol for use with MemoryService.
    """

    def __init__(self, database: Database) -> None:
        """Initialize the repository.

        Args:
            database: LanceDB database wrapper instance.
        """
        self._db = database

    def add(self, memory: Memory, vector: np.ndarray) -> str:
        """Add a memory with its embedding vector.

        Args:
            memory: The Memory object to store.
            vector: The embedding vector for the memory.

        Returns:
            The generated memory ID (UUID string).

        Raises:
            ValidationError: If input validation fails.
            StorageError: If database operation fails.
        """
        try:
            return self._db.insert(
                content=memory.content,
                vector=vector,
                namespace=memory.namespace,
                tags=memory.tags,
                importance=memory.importance,
                source=memory.source.value,
                metadata=memory.metadata,
            )
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in add: {e}")
            raise StorageError(f"Failed to add memory: {e}") from e

    def add_batch(
        self,
        memories: list[Memory],
        vectors: list[np.ndarray],
    ) -> list[str]:
        """Add multiple memories efficiently.

        Args:
            memories: List of Memory objects to store.
            vectors: List of embedding vectors (same order as memories).

        Returns:
            List of generated memory IDs.

        Raises:
            ValidationError: If input validation fails.
            StorageError: If database operation fails.
        """
        try:
            records = []
            for memory, vector in zip(memories, vectors):
                records.append({
                    "content": memory.content,
                    "vector": vector,
                    "namespace": memory.namespace,
                    "tags": memory.tags,
                    "importance": memory.importance,
                    "source": memory.source.value,
                    "metadata": memory.metadata,
                })
            return self._db.insert_batch(records)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in add_batch: {e}")
            raise StorageError(f"Failed to add batch: {e}") from e

    def get(self, memory_id: str) -> Memory | None:
        """Get a memory by ID.

        Args:
            memory_id: The memory UUID.

        Returns:
            The Memory object, or None if not found.

        Raises:
            ValidationError: If memory_id is invalid.
            StorageError: If database operation fails.
        """
        try:
            record = self._db.get(memory_id)
            return self._record_to_memory(record)
        except MemoryNotFoundError:
            return None
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get: {e}")
            raise StorageError(f"Failed to get memory: {e}") from e

    def get_with_vector(self, memory_id: str) -> tuple[Memory, np.ndarray] | None:
        """Get a memory and its vector by ID.

        Args:
            memory_id: The memory UUID.

        Returns:
            Tuple of (Memory, vector), or None if not found.

        Raises:
            ValidationError: If memory_id is invalid.
            StorageError: If database operation fails.
        """
        try:
            record = self._db.get(memory_id)
            memory = self._record_to_memory(record)
            vector = np.array(record["vector"], dtype=np.float32)
            return (memory, vector)
        except MemoryNotFoundError:
            return None
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_with_vector: {e}")
            raise StorageError(f"Failed to get memory with vector: {e}") from e

    def delete(self, memory_id: str) -> bool:
        """Delete a memory by ID.

        Args:
            memory_id: The memory UUID.

        Returns:
            True if deleted, False if not found.

        Raises:
            ValidationError: If memory_id is invalid.
            StorageError: If database operation fails.
        """
        try:
            self._db.delete(memory_id)
            return True
        except MemoryNotFoundError:
            return False
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in delete: {e}")
            raise StorageError(f"Failed to delete memory: {e}") from e

    def delete_batch(self, memory_ids: list[str]) -> tuple[int, list[str]]:
        """Delete multiple memories atomically.

        Delegates to Database.delete_batch for proper encapsulation.

        Args:
            memory_ids: List of memory UUIDs to delete.

        Returns:
            Tuple of (count_deleted, list_of_deleted_ids) where:
            - count_deleted: Number of memories actually deleted
            - list_of_deleted_ids: IDs that were actually deleted

        Raises:
            ValidationError: If any memory_id is invalid.
            StorageError: If database operation fails.
        """
        try:
            return self._db.delete_batch(memory_ids)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in delete_batch: {e}")
            raise StorageError(f"Failed to delete batch: {e}") from e

    def search(
        self,
        query_vector: np.ndarray,
        limit: int = 5,
        namespace: str | None = None,
        include_vector: bool = False,
    ) -> list[MemoryResult]:
        """Search for similar memories by vector.

        Args:
            query_vector: Query embedding vector.
            limit: Maximum number of results.
            namespace: Filter to specific namespace.
            include_vector: Whether to include embedding vectors in results.
                Defaults to False to reduce response size.

        Returns:
            List of MemoryResult objects with similarity scores.
            If include_vector=True, each result includes its embedding vector.

        Raises:
            ValidationError: If input validation fails.
            StorageError: If database operation fails.
        """
        try:
            results = self._db.vector_search(
                query_vector,
                limit=limit,
                namespace=namespace,
                include_vector=include_vector,
            )
            return [self._record_to_memory_result(r) for r in results]
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in search: {e}")
            raise StorageError(f"Failed to search: {e}") from e

    def update_access(self, memory_id: str) -> None:
        """Update access timestamp and count for a memory.

        Args:
            memory_id: The memory UUID.

        Raises:
            ValidationError: If memory_id is invalid.
            MemoryNotFoundError: If memory doesn't exist.
            StorageError: If database operation fails.
        """
        try:
            self._db.update_access(memory_id)
        except (ValidationError, MemoryNotFoundError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in update_access: {e}")
            raise StorageError(f"Failed to update access: {e}") from e

    def update_access_batch(self, memory_ids: list[str]) -> int:
        """Update access timestamp and count for multiple memories.

        Delegates to Database.update_access_batch for proper encapsulation.

        Args:
            memory_ids: List of memory UUIDs.

        Returns:
            Number of memories successfully updated.

        Raises:
            ValidationError: If any memory_id is invalid.
            StorageError: If database operation fails.
        """
        try:
            return self._db.update_access_batch(memory_ids)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in update_access_batch: {e}")
            raise StorageError(f"Batch access update failed: {e}") from e

    def update(self, memory_id: str, updates: dict[str, Any]) -> None:
        """Update a memory's fields.

        Args:
            memory_id: The memory UUID.
            updates: Fields to update.

        Raises:
            ValidationError: If input validation fails.
            MemoryNotFoundError: If memory doesn't exist.
            StorageError: If database operation fails.
        """
        try:
            self._db.update(memory_id, updates)
        except (ValidationError, MemoryNotFoundError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in update: {e}")
            raise StorageError(f"Failed to update memory: {e}") from e

    def get_batch(self, memory_ids: list[str]) -> dict[str, Memory]:
        """Get multiple memories by ID in a single query.

        Args:
            memory_ids: List of memory UUIDs to retrieve.

        Returns:
            Dict mapping memory_id to Memory object. Missing IDs are not included.

        Raises:
            ValidationError: If any memory_id format is invalid.
            StorageError: If database operation fails.
        """
        try:
            raw_results = self._db.get_batch(memory_ids)
            result: dict[str, Memory] = {}
            for memory_id, record in raw_results.items():
                result[memory_id] = self._record_to_memory(record)
            return result
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_batch: {e}")
            raise StorageError(f"Failed to batch get memories: {e}") from e

    def update_batch(
        self, updates: list[tuple[str, dict[str, Any]]]
    ) -> tuple[int, list[str]]:
        """Update multiple memories in a single batch operation.

        Args:
            updates: List of (memory_id, updates_dict) tuples.

        Returns:
            Tuple of (success_count, list of failed memory_ids).

        Raises:
            StorageError: If database operation fails completely.
        """
        try:
            return self._db.update_batch(updates)
        except StorageError:
            raise
        except Exception as e:
            logger.error(f"Unexpected error in update_batch: {e}")
            raise StorageError(f"Failed to batch update memories: {e}") from e

    def count(self, namespace: str | None = None) -> int:
        """Count memories.

        Args:
            namespace: Filter to specific namespace.

        Returns:
            Number of memories.

        Raises:
            ValidationError: If namespace is invalid.
            StorageError: If database operation fails.
        """
        try:
            return self._db.count(namespace=namespace)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in count: {e}")
            raise StorageError(f"Failed to count memories: {e}") from e

    def get_namespaces(self) -> list[str]:
        """Get all unique namespaces.

        Returns:
            List of namespace names.

        Raises:
            StorageError: If database operation fails.
        """
        try:
            return self._db.get_namespaces()
        except StorageError:
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_namespaces: {e}")
            raise StorageError(f"Failed to get namespaces: {e}") from e

    def get_all(
        self,
        namespace: str | None = None,
        limit: int | None = None,
    ) -> list[tuple[Memory, np.ndarray]]:
        """Get all memories with their vectors.

        Args:
            namespace: Filter to specific namespace.
            limit: Maximum number of results.

        Returns:
            List of (Memory, vector) tuples.

        Raises:
            ValidationError: If namespace is invalid.
            StorageError: If database operation fails.
        """
        try:
            records = self._db.get_all(namespace=namespace, limit=limit)
            results = []
            for record in records:
                memory = self._record_to_memory(record)
                vector = np.array(record["vector"], dtype=np.float32)
                results.append((memory, vector))
            return results
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_all: {e}")
            raise StorageError(f"Failed to get all memories: {e}") from e

    def hybrid_search(
        self,
        query_vector: np.ndarray,
        query_text: str,
        limit: int = 5,
        namespace: str | None = None,
        alpha: float = 0.5,
    ) -> list[MemoryResult]:
        """Search using both vector similarity and full-text search.

        Args:
            query_vector: Query embedding vector.
            query_text: Query text for FTS.
            limit: Maximum results.
            namespace: Optional namespace filter.
            alpha: Balance between vector (1.0) and FTS (0.0).

        Returns:
            List of matching memories ranked by combined score.

        Raises:
            ValidationError: If input validation fails.
            StorageError: If database operation fails.
        """
        try:
            results = self._db.hybrid_search(
                query=query_text,
                query_vector=query_vector,
                limit=limit,
                namespace=namespace,
                alpha=alpha,
            )
            return [self._record_to_memory_result(r) for r in results]
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in hybrid_search: {e}")
            raise StorageError(f"Failed to perform hybrid search: {e}") from e

    def get_health_metrics(self) -> dict[str, Any]:
        """Get database health metrics.

        Returns:
            Dictionary with health metrics.

        Raises:
            StorageError: If database operation fails.
        """
        try:
            metrics = self._db.get_health_metrics()
            return asdict(metrics)
        except Exception as e:
            logger.error(f"Unexpected error in get_health_metrics: {e}")
            raise StorageError(f"Failed to get health metrics: {e}") from e

    def optimize(self) -> dict[str, Any]:
        """Run optimization and compaction.

        Returns:
            Dictionary with optimization results.

        Raises:
            StorageError: If database operation fails.
        """
        try:
            return self._db.optimize()
        except Exception as e:
            logger.error(f"Unexpected error in optimize: {e}")
            raise StorageError(f"Failed to optimize database: {e}") from e

    def export_to_parquet(self, path: Path) -> int:
        """Export memories to Parquet file.

        Args:
            path: Output file path.

        Returns:
            Number of records exported.

        Raises:
            StorageError: If export fails.
        """
        try:
            result = self._db.export_to_parquet(output_path=path)
            rows_exported = result.get("rows_exported", 0)
            if not isinstance(rows_exported, int):
                raise StorageError("Invalid export result: rows_exported is not an integer")
            return rows_exported
        except StorageError:
            raise
        except Exception as e:
            logger.error(f"Unexpected error in export_to_parquet: {e}")
            raise StorageError(f"Failed to export to Parquet: {e}") from e

    def import_from_parquet(
        self,
        path: Path,
        namespace_override: str | None = None,
    ) -> int:
        """Import memories from Parquet file.

        Args:
            path: Input file path.
            namespace_override: Override namespace for imported memories.

        Returns:
            Number of records imported.

        Raises:
            ValidationError: If input validation fails.
            StorageError: If import fails.
        """
        try:
            result = self._db.import_from_parquet(
                parquet_path=path,
                namespace_override=namespace_override,
            )
            rows_imported = result.get("rows_imported", 0)
            if not isinstance(rows_imported, int):
                raise StorageError("Invalid import result: rows_imported is not an integer")
            return rows_imported
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in import_from_parquet: {e}")
            raise StorageError(f"Failed to import from Parquet: {e}") from e

    def _record_to_memory(self, record: dict[str, Any]) -> Memory:
        """Convert a database record to a Memory object.

        Args:
            record: Dictionary from database.

        Returns:
            Memory object.
        """
        # Handle source enum
        source_value = record.get("source", "manual")
        try:
            source = MemorySource(source_value)
        except ValueError:
            source = MemorySource.MANUAL

        return Memory(
            id=record["id"],
            content=record["content"],
            created_at=record["created_at"],
            updated_at=record["updated_at"],
            last_accessed=record["last_accessed"],
            access_count=record["access_count"],
            importance=record["importance"],
            namespace=record["namespace"],
            tags=record.get("tags", []),
            source=source,
            metadata=record.get("metadata", {}),
        )

    def _record_to_memory_result(self, record: dict[str, Any]) -> MemoryResult:
        """Convert a search result record to a MemoryResult object.

        Args:
            record: Dictionary from database search.

        Returns:
            MemoryResult object.
        """
        # Clamp similarity to valid range [0, 1]
        # Cosine distance can sometimes produce values slightly outside this range
        similarity = record.get("similarity", 0.0)
        similarity = max(0.0, min(1.0, similarity))

        # Include vector if present in record (when include_vector=True in search)
        vector = None
        if "vector" in record and record["vector"] is not None:
            # Convert to list for JSON serialization
            vec = record["vector"]
            vector = vec.tolist() if hasattr(vec, "tolist") else list(vec)

        return MemoryResult(
            id=record["id"],
            content=record["content"],
            similarity=similarity,
            namespace=record["namespace"],
            tags=record.get("tags", []),
            importance=record["importance"],
            created_at=record["created_at"],
            metadata=record.get("metadata", {}),
            vector=vector,
        )

    # ========================================================================
    # Spatial Operations (Phase 4B)
    # ========================================================================

    def get_vectors_for_clustering(
        self,
        namespace: str | None = None,
        max_memories: int = 10_000,
    ) -> tuple[list[str], np.ndarray]:
        """Extract memory IDs and vectors efficiently for clustering.

        Optimized for memory efficiency with large datasets. Used by
        spatial operations like HDBSCAN clustering for region detection.

        Args:
            namespace: Filter to specific namespace.
            max_memories: Maximum memories to fetch.

        Returns:
            Tuple of (memory_ids, vectors_array) where vectors_array
            is a 2D numpy array of shape (n_memories, embedding_dim).

        Raises:
            ValidationError: If input validation fails.
            StorageError: If database operation fails.
        """
        try:
            return self._db.get_vectors_for_clustering(
                namespace=namespace,
                max_memories=max_memories,
            )
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_vectors_for_clustering: {e}")
            raise StorageError(f"Failed to get vectors for clustering: {e}") from e

    def batch_vector_search(
        self,
        query_vectors: list[np.ndarray],
        limit_per_query: int = 3,
        namespace: str | None = None,
        include_vector: bool = False,
    ) -> list[list[dict[str, Any]]]:
        """Search for memories near multiple query points.

        Efficient for operations like journey interpolation where multiple
        points need to find nearby memories. Uses parallel execution when
        beneficial.

        Args:
            query_vectors: List of query embedding vectors.
            limit_per_query: Maximum results per query vector.
            namespace: Filter to specific namespace.
            include_vector: Whether to include embedding vectors in results.
                Defaults to False to reduce response size.

        Returns:
            List of result lists (one per query vector). Each result
            is a dict containing memory fields and similarity score.
            If include_vector=True, each dict includes the 'vector' field.

        Raises:
            ValidationError: If input validation fails.
            StorageError: If database operation fails.
        """
        try:
            return self._db.batch_vector_search(
                query_vectors=query_vectors,
                limit_per_query=limit_per_query,
                namespace=namespace,
                include_vector=include_vector,
            )
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in batch_vector_search: {e}")
            raise StorageError(f"Failed to perform batch vector search: {e}") from e

    def vector_search(
        self,
        query_vector: np.ndarray,
        limit: int = 5,
        namespace: str | None = None,
    ) -> list[dict[str, Any]]:
        """Search for similar memories by vector (returns raw dict).

        Lower-level search that returns raw dictionary results instead
        of MemoryResult objects. Useful for spatial operations that need
        direct access to all fields including vectors.

        Args:
            query_vector: Query embedding vector.
            limit: Maximum number of results.
            namespace: Filter to specific namespace.

        Returns:
            List of memory records as dictionaries with similarity scores.

        Raises:
            ValidationError: If input validation fails.
            StorageError: If database operation fails.
        """
        try:
            return self._db.vector_search(
                query_vector=query_vector,
                limit=limit,
                namespace=namespace,
            )
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in vector_search: {e}")
            raise StorageError(f"Failed to perform vector search: {e}") from e

    # ========================================================================
    # Phase 5 Protocol Extensions: Utility & Export/Import Operations
    # ========================================================================

    def delete_by_namespace(self, namespace: str) -> int:
        """Delete all memories in a namespace.

        Args:
            namespace: The namespace whose memories should be deleted.

        Returns:
            Number of memories deleted.

        Raises:
            ValidationError: If namespace is invalid.
            StorageError: If database operation fails.
        """
        try:
            return self._db.delete_by_namespace(namespace)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in delete_by_namespace: {e}")
            raise StorageError(f"Failed to delete namespace: {e}") from e

    def rename_namespace(self, old_namespace: str, new_namespace: str) -> int:
        """Rename all memories from one namespace to another.

        Args:
            old_namespace: The current namespace name (source).
            new_namespace: The new namespace name (target).

        Returns:
            Number of memories renamed.

        Raises:
            ValidationError: If namespace names are invalid.
            NamespaceNotFoundError: If old_namespace doesn't exist.
            StorageError: If database operation fails.
        """
        from spatial_memory.core.errors import NamespaceNotFoundError

        try:
            return self._db.rename_namespace(old_namespace, new_namespace)
        except (ValidationError, NamespaceNotFoundError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in rename_namespace: {e}")
            raise StorageError(f"Failed to rename namespace: {e}") from e

    def get_stats(self, namespace: str | None = None) -> dict[str, Any]:
        """Get comprehensive database statistics.

        Args:
            namespace: Filter statistics to a specific namespace.
                If None, returns statistics for all namespaces.

        Returns:
            Dictionary containing statistics.

        Raises:
            ValidationError: If namespace is invalid.
            StorageError: If database operation fails.
        """
        try:
            return self._db.get_stats(namespace)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_stats: {e}")
            raise StorageError(f"Failed to get stats: {e}") from e

    def get_namespace_stats(self, namespace: str) -> dict[str, Any]:
        """Get statistics for a specific namespace.

        Args:
            namespace: The namespace to get statistics for.

        Returns:
            Dictionary containing namespace statistics.

        Raises:
            ValidationError: If namespace is invalid.
            NamespaceNotFoundError: If namespace doesn't exist.
            StorageError: If database operation fails.
        """
        from spatial_memory.core.errors import NamespaceNotFoundError

        try:
            return self._db.get_namespace_stats(namespace)
        except (ValidationError, NamespaceNotFoundError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_namespace_stats: {e}")
            raise StorageError(f"Failed to get namespace stats: {e}") from e

    def get_all_for_export(
        self,
        namespace: str | None = None,
        batch_size: int = 1000,
    ) -> Iterator[list[dict[str, Any]]]:
        """Stream all memories for export in batches.

        Args:
            namespace: Filter to a specific namespace.
                If None, exports all namespaces.
            batch_size: Number of records per yielded batch.

        Yields:
            Batches of memory dictionaries.

        Raises:
            ValidationError: If namespace is invalid.
            StorageError: If database operation fails.
        """
        try:
            yield from self._db.get_all_for_export(namespace, batch_size)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in get_all_for_export: {e}")
            raise StorageError(f"Failed to export: {e}") from e

    def bulk_import(
        self,
        records: Iterator[dict[str, Any]],
        batch_size: int = 1000,
        namespace_override: str | None = None,
    ) -> tuple[int, list[str]]:
        """Import memories from an iterator of records.

        Args:
            records: Iterator of memory dictionaries.
            batch_size: Number of records per database insert batch.
            namespace_override: If provided, overrides the namespace
                field for all imported records.

        Returns:
            Tuple of (records_imported, list_of_new_ids).

        Raises:
            ValidationError: If records contain invalid data.
            StorageError: If database operation fails.
        """
        try:
            return self._db.bulk_import(records, batch_size, namespace_override)
        except (ValidationError, StorageError):
            raise
        except Exception as e:
            logger.error(f"Unexpected error in bulk_import: {e}")
            raise StorageError(f"Failed to bulk import: {e}") from e
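
For orientation, here is a minimal usage sketch of the adapter above. It is illustrative only and not taken from the package: the Database constructor argument, the assumption that the Memory model defaults its id and timestamp fields, and the 384-dimension placeholder vector are all guesses; the real signatures live in spatial_memory/core/database.py and spatial_memory/core/models.py, which are not shown in this diff.

    # Hypothetical sketch -- not from this release. Assumes Database accepts a
    # storage path and Memory defaults id/timestamps (see core/models.py).
    import numpy as np

    from spatial_memory.adapters.lancedb_repository import LanceDBMemoryRepository
    from spatial_memory.core.database import Database
    from spatial_memory.core.models import Memory, MemorySource

    db = Database("./memories.lancedb")  # assumed constructor signature
    repo = LanceDBMemoryRepository(db)

    # Field names match the attributes the adapter reads in add().
    memory = Memory(
        content="LanceDB compacts data files during optimize()",
        namespace="notes",
        tags=["lancedb"],
        importance=0.8,
        source=MemorySource.MANUAL,
        metadata={},
    )
    vector = np.random.rand(384).astype(np.float32)  # stand-in for a real embedding
    memory_id = repo.add(memory, vector)

    # Pure vector search, then a hybrid search: alpha=1.0 weights vector
    # similarity only, alpha=0.0 weights full-text relevance only.
    nearest = repo.search(vector, limit=3, namespace="notes")
    blended = repo.hybrid_search(vector, "compaction", limit=3, namespace="notes", alpha=0.5)

Note the uniform error-narrowing pattern throughout the adapter: domain exceptions (ValidationError, MemoryNotFoundError, StorageError) pass through unchanged, while anything unexpected is logged and re-raised as StorageError with the original exception chained via `from e`, so the service layer only ever has to handle the domain hierarchy.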