spatial-memory-mcp 1.0.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spatial-memory-mcp might be problematic. Click here for more details.

Files changed (39) hide show
  1. spatial_memory/__init__.py +97 -97
  2. spatial_memory/__main__.py +241 -2
  3. spatial_memory/adapters/lancedb_repository.py +74 -5
  4. spatial_memory/config.py +115 -2
  5. spatial_memory/core/__init__.py +35 -0
  6. spatial_memory/core/cache.py +317 -0
  7. spatial_memory/core/circuit_breaker.py +297 -0
  8. spatial_memory/core/connection_pool.py +41 -3
  9. spatial_memory/core/consolidation_strategies.py +402 -0
  10. spatial_memory/core/database.py +791 -769
  11. spatial_memory/core/db_idempotency.py +242 -0
  12. spatial_memory/core/db_indexes.py +575 -0
  13. spatial_memory/core/db_migrations.py +584 -0
  14. spatial_memory/core/db_search.py +509 -0
  15. spatial_memory/core/db_versioning.py +177 -0
  16. spatial_memory/core/embeddings.py +156 -19
  17. spatial_memory/core/errors.py +75 -3
  18. spatial_memory/core/filesystem.py +178 -0
  19. spatial_memory/core/logging.py +194 -103
  20. spatial_memory/core/models.py +4 -0
  21. spatial_memory/core/rate_limiter.py +326 -105
  22. spatial_memory/core/response_types.py +497 -0
  23. spatial_memory/core/tracing.py +300 -0
  24. spatial_memory/core/validation.py +403 -319
  25. spatial_memory/factory.py +407 -0
  26. spatial_memory/migrations/__init__.py +40 -0
  27. spatial_memory/ports/repositories.py +52 -2
  28. spatial_memory/server.py +329 -188
  29. spatial_memory/services/export_import.py +61 -43
  30. spatial_memory/services/lifecycle.py +397 -122
  31. spatial_memory/services/memory.py +81 -4
  32. spatial_memory/services/spatial.py +129 -46
  33. spatial_memory/tools/definitions.py +695 -671
  34. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/METADATA +83 -3
  35. spatial_memory_mcp-1.6.0.dist-info/RECORD +54 -0
  36. spatial_memory_mcp-1.0.3.dist-info/RECORD +0 -41
  37. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/WHEEL +0 -0
  38. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/entry_points.txt +0 -0
  39. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,575 @@
1
+ """Index management for LanceDB database.
2
+
3
+ Provides vector, FTS, and scalar index creation and management.
4
+
5
+ This module is part of the database.py refactoring to separate concerns:
6
+ - IndexManager handles all index-related operations
7
+ - Database class delegates to IndexManager for these operations
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import math
14
+ import time
15
+ from typing import TYPE_CHECKING, Any, Protocol
16
+
17
+ from spatial_memory.core.errors import StorageError
18
+
19
+ if TYPE_CHECKING:
20
+ from lancedb.table import Table as LanceTable
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
# Index type names that mark an entry of the table's index list as a vector
# index. Compared against the upper-cased ``index_type`` of each index.
VECTOR_INDEX_TYPES = frozenset(
    (
        "HNSW",
        "HNSW_PQ",
        "HNSW_SQ",
        "IVF_FLAT",
        "IVF_HNSW_PQ",
        "IVF_HNSW_SQ",
        "IVF_PQ",
    )
)
30
+
31
+
32
def _get_index_attr(idx: Any, attr: str, default: Any = None) -> Any:
    """Read one field from an index description, whatever its shape.

    Index entries arrive either as plain dicts (older LanceDB releases) or as
    ``IndexConfig``-style objects (LanceDB 0.27+). Dicts are read by key,
    everything else by attribute.

    Args:
        idx: Index description (dict or attribute-bearing object).
        attr: Name of the key/attribute to read.
        default: Value returned when the key/attribute is absent.

    Returns:
        The stored value, or ``default`` when it is missing.
    """
    return idx.get(attr, default) if isinstance(idx, dict) else getattr(idx, attr, default)
48
+
49
+
50
class IndexManagerProtocol(Protocol):
    """Structural interface that IndexManager expects from its database.

    IndexManager only ever touches the members listed here, so any object
    exposing them can be used. Declaring the dependency as a protocol keeps
    this module from importing the Database class (avoiding a circular import)
    while still giving type checkers something to verify.
    """

    # --- Table access -----------------------------------------------------

    @property
    def table(self) -> LanceTable:
        """The LanceDB table whose indexes are managed."""

    # --- Full-text search settings ----------------------------------------

    @property
    def enable_fts(self) -> bool:
        """True when a full-text search index should be maintained."""

    @property
    def fts_language(self) -> str:
        """Language passed to the FTS index on creation."""

    @property
    def fts_stem(self) -> bool:
        """True when the FTS index should apply stemming."""

    @property
    def fts_remove_stop_words(self) -> bool:
        """True when the FTS index should drop stop words."""

    # --- Vector index settings --------------------------------------------

    @property
    def index_type(self) -> str:
        """Configured vector index type (e.g. "IVF_PQ", "IVF_FLAT", "HNSW_SQ")."""

    @property
    def vector_index_threshold(self) -> int:
        """Minimum row count before a vector index is built without force."""

    @property
    def auto_create_indexes(self) -> bool:
        """True when indexes should be built automatically once thresholds are met."""

    @property
    def hnsw_m(self) -> int:
        """Value passed as ``m`` when building an HNSW index."""

    @property
    def hnsw_ef_construction(self) -> int:
        """Value passed as ``ef_construction`` when building an HNSW index."""

    @property
    def index_wait_timeout_seconds(self) -> float:
        """Maximum number of seconds to wait for a new index to become ready."""

    @property
    def embedding_dim(self) -> int:
        """Dimensionality of the stored embedding vectors."""
117
+
118
+
119
class IndexManager:
    """Create, detect, and track vector, FTS, and scalar indexes on a LanceDB table.

    The manager reads the table and every tuning value from the ``db`` object
    it is constructed with, and caches what it knows about existing indexes in
    three flags: ``has_vector_index`` and ``has_fts_index`` (``None`` means
    "not checked / unknown") and ``has_scalar_indexes``.

    Example:
        index_mgr = IndexManager(database)
        index_mgr.ensure_indexes()
        if not index_mgr.has_vector_index:
            index_mgr.create_vector_index()
    """

    # Row count at which ensure_indexes() starts building scalar indexes.
    SCALAR_INDEX_THRESHOLD = 1000

    # Sub-vector counts tried, in order, when the preferred count does not
    # divide the embedding dimension evenly.
    _SUB_VECTOR_FALLBACKS = (96, 48, 32, 24, 16, 12, 8, 4)

    # (column, scalar index type) pairs built by create_scalar_indexes():
    # BTREE for lookups/range queries, BITMAP for low-cardinality columns,
    # LABEL_LIST for the tags array.
    _SCALAR_INDEX_SPECS = (
        ("id", "BTREE"),  # Fast lookups and merge_insert
        ("created_at", "BTREE"),
        ("updated_at", "BTREE"),
        ("last_accessed", "BTREE"),
        ("importance", "BTREE"),
        ("access_count", "BTREE"),
        ("expires_at", "BTREE"),  # TTL expiration queries
        ("namespace", "BITMAP"),
        ("source", "BITMAP"),
        ("tags", "LABEL_LIST"),
    )

    def __init__(self, db: IndexManagerProtocol) -> None:
        """Initialize the index manager.

        Args:
            db: Database instance providing table and config access.
        """
        self._db = db
        self._has_vector_index: bool | None = None
        self._has_fts_index: bool | None = None
        self._has_scalar_indexes: bool = False

    @property
    def has_vector_index(self) -> bool | None:
        """Whether a vector index exists (``None`` when unknown)."""
        return self._has_vector_index

    @has_vector_index.setter
    def has_vector_index(self, value: bool | None) -> None:
        self._has_vector_index = value

    @property
    def has_fts_index(self) -> bool | None:
        """Whether an FTS index exists (``None`` when unknown)."""
        return self._has_fts_index

    @has_fts_index.setter
    def has_fts_index(self, value: bool | None) -> None:
        self._has_fts_index = value

    @property
    def has_scalar_indexes(self) -> bool:
        """Whether scalar indexes are known to exist."""
        return self._has_scalar_indexes

    @has_scalar_indexes.setter
    def has_scalar_indexes(self, value: bool) -> None:
        self._has_scalar_indexes = value

    def reset_index_state(self) -> None:
        """Forget everything known about existing indexes."""
        self._has_vector_index = None
        self._has_fts_index = None
        self._has_scalar_indexes = False

    def check_existing_indexes(self) -> None:
        """Refresh the vector/FTS flags from the table's index list.

        An index counts as a vector index when its type is in
        ``VECTOR_INDEX_TYPES`` or when "vector" appears in its columns or
        name. It counts as an FTS index when its type is "FTS" or its name
        contains "fts" or "content". If the index list cannot be read, both
        flags are set to ``None`` (unknown) and a warning is logged.
        The scalar-index flag is not touched by this method.
        """
        try:
            has_vector = False
            has_fts = False

            for idx in self._db.table.list_indices():
                index_name = str(_get_index_attr(idx, "name", "")).lower()
                index_type = str(_get_index_attr(idx, "index_type", "")).upper()
                # ``columns`` may be present but None; treat that as empty so
                # the membership test below cannot raise.
                columns = _get_index_attr(idx, "columns", None) or []

                # Vector index detection: check index_type or column name
                if (
                    index_type in VECTOR_INDEX_TYPES
                    or "vector" in columns
                    or "vector" in index_name
                ):
                    has_vector = True

                # FTS index detection: check index_type or name patterns
                if index_type == "FTS" or "fts" in index_name or "content" in index_name:
                    has_fts = True

            self._has_vector_index = has_vector
            self._has_fts_index = has_fts

            logger.debug(
                f"Existing indexes: vector={self._has_vector_index}, "
                f"fts={self._has_fts_index}"
            )
        except Exception as e:
            logger.warning(f"Could not check existing indexes: {e}")
            self._has_vector_index = None
            self._has_fts_index = None

    def create_fts_index(self) -> None:
        """Create the full-text search index on the ``content`` column.

        Best-effort: failures are logged as warnings and not raised. On
        success, or when the backend reports that the index already exists,
        ``has_fts_index`` becomes ``True``; otherwise it is left unchanged.
        """
        try:
            self._db.table.create_fts_index(
                "content",
                use_tantivy=False,  # Use Lance native FTS
                language=self._db.fts_language,
                stem=self._db.fts_stem,
                remove_stop_words=self._db.fts_remove_stop_words,
                with_position=True,  # Enable phrase queries
                lower_case=True,  # Case-insensitive search
            )
            self._has_fts_index = True
            logger.info(
                f"Created FTS index with stemming={self._db.fts_stem}, "
                f"stop_words={self._db.fts_remove_stop_words}"
            )
        except Exception as e:
            # Check if index already exists (not an error)
            if "already exists" in str(e).lower():
                self._has_fts_index = True
                logger.debug("FTS index already exists")
            else:
                logger.warning(f"FTS index creation failed: {e}")

    def create_vector_index(self, force: bool = False) -> bool:
        """Create vector index for similarity search.

        Builds an HNSW_SQ index when the database is configured for it,
        otherwise an IVF index (IVF_PQ or IVF_FLAT) whose parameters are
        derived from the current row count.

        Args:
            force: Build the index even when the table is below the row
                threshold or an index is already known to exist.

        Returns:
            True if index was created, False if skipped.

        Raises:
            StorageError: If index creation fails.
        """
        count = self._db.table.count_rows()

        # Check threshold
        if count < self._db.vector_index_threshold and not force:
            logger.info(
                f"Dataset has {count} rows, below threshold {self._db.vector_index_threshold}. "
                "Skipping vector index creation."
            )
            return False

        # Check if already exists
        if self._has_vector_index and not force:
            logger.info("Vector index already exists")
            return False

        # Handle HNSW_SQ index type
        if self._db.index_type == "HNSW_SQ":
            return self._create_hnsw_index(count)

        # IVF-based index creation (IVF_PQ or IVF_FLAT)
        return self._create_ivf_index(count)

    def _create_hnsw_index(self, count: int) -> bool:
        """Create HNSW-SQ vector index.

        HNSW (Hierarchical Navigable Small World) provides better recall than IVF
        at the cost of higher memory usage.

        Args:
            count: Number of rows in the table (used for logging only).

        Returns:
            True if index was created.

        Raises:
            StorageError: If index creation fails.
        """
        logger.info(
            f"Creating HNSW_SQ vector index: m={self._db.hnsw_m}, "
            f"ef_construction={self._db.hnsw_ef_construction} for {count} rows"
        )
        return self._build_vector_index(
            "HNSW_SQ",
            {
                "metric": "cosine",
                "vector_column_name": "vector",
                "index_type": "HNSW_SQ",
                "replace": True,
                "m": self._db.hnsw_m,
                "ef_construction": self._db.hnsw_ef_construction,
            },
        )

    def _create_ivf_index(self, count: int) -> bool:
        """Create IVF-PQ or IVF-FLAT vector index.

        Partitions follow the sqrt rule, ``sqrt(count)`` clamped to
        [16, 4096], and are reduced to ``count // 4`` (minimum 1) when that
        would not be smaller than the row count. Tables with fewer than 256
        rows always get IVF_FLAT, because PQ training needs more data.
        Sub-vector selection and its divisibility check apply only to
        PQ-based indexes; IVF_FLAT never fails on the embedding dimension.

        Args:
            count: Number of rows in the table.

        Returns:
            True if index was created.

        Raises:
            StorageError: If index creation fails, or if a PQ index is
                requested and no supported sub-vector count divides the
                embedding dimension.
        """
        # Use sqrt rule for partitions, clamped to [16, 4096]
        num_partitions = max(16, min(int(math.sqrt(count)), 4096))

        sample_rate = 256  # default
        if count < 256:
            # Use IVF_FLAT for very small datasets (no PQ training required)
            logger.info(
                f"Dataset too small for IVF-PQ ({count} rows < 256). "
                "Using IVF_FLAT index instead."
            )
            index_type = "IVF_FLAT"
            sample_rate = max(16, count // 4)  # Lower sample rate for small data
        else:
            configured = self._db.index_type
            index_type = configured if configured in ("IVF_PQ", "IVF_FLAT") else "IVF_PQ"

        # Ensure num_partitions < num_vectors for KMeans clustering
        if num_partitions >= count:
            num_partitions = max(1, count // 4)  # Use 1/4 of count, minimum 1
            logger.info(f"Adjusted num_partitions to {num_partitions} for {count} rows")

        # LanceDB 0.27+ API: parameters passed directly to create_index
        index_kwargs: dict[str, Any] = {
            "metric": "cosine",
            "num_partitions": num_partitions,
            "vector_column_name": "vector",
            "index_type": index_type,
            "replace": True,
            "sample_rate": sample_rate,
        }

        # num_sub_vectors only applies to PQ-based indexes, so only validate
        # the embedding dimension when one is actually being built.
        if "PQ" in index_type:
            num_sub_vectors = self._choose_num_sub_vectors(count)
            index_kwargs["num_sub_vectors"] = num_sub_vectors
            layout = f"{num_partitions} partitions, {num_sub_vectors} sub-vectors"
        else:
            layout = f"{num_partitions} partitions"

        logger.info(f"Creating {index_type} vector index: {layout} for {count} rows")
        return self._build_vector_index(index_type, index_kwargs)

    def _choose_num_sub_vectors(self, count: int) -> int:
        """Pick a PQ sub-vector count that divides the embedding dimension.

        Prefers 48 sub-vectors below 500K rows (more precision) and 96 from
        500K rows up (more compression); falls back to the first entry of
        ``_SUB_VECTOR_FALLBACKS`` that divides ``embedding_dim``.

        Args:
            count: Number of rows in the table.

        Returns:
            A sub-vector count that evenly divides the embedding dimension.

        Raises:
            StorageError: If no candidate divides the embedding dimension.
        """
        embedding_dim = self._db.embedding_dim
        preferred = 48 if count < 500_000 else 96
        if embedding_dim % preferred == 0:
            return preferred

        for divisor in self._SUB_VECTOR_FALLBACKS:
            if embedding_dim % divisor == 0:
                logger.info(
                    f"Adjusted num_sub_vectors from {preferred} to {divisor} "
                    f"for embedding_dim={embedding_dim}"
                )
                return divisor

        raise StorageError(
            f"Cannot create IVF-PQ index: embedding_dim={embedding_dim} "
            "has no suitable divisor for sub-vectors. "
            f"Tried divisors: {list(self._SUB_VECTOR_FALLBACKS)}"
        )

    def _build_vector_index(self, index_type: str, index_kwargs: dict[str, Any]) -> bool:
        """Run ``create_index`` and the shared post-creation steps.

        After creation this waits for the index to become ready, marks the
        vector index as present, and attempts a table optimize (whose failure
        is only logged at debug level).

        Args:
            index_type: Index type name, used in log and error messages.
            index_kwargs: Keyword arguments forwarded to ``table.create_index``.

        Returns:
            True once the index has been created.

        Raises:
            StorageError: If creating or waiting for the index fails.
        """
        try:
            self._db.table.create_index(**index_kwargs)

            # Wait for index to be ready with configurable timeout
            self._wait_for_index_ready("vector", self._db.index_wait_timeout_seconds)

            self._has_vector_index = True
            logger.info(f"{index_type} vector index created successfully")

            # Optimize after index creation (may fail in some environments)
            try:
                self._db.table.optimize()
            except Exception as optimize_error:
                logger.debug(f"Optimization after index creation skipped: {optimize_error}")

            return True

        except Exception as e:
            logger.error(f"Failed to create {index_type} vector index: {e}")
            raise StorageError(f"{index_type} vector index creation failed: {e}") from e

    def _wait_for_index_ready(
        self,
        column_name: str,
        timeout_seconds: float,
        poll_interval: float = 0.5,
    ) -> None:
        """Wait for an index on the specified column to be ready.

        Polls the table's index list until an index matching the column
        reports a status of "ready", "complete" or "built" (an index without
        a status field is treated as ready). Gives up with a warning once the
        timeout elapses; it never raises on timeout.

        Args:
            column_name: Name of the column the index is on (e.g., "vector").
                An index matches when the column is listed in its columns or
                the column name appears in the index name.
            timeout_seconds: Maximum time to wait; ``<= 0`` disables waiting.
            poll_interval: Time between status checks.
        """
        if timeout_seconds <= 0:
            return

        # Monotonic clock: immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout_seconds
        while time.monotonic() < deadline:
            try:
                for idx in self._db.table.list_indices():
                    idx_name = str(_get_index_attr(idx, "name", "")).lower()
                    # ``columns`` may be present but None; treat as empty.
                    idx_columns = _get_index_attr(idx, "columns", None) or []

                    # Match by column name in index metadata, or index name contains column
                    if column_name in idx_columns or column_name in idx_name:
                        # Index exists, check if it's ready
                        status = str(_get_index_attr(idx, "status", "ready"))
                        if status.lower() in ("ready", "complete", "built"):
                            logger.debug(f"Index on {column_name} is ready")
                            return
                        break
            except Exception as e:
                logger.debug(f"Error checking index status: {e}")

            # Never sleep past the deadline.
            time.sleep(min(poll_interval, max(0.0, deadline - time.monotonic())))

        logger.warning(
            f"Timeout waiting for index on {column_name} after {timeout_seconds}s"
        )

    def create_scalar_indexes(self) -> None:
        """Create scalar indexes for frequently filtered columns.

        Creates:
        - BTREE on id (fast lookups, upserts)
        - BTREE on timestamps, importance and access_count (range queries)
        - BITMAP on namespace and source (low cardinality)
        - LABEL_LIST on tags (array contains queries)

        Best-effort: each index is attempted independently and failures are
        logged as warnings, never raised. ``has_scalar_indexes`` becomes
        ``True`` only when at least one index was created or already existed;
        when every attempt fails the flag is left unchanged.
        """
        available = 0
        failed_columns: list[str] = []

        for column, index_type in self._SCALAR_INDEX_SPECS:
            try:
                self._db.table.create_scalar_index(
                    column,
                    index_type=index_type,
                    replace=True,
                )
                logger.debug(f"Created {index_type} index on {column}")
                available += 1
            except Exception as e:
                if "already exists" in str(e).lower():
                    # An existing index is as good as a new one.
                    available += 1
                else:
                    failed_columns.append(column)
                    logger.warning(f"Could not create {index_type} index on {column}: {e}")

        if not available:
            logger.warning("No scalar indexes could be created")
            return

        self._has_scalar_indexes = True
        if failed_columns:
            logger.warning(
                f"Scalar indexes created, except on: {', '.join(failed_columns)}"
            )
        else:
            logger.info("Scalar indexes created")

    def ensure_indexes(self, force: bool = False) -> dict[str, bool]:
        """Ensure all appropriate indexes exist.

        Vector index creation errors propagate as ``StorageError``; scalar
        and FTS index problems are logged and reflected in the result.

        Args:
            force: Force index creation regardless of thresholds.

        Returns:
            Dict with keys ``vector_index``, ``scalar_indexes`` and
            ``fts_index``. ``vector_index`` is True when a vector index was
            built by this call; the other two are True when the matching
            creation step ran and the index is available afterwards.
        """
        results = {
            "vector_index": False,
            "scalar_indexes": False,
            "fts_index": False,
        }

        count = self._db.table.count_rows()

        # Vector index
        if force or (
            self._db.auto_create_indexes and count >= self._db.vector_index_threshold
        ):
            results["vector_index"] = self.create_vector_index(force=force)

        # Scalar indexes (always create once the row threshold is reached)
        if count >= self.SCALAR_INDEX_THRESHOLD or force:
            try:
                self.create_scalar_indexes()
                # The creator swallows per-column failures, so report the
                # resulting state rather than assuming success.
                results["scalar_indexes"] = self._has_scalar_indexes
            except Exception as e:
                logger.warning(f"Scalar index creation partially failed: {e}")

        # FTS index
        if self._db.enable_fts and not self._has_fts_index:
            try:
                self.create_fts_index()
                # The creator logs and swallows failures; trust the flag.
                results["fts_index"] = bool(self._has_fts_index)
            except Exception as e:
                logger.warning(f"FTS index creation failed in ensure_indexes: {e}")

        return results