spatial-memory-mcp 1.0.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spatial-memory-mcp might be problematic.

Files changed (39):
  1. spatial_memory/__init__.py +97 -97
  2. spatial_memory/__main__.py +241 -2
  3. spatial_memory/adapters/lancedb_repository.py +74 -5
  4. spatial_memory/config.py +115 -2
  5. spatial_memory/core/__init__.py +35 -0
  6. spatial_memory/core/cache.py +317 -0
  7. spatial_memory/core/circuit_breaker.py +297 -0
  8. spatial_memory/core/connection_pool.py +41 -3
  9. spatial_memory/core/consolidation_strategies.py +402 -0
  10. spatial_memory/core/database.py +791 -769
  11. spatial_memory/core/db_idempotency.py +242 -0
  12. spatial_memory/core/db_indexes.py +575 -0
  13. spatial_memory/core/db_migrations.py +584 -0
  14. spatial_memory/core/db_search.py +509 -0
  15. spatial_memory/core/db_versioning.py +177 -0
  16. spatial_memory/core/embeddings.py +156 -19
  17. spatial_memory/core/errors.py +75 -3
  18. spatial_memory/core/filesystem.py +178 -0
  19. spatial_memory/core/logging.py +194 -103
  20. spatial_memory/core/models.py +4 -0
  21. spatial_memory/core/rate_limiter.py +326 -105
  22. spatial_memory/core/response_types.py +497 -0
  23. spatial_memory/core/tracing.py +300 -0
  24. spatial_memory/core/validation.py +403 -319
  25. spatial_memory/factory.py +407 -0
  26. spatial_memory/migrations/__init__.py +40 -0
  27. spatial_memory/ports/repositories.py +52 -2
  28. spatial_memory/server.py +329 -188
  29. spatial_memory/services/export_import.py +61 -43
  30. spatial_memory/services/lifecycle.py +397 -122
  31. spatial_memory/services/memory.py +81 -4
  32. spatial_memory/services/spatial.py +129 -46
  33. spatial_memory/tools/definitions.py +695 -671
  34. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/METADATA +83 -3
  35. spatial_memory_mcp-1.6.0.dist-info/RECORD +54 -0
  36. spatial_memory_mcp-1.0.3.dist-info/RECORD +0 -41
  37. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/WHEEL +0 -0
  38. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/entry_points.txt +0 -0
  39. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/licenses/LICENSE +0 -0
spatial_memory/core/db_search.py (new file)
@@ -0,0 +1,509 @@
+ """Search operations for LanceDB database.
+
+ Provides vector search, hybrid search, and batch search functionality.
+
+ This module is part of the database.py refactoring to separate concerns:
+ - SearchManager handles all search-related operations
+ - Database class delegates to SearchManager for these operations
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from typing import TYPE_CHECKING, Any, Protocol
+
+ import numpy as np
+
+ from spatial_memory.core.errors import StorageError, ValidationError
+ from spatial_memory.core.validation import (
+     sanitize_string as _sanitize_string,
+     validate_namespace as _validate_namespace,
+ )
+
+ if TYPE_CHECKING:
+     from lancedb.table import Table as LanceTable
+
+ logger = logging.getLogger(__name__)
+
+
+ class SearchManagerProtocol(Protocol):
+     """Protocol defining what SearchManager needs from Database.
+
+     This protocol enables loose coupling between SearchManager and Database,
+     preventing circular imports while maintaining type safety.
+     """
+
+     @property
+     def table(self) -> LanceTable:
+         """Access to the LanceDB table."""
+         ...
+
+     @property
+     def index_nprobes(self) -> int:
+         """Base nprobes for search."""
+         ...
+
+     @property
+     def index_refine_factor(self) -> int:
+         """Base refine factor for search."""
+         ...
+
+     @property
+     def vector_index_threshold(self) -> int:
+         """Row count threshold for vector index."""
+         ...
+
+     def _get_cached_row_count(self) -> int:
+         """Get cached row count."""
+         ...
+
+     @property
+     def _has_vector_index(self) -> bool | None:
+         """Whether a vector index exists."""
+         ...
+
+     @property
+     def _has_fts_index(self) -> bool | None:
+         """Whether an FTS index exists."""
+         ...
+
+
+ class SearchManager:
+     """Manages search operations for vector and hybrid queries.
+
+     Handles vector similarity search, batch search, and hybrid
+     search combining vector and keyword matching.
+
+     Example:
+         search_mgr = SearchManager(database)
+         results = search_mgr.vector_search(query_vector, limit=10)
+         batch_results = search_mgr.batch_vector_search_native([vec1, vec2])
+     """
+
+     def __init__(self, db: SearchManagerProtocol) -> None:
+         """Initialize the search manager.
+
+         Args:
+             db: Database instance providing table and config access.
+         """
+         self._db = db
+
+     def calculate_search_params(
+         self,
+         count: int,
+         limit: int,
+         nprobes_override: int | None = None,
+         refine_factor_override: int | None = None,
+     ) -> tuple[int, int]:
+         """Calculate optimal search parameters based on dataset size and limit.
+
+         Dynamically tunes nprobes and refine_factor for an optimal recall/speed tradeoff.
+
+         Args:
+             count: Number of rows in the dataset.
+             limit: Number of results requested.
+             nprobes_override: Optional override for nprobes (used if provided).
+             refine_factor_override: Optional override for refine_factor.
+
+         Returns:
+             Tuple of (nprobes, refine_factor).
+
+         Scaling rules:
+             - nprobes: base from config, scaled up for larger datasets
+                 - <100K rows: config value (default 20)
+                 - 100K-1M: max(config, 30)
+                 - 1M-10M: max(config, 50)
+                 - >10M: max(config, 100)
+             - refine_factor: base from config, scaled up for small limits
+                 - limit <= 5: config value * 2
+                 - limit <= 20: config value
+                 - limit > 20: max(config // 2, 2)
+         """
+         # Calculate nprobes based on dataset size
+         if nprobes_override is not None:
+             nprobes = nprobes_override
+         else:
+             base_nprobes = self._db.index_nprobes
+             if count < 100_000:
+                 nprobes = base_nprobes
+             elif count < 1_000_000:
+                 nprobes = max(base_nprobes, 30)
+             elif count < 10_000_000:
+                 nprobes = max(base_nprobes, 50)
+             else:
+                 nprobes = max(base_nprobes, 100)
+
+         # Calculate refine_factor based on limit
+         if refine_factor_override is not None:
+             refine_factor = refine_factor_override
+         else:
+             base_refine = self._db.index_refine_factor
+             if limit <= 5:
+                 # Small limits need more refinement for accuracy
+                 refine_factor = base_refine * 2
+             elif limit <= 20:
+                 refine_factor = base_refine
+             else:
+                 # Large limits can use less refinement
+                 refine_factor = max(base_refine // 2, 2)
+
+         return nprobes, refine_factor
+
+     def vector_search(
+         self,
+         query_vector: np.ndarray,
+         limit: int = 5,
+         namespace: str | None = None,
+         min_similarity: float = 0.0,
+         nprobes: int | None = None,
+         refine_factor: int | None = None,
+         include_vector: bool = False,
+     ) -> list[dict[str, Any]]:
+         """Search for similar memories by vector with performance tuning.
+
+         Note: This method should be called through the Database class, which
+         applies stale-connection recovery and retry decorators.
+
+         Args:
+             query_vector: Query embedding vector.
+             limit: Maximum number of results.
+             namespace: Filter to a specific namespace.
+             min_similarity: Minimum similarity threshold (0-1).
+             nprobes: Number of partitions to search (higher = better recall).
+                 Only effective when a vector index exists. Defaults to dynamic
+                 calculation.
+             refine_factor: Re-rank the top (refine_factor * limit) results for
+                 accuracy. Defaults to dynamic calculation based on limit.
+             include_vector: Whether to include vector embeddings in results.
+                 Defaults to False to reduce response size.
+
+         Returns:
+             List of memory records with similarity scores.
+
+         Raises:
+             ValidationError: If input validation fails.
+             StorageError: If the database operation fails.
+         """
+         try:
+             search = self._db.table.search(query_vector.tolist())
+
+             # Distance type for queries (cosine for semantic similarity).
+             # Note: when a vector index exists, the index's metric is used.
+             search = search.distance_type("cosine")
+
+             # Apply performance tuning when an index exists (use cached count)
+             count = self._db._get_cached_row_count()
+             if count > self._db.vector_index_threshold and self._db._has_vector_index:
+                 # Use dynamic calculation for search params
+                 actual_nprobes, actual_refine = self.calculate_search_params(
+                     count, limit, nprobes, refine_factor
+                 )
+                 search = search.nprobes(actual_nprobes)
+                 search = search.refine_factor(actual_refine)
+
+             # Build the filter with a sanitized namespace.
+             # prefilter=True applies the namespace filter BEFORE the vector
+             # search for better performance.
+             if namespace:
+                 namespace = _validate_namespace(namespace)
+                 safe_ns = _sanitize_string(namespace)
+                 search = search.where(f"namespace = '{safe_ns}'", prefilter=True)
+
+             # Projection: exclude the vector column to reduce response size
+             if not include_vector:
+                 search = search.select([
+                     "id", "content", "namespace", "metadata",
+                     "created_at", "updated_at", "last_accessed",
+                     "importance", "tags", "source", "access_count",
+                     "expires_at",
+                 ])
+
+             # Fetch extra rows if filtering by similarity
+             fetch_limit = limit * 2 if min_similarity > 0.0 else limit
+             results: list[dict[str, Any]] = search.limit(fetch_limit).to_list()
+
+             # Process results
+             filtered_results: list[dict[str, Any]] = []
+             for record in results:
+                 record["metadata"] = json.loads(record["metadata"]) if record["metadata"] else {}
+                 # LanceDB returns _distance; convert it to similarity
+                 if "_distance" in record:
+                     # Cosine distance to similarity: 1 - distance.
+                     # Clamp to [0, 1] (cosine distance can exceed 1 for
+                     # unnormalized vectors).
+                     similarity = max(0.0, min(1.0, 1 - record["_distance"]))
+                     record["similarity"] = similarity
+                     del record["_distance"]
+
+                 # Apply the similarity threshold
+                 if record.get("similarity", 0) >= min_similarity:
+                     filtered_results.append(record)
+                     if len(filtered_results) >= limit:
+                         break
+
+             return filtered_results
+         except ValidationError:
+             raise
+         except Exception as e:
+             raise StorageError(f"Failed to search: {e}") from e
+
+     def batch_vector_search_native(
+         self,
+         query_vectors: list[np.ndarray],
+         limit_per_query: int = 3,
+         namespace: str | None = None,
+         min_similarity: float = 0.0,
+         include_vector: bool = False,
+     ) -> list[list[dict[str, Any]]]:
+         """Batch search for similar memories using native LanceDB batch search.
+
+         Searches multiple query vectors in a single database operation, which
+         is much more efficient than individual searches. Uses LanceDB's native
+         batch search API, which returns results with a query index for grouping.
+
+         Note: This method should be called through the Database class, which
+         applies stale-connection recovery and retry decorators.
+
+         Args:
+             query_vectors: List of query embedding vectors.
+             limit_per_query: Maximum number of results per query.
+             namespace: Filter to a specific namespace (applied to all queries).
+             min_similarity: Minimum similarity threshold (0-1).
+             include_vector: Whether to include vector embeddings in results.
+
+         Returns:
+             List of result lists, one per query vector (same order as input).
+             Each result list contains memory records with similarity scores.
+
+         Raises:
+             ValidationError: If input validation fails.
+             StorageError: If the database operation fails.
+         """
+         if not query_vectors:
+             return []
+
+         try:
+             # Convert all vectors to lists for LanceDB
+             vector_lists = [v.tolist() for v in query_vectors]
+
+             # LanceDB native batch search
+             search = self._db.table.search(vector_lists)
+             search = search.distance_type("cosine")
+
+             # Apply performance tuning when an index exists
+             count = self._db._get_cached_row_count()
+             if count > self._db.vector_index_threshold and self._db._has_vector_index:
+                 actual_nprobes, actual_refine = self.calculate_search_params(
+                     count, limit_per_query, None, None
+                 )
+                 search = search.nprobes(actual_nprobes)
+                 search = search.refine_factor(actual_refine)
+
+             # Apply the namespace filter
+             if namespace:
+                 namespace = _validate_namespace(namespace)
+                 safe_ns = _sanitize_string(namespace)
+                 search = search.where(f"namespace = '{safe_ns}'", prefilter=True)
+
+             # Projection
+             if not include_vector:
+                 search = search.select([
+                     "id", "content", "namespace", "metadata",
+                     "created_at", "updated_at", "last_accessed",
+                     "importance", "tags", "source", "access_count",
+                 ])
+
+             # Execute the search and collect results. LanceDB returns rows
+             # with _query_index identifying which query each result belongs to.
+             search = search.limit(limit_per_query)
+             results_df = search.to_pandas()
+
+             # Initialize result lists (one per query)
+             num_queries = len(query_vectors)
+             batch_results: list[list[dict[str, Any]]] = [[] for _ in range(num_queries)]
+
+             if results_df.empty:
+                 return batch_results
+
+             # Group results by query index
+             for _, row in results_df.iterrows():
+                 query_idx = int(row.get("_query_index", 0))
+                 if query_idx >= num_queries:
+                     continue
+
+                 # Convert cosine distance to similarity, clamped to [0, 1]
+                 # for consistency with vector_search()
+                 distance = row.get("_distance", 0)
+                 similarity = max(0.0, min(1.0, 1.0 - float(distance)))
+
+                 if similarity < min_similarity:
+                     continue
+
+                 record = row.to_dict()
+                 # Clean up internal fields
+                 record.pop("_distance", None)
+                 record.pop("_query_index", None)
+                 record.pop("_relevance_score", None)
+
+                 # Add the similarity score
+                 record["similarity"] = similarity
+
+                 # Deserialize metadata
+                 if record.get("metadata"):
+                     try:
+                         record["metadata"] = json.loads(record["metadata"])
+                     except (json.JSONDecodeError, TypeError):
+                         record["metadata"] = {}
+                 else:
+                     record["metadata"] = {}
+
+                 batch_results[query_idx].append(record)
+
+             return batch_results
+         except ValidationError:
+             raise
+         except Exception as e:
+             raise StorageError(f"Failed to batch search: {e}") from e
+
+     def hybrid_search(
+         self,
+         query: str,
+         query_vector: np.ndarray,
+         limit: int = 5,
+         namespace: str | None = None,
+         alpha: float = 0.5,
+         min_similarity: float = 0.0,
+     ) -> list[dict[str, Any]]:
+         """Hybrid search combining vector similarity and keyword matching.
+
+         Uses LinearCombinationReranker to balance vector and keyword scores
+         based on the alpha parameter.
+
+         Note: This method should be called through the Database class, which
+         applies stale-connection recovery and retry decorators.
+
+         Args:
+             query: Text query for full-text search.
+             query_vector: Embedding vector for semantic search.
+             limit: Number of results.
+             namespace: Filter to a namespace.
+             alpha: Balance between vector (1.0) and keyword (0.0) scores.
+                 0.5 = balanced (recommended).
+             min_similarity: Minimum similarity threshold (0.0-1.0).
+                 Results below this threshold are filtered out.
+
+         Returns:
+             List of memory records with combined scores.
+
+         Raises:
+             ValidationError: If input validation fails.
+             StorageError: If the database operation fails.
+         """
+         try:
+             # Check whether FTS is available
+             if not self._db._has_fts_index:
+                 logger.debug("FTS index not available, falling back to vector search")
+                 return self.vector_search(
+                     query_vector, limit=limit, namespace=namespace,
+                     min_similarity=min_similarity,
+                 )
+
+             # Create a hybrid search with an explicit vector column
+             # specification, required when using external embeddings
+             # (not LanceDB built-in).
+             search = (
+                 self._db.table.search(query, query_type="hybrid")
+                 .vector(query_vector.tolist())
+                 .vector_column_name("vector")
+             )
+
+             # Apply the alpha parameter using LinearCombinationReranker.
+             # alpha=1.0 means full vector, alpha=0.0 means full FTS.
+             try:
+                 from lancedb.rerankers import LinearCombinationReranker
+
+                 reranker = LinearCombinationReranker(weight=alpha)
+                 search = search.rerank(reranker)
+             except ImportError:
+                 logger.debug("LinearCombinationReranker not available, using default reranking")
+             except Exception as e:
+                 logger.debug(f"Could not apply reranker: {e}")
+
+             # Apply the namespace filter
+             if namespace:
+                 namespace = _validate_namespace(namespace)
+                 safe_ns = _sanitize_string(namespace)
+                 search = search.where(f"namespace = '{safe_ns}'")
+
+             results: list[dict[str, Any]] = search.limit(limit).to_list()
+
+             # Process results: normalize scores and clean up internal columns
+             processed_results: list[dict[str, Any]] = []
+             for record in results:
+                 record["metadata"] = json.loads(record["metadata"]) if record["metadata"] else {}
+
+                 # Compute similarity from the available score columns.
+                 # Priority: _relevance_score > _distance > _score > default.
+                 similarity: float
+                 if "_relevance_score" in record:
+                     # Reranker output; use directly (already in the 0-1 range)
+                     similarity = float(record["_relevance_score"])
+                     del record["_relevance_score"]
+                 elif "_distance" in record:
+                     # Vector distance; convert to similarity
+                     similarity = max(0.0, min(1.0, 1 - float(record["_distance"])))
+                     del record["_distance"]
+                 elif "_score" in record:
+                     # BM25 score; normalize using score / (1 + score)
+                     score = float(record["_score"])
+                     similarity = score / (1.0 + score)
+                     del record["_score"]
+                 else:
+                     # No score column; use a default
+                     similarity = 0.5
+
+                 record["similarity"] = similarity
+
+                 # Mark as a hybrid result with the alpha value
+                 record["search_type"] = "hybrid"
+                 record["alpha"] = alpha
+
+                 # Apply the min_similarity filter
+                 if similarity >= min_similarity:
+                     processed_results.append(record)
+
+             return processed_results
+
+         except Exception as e:
+             logger.warning(f"Hybrid search failed, falling back to vector search: {e}")
+             return self.vector_search(
+                 query_vector, limit=limit, namespace=namespace,
+                 min_similarity=min_similarity,
+             )
+
+     def batch_vector_search(
+         self,
+         query_vectors: list[np.ndarray],
+         limit_per_query: int = 3,
+         namespace: str | None = None,
+         parallel: bool = False,  # Deprecated: native batch is always efficient
+         max_workers: int = 4,  # Deprecated: native batch handles parallelism
+         include_vector: bool = False,
+     ) -> list[list[dict[str, Any]]]:
+         """Search for similar memories using multiple query vectors.
+
+         Uses native LanceDB batch search for efficiency: a single database
+         operation searches all vectors simultaneously.
+
+         Args:
+             query_vectors: List of query embedding vectors.
+             limit_per_query: Maximum results per query vector.
+             namespace: Filter to a specific namespace.
+             parallel: Deprecated; kept for backward compatibility.
+             max_workers: Deprecated; kept for backward compatibility.
+             include_vector: Whether to include vector embeddings in results.
+
+         Returns:
+             List of result lists (one per query vector).
+
+         Raises:
+             StorageError: If the database operation fails.
+         """
+         # Delegate to the native batch search implementation
+         return self.batch_vector_search_native(
+             query_vectors=query_vectors,
+             limit_per_query=limit_per_query,
+             namespace=namespace,
+             min_similarity=0.0,
+             include_vector=include_vector,
+         )
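
For orientation, a minimal usage sketch of the new SearchManager, expanding the docstring example above. It is hypothetical: `database` stands in for the real Database instance (which satisfies SearchManagerProtocol and wraps these calls in retry decorators), and the 384-dimensional vector and "notes" namespace are illustrative assumptions, not values from the package.

    import numpy as np

    # Hypothetical wiring; `database` is a stand-in for the real Database
    # instance, which satisfies SearchManagerProtocol.
    search_mgr = SearchManager(database)
    query = np.random.rand(384).astype(np.float32)  # assumed embedding size

    # Single query: namespace prefilter plus a similarity floor.
    results = search_mgr.vector_search(
        query, limit=10, namespace="notes", min_similarity=0.3
    )

    # Batch: one LanceDB call for all vectors, results grouped per query.
    batches = search_mgr.batch_vector_search_native([query, query], limit_per_query=3)

    # Hybrid: alpha blends vector (1.0) and keyword (0.0) scores.
    hits = search_mgr.hybrid_search("index tuning", query, limit=5, alpha=0.5)

    # Per the scaling rules in calculate_search_params(), a 2M-row table with
    # limit=5 searches with nprobes=max(base, 50) and refine_factor=2*base.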
spatial_memory/core/db_versioning.py (new file)
@@ -0,0 +1,177 @@
+ """Snapshot and version management for LanceDB database.
+
+ Provides snapshot creation, listing, and restoration capabilities
+ leveraging LanceDB's built-in versioning system.
+
+ This module is part of the database.py refactoring to separate concerns:
+ - VersionManager handles all snapshot/version operations
+ - Database class delegates to VersionManager for these operations
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from typing import TYPE_CHECKING, Any, Protocol
+
+ from spatial_memory.core.errors import StorageError, ValidationError
+
+ if TYPE_CHECKING:
+     from lancedb.table import Table as LanceTable
+
+ logger = logging.getLogger(__name__)
+
+
+ class VersionManagerProtocol(Protocol):
+     """Protocol defining what VersionManager needs from Database.
+
+     This protocol enables loose coupling between VersionManager and Database,
+     preventing circular imports while maintaining type safety.
+     """
+
+     @property
+     def table(self) -> LanceTable:
+         """Access to the LanceDB table."""
+         ...
+
+     def _invalidate_count_cache(self) -> None:
+         """Invalidate the row count cache."""
+         ...
+
+     def _track_modification(self, count: int = 1) -> None:
+         """Track a modification for auto-compaction."""
+         ...
+
+     def _invalidate_namespace_cache(self) -> None:
+         """Invalidate the namespace cache."""
+         ...
+
+
+ class VersionManager:
+     """Manages database snapshots and version control.
+
+     Leverages LanceDB's native versioning to provide:
+     - Snapshot creation with semantic tags
+     - Version listing
+     - Point-in-time restoration
+
+     LanceDB automatically versions data on every write. This manager
+     provides a clean interface for working with those versions.
+
+     Example:
+         version_mgr = VersionManager(database)
+         version = version_mgr.create_snapshot("backup-2024-01")
+         snapshots = version_mgr.list_snapshots()
+         version_mgr.restore_snapshot(version)
+     """
+
+     def __init__(self, db: VersionManagerProtocol) -> None:
+         """Initialize the version manager.
+
+         Args:
+             db: Database instance providing table and cache access.
+         """
+         self._db = db
+
+     def create_snapshot(self, tag: str) -> int:
+         """Create a named snapshot of the current table state.
+
+         LanceDB automatically versions data on every write. This method
+         returns the current version number, which can be used with
+         restore_snapshot().
+
+         Args:
+             tag: Semantic version tag (e.g., "v1.0.0", "backup-2024-01").
+                 Note: The tag is logged for reference, but LanceDB tracks
+                 versions numerically. Consider storing tag->version mappings
+                 externally if tag-based retrieval is needed.
+
+         Returns:
+             Version number of the snapshot.
+
+         Raises:
+             StorageError: If snapshot creation fails.
+         """
+         try:
+             version = self._db.table.version
+             logger.info(f"Created snapshot '{tag}' at version {version}")
+             return version
+         except Exception as e:
+             raise StorageError(f"Failed to create snapshot: {e}") from e
+
+     def list_snapshots(self) -> list[dict[str, Any]]:
+         """List available versions/snapshots.
+
+         Returns:
+             List of version information dictionaries. Each dict contains at
+             minimum a 'version' key. Additional fields depend on the LanceDB
+             version and available metadata.
+
+         Raises:
+             StorageError: If listing fails.
+         """
+         try:
+             versions_info: list[dict[str, Any]] = []
+
+             # Try to get version history if available
+             if hasattr(self._db.table, "list_versions"):
+                 try:
+                     versions = self._db.table.list_versions()
+                     for v in versions:
+                         if isinstance(v, dict):
+                             versions_info.append(v)
+                         elif hasattr(v, "version"):
+                             versions_info.append({
+                                 "version": v.version,
+                                 "timestamp": getattr(v, "timestamp", None),
+                             })
+                         else:
+                             versions_info.append({"version": v})
+                 except Exception as e:
+                     logger.debug(f"list_versions not fully supported: {e}")
+
+             # Fall back to the current version if no history was returned
+             if not versions_info:
+                 versions_info.append({"version": self._db.table.version})
+
+             return versions_info
+         except Exception as e:
+             logger.warning(f"Could not list snapshots: {e}")
+             return [{"version": 0, "error": str(e)}]
+
+     def restore_snapshot(self, version: int) -> None:
+         """Restore the table to a specific version.
+
+         This creates a NEW version that reflects the old state
+         (it does not delete history).
+
+         Args:
+             version: The version number to restore to.
+
+         Raises:
+             ValidationError: If the version is invalid.
+             StorageError: If the restore fails.
+         """
+         if version < 0:
+             raise ValidationError("Version must be non-negative")
+
+         try:
+             self._db.table.restore(version)
+             self._db._invalidate_count_cache()
+             self._db._track_modification()
+             self._db._invalidate_namespace_cache()
+             logger.info(f"Restored to version {version}")
+         except Exception as e:
+             raise StorageError(f"Failed to restore snapshot: {e}") from e
+
+     def get_current_version(self) -> int:
+         """Get the current table version number.
+
+         Returns:
+             Current version number.
+
+         Raises:
+             StorageError: If the version cannot be retrieved.
+         """
+         try:
+             return self._db.table.version
+         except Exception as e:
+             raise StorageError(f"Failed to get current version: {e}") from e