spatial-memory-mcp 1.5.3 → 1.6.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spatial-memory-mcp might be problematic.

Files changed (34)
  1. spatial_memory/__init__.py +1 -1
  2. spatial_memory/__main__.py +241 -2
  3. spatial_memory/adapters/lancedb_repository.py +74 -5
  4. spatial_memory/config.py +10 -2
  5. spatial_memory/core/__init__.py +9 -0
  6. spatial_memory/core/connection_pool.py +41 -3
  7. spatial_memory/core/consolidation_strategies.py +402 -0
  8. spatial_memory/core/database.py +774 -918
  9. spatial_memory/core/db_idempotency.py +242 -0
  10. spatial_memory/core/db_indexes.py +575 -0
  11. spatial_memory/core/db_migrations.py +584 -0
  12. spatial_memory/core/db_search.py +509 -0
  13. spatial_memory/core/db_versioning.py +177 -0
  14. spatial_memory/core/embeddings.py +65 -18
  15. spatial_memory/core/errors.py +75 -3
  16. spatial_memory/core/filesystem.py +178 -0
  17. spatial_memory/core/models.py +4 -0
  18. spatial_memory/core/rate_limiter.py +26 -9
  19. spatial_memory/core/response_types.py +497 -0
  20. spatial_memory/core/validation.py +86 -2
  21. spatial_memory/factory.py +407 -0
  22. spatial_memory/migrations/__init__.py +40 -0
  23. spatial_memory/ports/repositories.py +52 -2
  24. spatial_memory/server.py +131 -189
  25. spatial_memory/services/export_import.py +61 -43
  26. spatial_memory/services/lifecycle.py +397 -122
  27. spatial_memory/services/memory.py +2 -2
  28. spatial_memory/services/spatial.py +129 -46
  29. {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/METADATA +83 -3
  30. spatial_memory_mcp-1.6.0.dist-info/RECORD +54 -0
  31. spatial_memory_mcp-1.5.3.dist-info/RECORD +0 -44
  32. {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/WHEEL +0 -0
  33. {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/entry_points.txt +0 -0
  34. {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/licenses/LICENSE +0 -0
spatial_memory/core/embeddings.py

@@ -175,6 +175,7 @@ class EmbeddingService:
         circuit_breaker_enabled: bool = True,
         circuit_breaker_failure_threshold: int = 5,
         circuit_breaker_reset_timeout: float = 60.0,
+        cache_max_size: int = 1000,
     ) -> None:
         """Initialize the embedding service.

@@ -192,6 +193,8 @@ class EmbeddingService:
                 opening the circuit. Default is 5.
             circuit_breaker_reset_timeout: Seconds to wait before attempting recovery.
                 Default is 60.0 seconds.
+            cache_max_size: Maximum number of embeddings to cache (LRU eviction).
+                Default is 1000. Set to 0 to disable caching.
         """
         self.model_name = model_name
         # Handle both plain strings and SecretStr (pydantic)
@@ -209,7 +212,7 @@ class EmbeddingService:

         # Embedding cache (LRU with max size)
         self._embed_cache: OrderedDict[str, np.ndarray] = OrderedDict()
-        self._cache_max_size = 1000
+        self._cache_max_size = cache_max_size
         self._cache_lock = threading.Lock()

         # Determine if using OpenAI
@@ -308,8 +311,11 @@ class EmbeddingService:
             raise EmbeddingError(f"Failed to initialize OpenAI client: {masked_error}") from e

     def _get_cache_key(self, text: str) -> str:
-        """Generate cache key from text content."""
-        return hashlib.md5(text.encode()).hexdigest()
+        """Generate cache key from text content.
+
+        Uses MD5 for speed (not security) - collisions are acceptable for cache.
+        """
+        return hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()

     @property
     def dimensions(self) -> int:
@@ -379,23 +385,27 @@ class EmbeddingService:
         else:
             embedding = self._embed_local([text])[0]

-        # Cache the result
-        with self._cache_lock:
-            # Check if another thread already cached it
-            if cache_key not in self._embed_cache:
-                # Evict oldest entries if at capacity
-                while len(self._embed_cache) >= self._cache_max_size:
-                    self._embed_cache.popitem(last=False)
-                self._embed_cache[cache_key] = embedding.copy()
-            else:
-                # Another thread cached it, move to end
-                self._embed_cache.move_to_end(cache_key)
+        # Cache the result (if caching enabled)
+        if self._cache_max_size > 0:
+            with self._cache_lock:
+                # Check if another thread already cached it
+                if cache_key not in self._embed_cache:
+                    # Evict oldest entries if at capacity
+                    while len(self._embed_cache) >= self._cache_max_size:
+                        self._embed_cache.popitem(last=False)
+                    self._embed_cache[cache_key] = embedding.copy()
+                else:
+                    # Another thread cached it, move to end
+                    self._embed_cache.move_to_end(cache_key)

         return embedding

     def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
         """Generate embeddings for multiple texts.

+        Uses cache for already-embedded texts and only generates
+        embeddings for texts not in cache.
+
         Args:
             texts: List of texts to embed.

@@ -406,10 +416,47 @@ class EmbeddingService:
             logger.debug("embed_batch called with empty input, returning empty list")
             return []

-        if self.use_openai:
-            return self._embed_openai_with_circuit_breaker(texts)
-        else:
-            return self._embed_local(texts)
+        # If caching disabled, generate all embeddings directly
+        if self._cache_max_size <= 0:
+            if self.use_openai:
+                return self._embed_openai_with_circuit_breaker(texts)
+            else:
+                return self._embed_local(texts)
+
+        # Check cache for each text
+        results: list[np.ndarray | None] = [None] * len(texts)
+        texts_to_embed: list[tuple[int, str]] = []  # (index, text)
+
+        with self._cache_lock:
+            for i, text in enumerate(texts):
+                cache_key = self._get_cache_key(text)
+                if cache_key in self._embed_cache:
+                    self._embed_cache.move_to_end(cache_key)
+                    results[i] = self._embed_cache[cache_key].copy()
+                else:
+                    texts_to_embed.append((i, text))
+
+        # Generate embeddings for uncached texts
+        if texts_to_embed:
+            uncached_texts = [t for _, t in texts_to_embed]
+            if self.use_openai:
+                new_embeddings = self._embed_openai_with_circuit_breaker(uncached_texts)
+            else:
+                new_embeddings = self._embed_local(uncached_texts)
+
+            # Store results and cache them
+            with self._cache_lock:
+                for (idx, text), embedding in zip(texts_to_embed, new_embeddings):
+                    results[idx] = embedding
+                    cache_key = self._get_cache_key(text)
+                    if cache_key not in self._embed_cache:
+                        # Evict oldest entries if at capacity
+                        while len(self._embed_cache) >= self._cache_max_size:
+                            self._embed_cache.popitem(last=False)
+                        self._embed_cache[cache_key] = embedding.copy()
+
+        # Type assertion - all results should be filled
+        return [r for r in results if r is not None]

     def clear_cache(self) -> int:
         """Clear embedding cache. Returns number of entries cleared."""
spatial_memory/core/errors.py

@@ -1,5 +1,24 @@
 """Custom exceptions for Spatial Memory MCP Server."""

+from pathlib import Path
+
+
+def sanitize_path_for_error(path: str | Path) -> str:
+    """Extract only the filename from a path for safe error messages.
+
+    Prevents leaking full system paths in error messages which could
+    expose sensitive directory structure information.
+
+    Args:
+        path: Full path or filename.
+
+    Returns:
+        Just the filename portion.
+    """
+    if isinstance(path, Path):
+        return path.name
+    return Path(path).name
+

 class SpatialMemoryError(Exception):
     """Base exception for all spatial memory errors."""
@@ -35,6 +54,37 @@ class StorageError(SpatialMemoryError):
     pass


+class PartialBatchInsertError(StorageError):
+    """Raised when batch insert partially fails.
+
+    Provides information about which records were successfully inserted
+    before the failure, enabling recovery or rollback.
+    """
+
+    def __init__(
+        self,
+        message: str,
+        succeeded_ids: list[str],
+        total_requested: int,
+        failed_batch_index: int | None = None,
+    ) -> None:
+        """Initialize with details about partial failure.
+
+        Args:
+            message: Error description.
+            succeeded_ids: IDs of successfully inserted records.
+            total_requested: Total number of records requested to insert.
+            failed_batch_index: Index of the batch that failed (if batched).
+        """
+        self.succeeded_ids = succeeded_ids
+        self.total_requested = total_requested
+        self.failed_batch_index = failed_batch_index
+        super().__init__(
+            f"{message}. "
+            f"Inserted {len(succeeded_ids)}/{total_requested} records before failure."
+        )
+
+
 class ValidationError(SpatialMemoryError):
     """Raised when input validation fails."""

@@ -141,6 +191,10 @@ class PathSecurityError(SpatialMemoryError):
     - Path outside allowed directories
     - Symlink to disallowed location
     - Invalid file extension
+
+    Note:
+        Error messages only include the filename, not the full path,
+        to avoid leaking system directory structure.
     """

     def __init__(
@@ -151,12 +205,18 @@ class PathSecurityError(SpatialMemoryError):
     ) -> None:
         self.path = path
         self.violation_type = violation_type
-        self.message = message or f"Path security violation ({violation_type}): {path}"
+        safe_name = sanitize_path_for_error(path)
+        self.message = message or f"Path security violation ({violation_type}): {safe_name}"
         super().__init__(self.message)


 class FileSizeLimitError(SpatialMemoryError):
-    """Raised when a file exceeds size limits."""
+    """Raised when a file exceeds size limits.
+
+    Note:
+        Error messages only include the filename, not the full path,
+        to avoid leaking system directory structure.
+    """

     def __init__(
         self,
@@ -169,8 +229,9 @@ class FileSizeLimitError(SpatialMemoryError):
         self.max_size_bytes = max_size_bytes
         actual_mb = actual_size_bytes / (1024 * 1024)
         max_mb = max_size_bytes / (1024 * 1024)
+        safe_name = sanitize_path_for_error(path)
         super().__init__(
-            f"File exceeds size limit: {path} is {actual_mb:.2f}MB "
+            f"File exceeds size limit: {safe_name} is {actual_mb:.2f}MB "
             f"(max: {max_mb:.2f}MB)"
         )

@@ -243,3 +304,14 @@ class FileLockError(SpatialMemoryError):
         self.timeout = timeout
         self.message = message or f"Failed to acquire file lock at {lock_path} after {timeout}s"
         super().__init__(self.message)
+
+
+# =============================================================================
+# Migration Error
+# =============================================================================
+
+
+class MigrationError(SpatialMemoryError):
+    """Raised when a database migration fails."""
+
+    pass
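
PartialBatchInsertError exists to make partial failures recoverable. A hedged sketch of how a caller might use it; `repo.insert_batch` and the `{"id": ...}` record shape are illustrative assumptions, not the package's actual repository API:

from spatial_memory.core.errors import PartialBatchInsertError

def insert_with_one_retry(repo, records: list[dict]) -> list[str]:
    try:
        # `insert_batch` is a hypothetical repository method for this sketch.
        return repo.insert_batch(records)
    except PartialBatchInsertError as e:
        done = set(e.succeeded_ids)
        remaining = [r for r in records if r["id"] not in done]
        # Retry only the records that never made it in; alternatively,
        # delete e.succeeded_ids to roll the whole batch back.
        return e.succeeded_ids + repo.insert_batch(remaining)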
spatial_memory/core/filesystem.py (new file)

@@ -0,0 +1,178 @@
+"""Filesystem detection utilities for identifying network filesystems.
+
+This module provides utilities to detect if a path is on a network filesystem
+(NFS, SMB/CIFS) where file-based locking may not work reliably.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import platform
+import subprocess
+from enum import Enum
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+class FilesystemType(Enum):
+    """Types of filesystems that can be detected."""
+
+    LOCAL = "local"
+    NFS = "nfs"
+    SMB = "smb"
+    CIFS = "cifs"
+    NETWORK_UNKNOWN = "network_unknown"
+    UNKNOWN = "unknown"
+
+
+def detect_filesystem_type(path: Path) -> FilesystemType:
+    """Detect the filesystem type for a given path.
+
+    Args:
+        path: Path to check. Will resolve to absolute path.
+
+    Returns:
+        FilesystemType indicating the detected filesystem.
+        Returns LOCAL for local filesystems, specific types for
+        network filesystems, or UNKNOWN if detection fails.
+    """
+    try:
+        resolved = path.resolve()
+
+        if platform.system() == "Windows":
+            return _detect_windows(resolved)
+        else:
+            return _detect_unix(resolved)
+    except Exception as e:
+        logger.debug(f"Filesystem detection failed for {path}: {e}")
+        return FilesystemType.UNKNOWN
+
+
+def _detect_windows(path: Path) -> FilesystemType:
+    """Detect filesystem type on Windows.
+
+    Uses GetDriveTypeW to check if drive is remote.
+    """
+    try:
+        import ctypes
+
+        # Get the drive letter (e.g., "C:\\")
+        drive = str(path)[:3] if len(str(path)) >= 3 else str(path)
+
+        # Ensure it ends with backslash for GetDriveTypeW
+        if not drive.endswith("\\"):
+            drive = drive + "\\"
+
+        # DRIVE_REMOTE = 4
+        drive_type = ctypes.windll.kernel32.GetDriveTypeW(drive)
+
+        if drive_type == 4:  # DRIVE_REMOTE
+            logger.debug(f"Detected remote drive: {drive}")
+            return FilesystemType.NETWORK_UNKNOWN
+        else:
+            return FilesystemType.LOCAL
+
+    except Exception as e:
+        logger.debug(f"Windows filesystem detection failed: {e}")
+        return FilesystemType.UNKNOWN
+
+
+def _detect_unix(path: Path) -> FilesystemType:
+    """Detect filesystem type on Unix-like systems.
+
+    Uses 'df -T' or 'mount' to determine filesystem type.
+    """
+    try:
+        # Try using df -T first (more portable)
+        result = subprocess.run(
+            ["df", "-T", str(path)],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+
+        if result.returncode == 0:
+            output = result.stdout.lower()
+            # Check for common network filesystem types
+            if "nfs" in output:
+                return FilesystemType.NFS
+            if "cifs" in output:
+                return FilesystemType.CIFS
+            if "smb" in output:
+                return FilesystemType.SMB
+            if "fuse.sshfs" in output:
+                return FilesystemType.NETWORK_UNKNOWN
+            # If none of the above, assume local
+            return FilesystemType.LOCAL
+
+    except subprocess.TimeoutExpired:
+        logger.debug("df command timed out - may indicate network filesystem issue")
+        return FilesystemType.NETWORK_UNKNOWN
+    except FileNotFoundError:
+        # df not available, try alternative
+        pass
+    except Exception as e:
+        logger.debug(f"df command failed: {e}")
+
+    # Fallback: try reading /proc/mounts on Linux
+    try:
+        if os.path.exists("/proc/mounts"):
+            with open("/proc/mounts") as f:
+                mounts = f.read().lower()
+            path_str = str(path).lower()
+            # Find the mount point for this path
+            for line in mounts.split("\n"):
+                parts = line.split()
+                if len(parts) >= 3:
+                    mount_point = parts[1]
+                    fs_type = parts[2]
+                    if path_str.startswith(mount_point):
+                        if "nfs" in fs_type:
+                            return FilesystemType.NFS
+                        if "cifs" in fs_type or "smb" in fs_type:
+                            return FilesystemType.SMB
+    except Exception as e:
+        logger.debug(f"/proc/mounts check failed: {e}")
+
+    return FilesystemType.LOCAL
+
+
+def is_network_filesystem(path: Path) -> bool:
+    """Check if a path is on a network filesystem.
+
+    Args:
+        path: Path to check.
+
+    Returns:
+        True if the path appears to be on a network filesystem
+        (NFS, SMB, CIFS, or unknown network type).
+    """
+    fs_type = detect_filesystem_type(path)
+    return fs_type in (
+        FilesystemType.NFS,
+        FilesystemType.SMB,
+        FilesystemType.CIFS,
+        FilesystemType.NETWORK_UNKNOWN,
+    )
+
+
+def get_filesystem_warning_message(fs_type: FilesystemType, path: Path) -> str:
+    """Generate a warning message for network filesystem detection.
+
+    Args:
+        fs_type: The detected filesystem type.
+        path: The path that was checked.
+
+    Returns:
+        A warning message string explaining the risks.
+    """
+    return (
+        f"WARNING: Storage path appears to be on a network filesystem ({fs_type.value}). "
+        f"Path: {path}\n"
+        f"File-based locking does not work reliably on network filesystems. "
+        f"Running multiple instances against this storage may cause data corruption. "
+        f"To suppress this warning, set SPATIAL_MEMORY_ACKNOWLEDGE_NETWORK_FS_RISK=true "
+        f"or use a local filesystem path."
+    )
spatial_memory/core/models.py

@@ -53,6 +53,10 @@ class MemoryResult(BaseModel):
     importance: float
     created_at: datetime
     metadata: dict[str, Any] = Field(default_factory=dict)
+    vector: list[float] | None = Field(
+        default=None,
+        description="Embedding vector (only included when include_vector=True in search)",
+    )


 class ClusterInfo(BaseModel):
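
The new field follows the standard pydantic optional-field pattern: None by default, populated only when the caller requests vectors. A minimal standalone illustration with a stand-in model (this is not the real MemoryResult, whose other required fields are elided in the hunk above):

from pydantic import BaseModel, Field

class Result(BaseModel):  # stand-in for MemoryResult
    importance: float
    vector: list[float] | None = Field(default=None)

r = Result(importance=0.9)
assert r.vector is None                        # omitted unless requested
r = Result(importance=0.9, vector=[0.1, 0.2])  # populated when vectors are requested upstream
assert r.model_dump()["vector"] == [0.1, 0.2]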
spatial_memory/core/rate_limiter.py

@@ -52,6 +52,19 @@ class RateLimiter:
         self._tokens = min(self.capacity, self._tokens + elapsed * self.rate)
         self._last_refill = now

+    def can_acquire(self, tokens: int = 1) -> bool:
+        """Check if tokens could be acquired without consuming them.
+
+        Args:
+            tokens: Number of tokens to check.
+
+        Returns:
+            True if tokens are available, False otherwise.
+        """
+        with self._lock:
+            self._refill()
+            return self._tokens >= tokens
+
     def acquire(self, tokens: int = 1) -> bool:
         """Try to acquire tokens without blocking.

@@ -201,6 +214,7 @@ class AgentAwareRateLimiter:
         """Try to acquire tokens without blocking.

         Must pass BOTH global AND per-agent limits (if agent_id provided).
+        Tokens are only consumed if both limits pass.

         Args:
             agent_id: Optional agent identifier. If None, only global limit applies.
@@ -209,21 +223,24 @@ class AgentAwareRateLimiter:
         Returns:
             True if tokens were acquired, False if rate limited.
         """
-        # Check global limit first (cheaper)
-        if not self._global.acquire(tokens):
-            return False
-
         # If no agent_id, only global limit applies
         if agent_id is None:
-            return True
+            return self._global.acquire(tokens)

-        # Check per-agent limit
+        # Check both limits first without consuming
         agent_limiter = self._get_agent_limiter(agent_id)
-        if not agent_limiter.acquire(tokens):
-            # Failed per-agent limit, but we already consumed global tokens
-            # This is acceptable - prevents gaming by switching agents
+
+        if not self._global.can_acquire(tokens):
             return False

+        if not agent_limiter.can_acquire(tokens):
+            return False
+
+        # Both limits pass, now actually consume tokens from both
+        # Note: Small race window here, but acceptable for rate limiting
+        self._global.acquire(tokens)
+        agent_limiter.acquire(tokens)
+
         return True

     def wait(
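
This rewrite fixes a token leak: previously, a request that passed the global bucket but failed the per-agent bucket still consumed global tokens. The check-then-consume pattern, sketched standalone below; the RateLimiter constructor arguments are assumptions (the diff only shows can_acquire() and acquire(), plus a _refill that reads self.rate and self.capacity):

from spatial_memory.core.rate_limiter import RateLimiter

global_bucket = RateLimiter(rate=100.0, capacity=100)  # assumed signature
agent_bucket = RateLimiter(rate=10.0, capacity=10)     # assumed signature

def try_acquire(tokens: int = 1) -> bool:
    # Probe both buckets first, so a per-agent rejection no longer
    # consumes global tokens (the leak the 1.6.0 change fixes).
    if not global_bucket.can_acquire(tokens):
        return False
    if not agent_bucket.can_acquire(tokens):
        return False
    # Both would pass: consume from both. There is a small race window
    # between probe and consume, which the in-diff comment calls acceptable.
    global_bucket.acquire(tokens)
    agent_bucket.acquire(tokens)
    return True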