spatial-memory-mcp 1.0.3__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spatial-memory-mcp might be problematic. Click here for more details.
- spatial_memory/__init__.py +97 -97
- spatial_memory/__main__.py +241 -2
- spatial_memory/adapters/lancedb_repository.py +74 -5
- spatial_memory/config.py +115 -2
- spatial_memory/core/__init__.py +35 -0
- spatial_memory/core/cache.py +317 -0
- spatial_memory/core/circuit_breaker.py +297 -0
- spatial_memory/core/connection_pool.py +41 -3
- spatial_memory/core/consolidation_strategies.py +402 -0
- spatial_memory/core/database.py +791 -769
- spatial_memory/core/db_idempotency.py +242 -0
- spatial_memory/core/db_indexes.py +575 -0
- spatial_memory/core/db_migrations.py +584 -0
- spatial_memory/core/db_search.py +509 -0
- spatial_memory/core/db_versioning.py +177 -0
- spatial_memory/core/embeddings.py +156 -19
- spatial_memory/core/errors.py +75 -3
- spatial_memory/core/filesystem.py +178 -0
- spatial_memory/core/logging.py +194 -103
- spatial_memory/core/models.py +4 -0
- spatial_memory/core/rate_limiter.py +326 -105
- spatial_memory/core/response_types.py +497 -0
- spatial_memory/core/tracing.py +300 -0
- spatial_memory/core/validation.py +403 -319
- spatial_memory/factory.py +407 -0
- spatial_memory/migrations/__init__.py +40 -0
- spatial_memory/ports/repositories.py +52 -2
- spatial_memory/server.py +329 -188
- spatial_memory/services/export_import.py +61 -43
- spatial_memory/services/lifecycle.py +397 -122
- spatial_memory/services/memory.py +81 -4
- spatial_memory/services/spatial.py +129 -46
- spatial_memory/tools/definitions.py +695 -671
- {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/METADATA +83 -3
- spatial_memory_mcp-1.6.0.dist-info/RECORD +54 -0
- spatial_memory_mcp-1.0.3.dist-info/RECORD +0 -41
- {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/WHEEL +0 -0
- {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/entry_points.txt +0 -0
- {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -12,6 +12,11 @@ from typing import TYPE_CHECKING, Any, Literal, TypeVar
|
|
|
12
12
|
|
|
13
13
|
import numpy as np
|
|
14
14
|
|
|
15
|
+
from spatial_memory.core.circuit_breaker import (
|
|
16
|
+
CircuitBreaker,
|
|
17
|
+
CircuitOpenError,
|
|
18
|
+
CircuitState,
|
|
19
|
+
)
|
|
15
20
|
from spatial_memory.core.errors import ConfigurationError, EmbeddingError
|
|
16
21
|
|
|
17
22
|
if TYPE_CHECKING:
|
|
@@ -158,6 +163,7 @@ class EmbeddingService:
|
|
|
158
163
|
|
|
159
164
|
Supports local sentence-transformers models and optional OpenAI API.
|
|
160
165
|
Uses ONNX Runtime by default for 2-3x faster inference.
|
|
166
|
+
Optionally uses a circuit breaker for fault tolerance with external services.
|
|
161
167
|
"""
|
|
162
168
|
|
|
163
169
|
def __init__(
|
|
@@ -165,6 +171,11 @@ class EmbeddingService:
|
|
|
165
171
|
model_name: str = "all-MiniLM-L6-v2",
|
|
166
172
|
openai_api_key: str | Any | None = None,
|
|
167
173
|
backend: EmbeddingBackend = "auto",
|
|
174
|
+
circuit_breaker: CircuitBreaker | None = None,
|
|
175
|
+
circuit_breaker_enabled: bool = True,
|
|
176
|
+
circuit_breaker_failure_threshold: int = 5,
|
|
177
|
+
circuit_breaker_reset_timeout: float = 60.0,
|
|
178
|
+
cache_max_size: int = 1000,
|
|
168
179
|
) -> None:
|
|
169
180
|
"""Initialize the embedding service.
|
|
170
181
|
|
|
@@ -174,6 +185,16 @@ class EmbeddingService:
|
|
|
174
185
|
Can be a string or a SecretStr (pydantic).
|
|
175
186
|
backend: Inference backend. 'auto' uses ONNX if available (default),
|
|
176
187
|
'onnx' forces ONNX Runtime, 'pytorch' forces PyTorch.
|
|
188
|
+
circuit_breaker: Optional pre-configured circuit breaker instance.
|
|
189
|
+
If provided, other circuit breaker parameters are ignored.
|
|
190
|
+
circuit_breaker_enabled: Whether to enable circuit breaker for OpenAI calls.
|
|
191
|
+
Defaults to True. Only applies to OpenAI models.
|
|
192
|
+
circuit_breaker_failure_threshold: Number of consecutive failures before
|
|
193
|
+
opening the circuit. Default is 5.
|
|
194
|
+
circuit_breaker_reset_timeout: Seconds to wait before attempting recovery.
|
|
195
|
+
Default is 60.0 seconds.
|
|
196
|
+
cache_max_size: Maximum number of embeddings to cache (LRU eviction).
|
|
197
|
+
Default is 1000. Set to 0 to disable caching.
|
|
177
198
|
"""
|
|
178
199
|
self.model_name = model_name
|
|
179
200
|
# Handle both plain strings and SecretStr (pydantic)
|
|
@@ -191,7 +212,7 @@ class EmbeddingService:
|
|
|
191
212
|
|
|
192
213
|
# Embedding cache (LRU with max size)
|
|
193
214
|
self._embed_cache: OrderedDict[str, np.ndarray] = OrderedDict()
|
|
194
|
-
self._cache_max_size =
|
|
215
|
+
self._cache_max_size = cache_max_size
|
|
195
216
|
self._cache_lock = threading.Lock()
|
|
196
217
|
|
|
197
218
|
# Determine if using OpenAI
|
|
@@ -203,6 +224,23 @@ class EmbeddingService:
|
|
|
203
224
|
"OpenAI API key required for OpenAI embedding models"
|
|
204
225
|
)
|
|
205
226
|
|
|
227
|
+
# Circuit breaker for OpenAI API calls (optional)
|
|
228
|
+
if circuit_breaker is not None:
|
|
229
|
+
self._circuit_breaker: CircuitBreaker | None = circuit_breaker
|
|
230
|
+
elif circuit_breaker_enabled and self.use_openai:
|
|
231
|
+
self._circuit_breaker = CircuitBreaker(
|
|
232
|
+
failure_threshold=circuit_breaker_failure_threshold,
|
|
233
|
+
reset_timeout=circuit_breaker_reset_timeout,
|
|
234
|
+
name=f"embedding_service_{model_name}",
|
|
235
|
+
)
|
|
236
|
+
logger.info(
|
|
237
|
+
f"Circuit breaker enabled for embedding service "
|
|
238
|
+
f"(threshold={circuit_breaker_failure_threshold}, "
|
|
239
|
+
f"timeout={circuit_breaker_reset_timeout}s)"
|
|
240
|
+
)
|
|
241
|
+
else:
|
|
242
|
+
self._circuit_breaker = None
|
|
243
|
+
|
|
206
244
|
def _load_local_model(self) -> None:
|
|
207
245
|
"""Load local sentence-transformers model with ONNX or PyTorch backend."""
|
|
208
246
|
if self._model is not None:
|
|
@@ -273,8 +311,11 @@ class EmbeddingService:
|
|
|
273
311
|
raise EmbeddingError(f"Failed to initialize OpenAI client: {masked_error}") from e
|
|
274
312
|
|
|
275
313
|
def _get_cache_key(self, text: str) -> str:
    """Derive the embedding-cache key for a piece of text.

    The key is the hexadecimal MD5 digest of the encoded text. MD5 is
    chosen for speed, not security; an occasional collision is acceptable
    for a cache.

    Args:
        text: Text whose embedding is being cached.

    Returns:
        Hex digest string used as the cache key.
    """
    # usedforsecurity=False marks this as a non-cryptographic use of MD5.
    digest = hashlib.md5(text.encode(), usedforsecurity=False)
    return digest.hexdigest()
|
|
278
319
|
|
|
279
320
|
@property
|
|
280
321
|
def dimensions(self) -> int:
|
|
@@ -300,6 +341,26 @@ class EmbeddingService:
|
|
|
300
341
|
self._load_local_model()
|
|
301
342
|
return self._active_backend or "pytorch"
|
|
302
343
|
|
|
344
|
+
@property
def circuit_state(self) -> CircuitState | None:
    """Report the state of the circuit breaker, if one is configured.

    Returns:
        The breaker's current CircuitState, or None when this service
        has no circuit breaker.
    """
    breaker = self._circuit_breaker
    return breaker.state if breaker is not None else None
|
|
354
|
+
|
|
355
|
+
@property
def circuit_breaker(self) -> CircuitBreaker | None:
    """Expose the circuit breaker held by this service.

    Returns:
        The CircuitBreaker instance, or None when no breaker is configured.
    """
    breaker = self._circuit_breaker
    return breaker
|
|
363
|
+
|
|
303
364
|
def embed(self, text: str) -> np.ndarray:
|
|
304
365
|
"""Generate embedding for a single text.
|
|
305
366
|
|
|
@@ -320,27 +381,31 @@ class EmbeddingService:
|
|
|
320
381
|
|
|
321
382
|
# Generate embedding (outside lock to allow concurrent generation)
|
|
322
383
|
if self.use_openai:
|
|
323
|
-
embedding = self.
|
|
384
|
+
embedding = self._embed_openai_with_circuit_breaker([text])[0]
|
|
324
385
|
else:
|
|
325
386
|
embedding = self._embed_local([text])[0]
|
|
326
387
|
|
|
327
|
-
# Cache the result
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
self._embed_cache.
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
388
|
+
# Cache the result (if caching enabled)
|
|
389
|
+
if self._cache_max_size > 0:
|
|
390
|
+
with self._cache_lock:
|
|
391
|
+
# Check if another thread already cached it
|
|
392
|
+
if cache_key not in self._embed_cache:
|
|
393
|
+
# Evict oldest entries if at capacity
|
|
394
|
+
while len(self._embed_cache) >= self._cache_max_size:
|
|
395
|
+
self._embed_cache.popitem(last=False)
|
|
396
|
+
self._embed_cache[cache_key] = embedding.copy()
|
|
397
|
+
else:
|
|
398
|
+
# Another thread cached it, move to end
|
|
399
|
+
self._embed_cache.move_to_end(cache_key)
|
|
338
400
|
|
|
339
401
|
return embedding
|
|
340
402
|
|
|
341
403
|
def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
|
|
342
404
|
"""Generate embeddings for multiple texts.
|
|
343
405
|
|
|
406
|
+
Uses cache for already-embedded texts and only generates
|
|
407
|
+
embeddings for texts not in cache.
|
|
408
|
+
|
|
344
409
|
Args:
|
|
345
410
|
texts: List of texts to embed.
|
|
346
411
|
|
|
@@ -351,10 +416,47 @@ class EmbeddingService:
|
|
|
351
416
|
logger.debug("embed_batch called with empty input, returning empty list")
|
|
352
417
|
return []
|
|
353
418
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
419
|
+
# If caching disabled, generate all embeddings directly
|
|
420
|
+
if self._cache_max_size <= 0:
|
|
421
|
+
if self.use_openai:
|
|
422
|
+
return self._embed_openai_with_circuit_breaker(texts)
|
|
423
|
+
else:
|
|
424
|
+
return self._embed_local(texts)
|
|
425
|
+
|
|
426
|
+
# Check cache for each text
|
|
427
|
+
results: list[np.ndarray | None] = [None] * len(texts)
|
|
428
|
+
texts_to_embed: list[tuple[int, str]] = [] # (index, text)
|
|
429
|
+
|
|
430
|
+
with self._cache_lock:
|
|
431
|
+
for i, text in enumerate(texts):
|
|
432
|
+
cache_key = self._get_cache_key(text)
|
|
433
|
+
if cache_key in self._embed_cache:
|
|
434
|
+
self._embed_cache.move_to_end(cache_key)
|
|
435
|
+
results[i] = self._embed_cache[cache_key].copy()
|
|
436
|
+
else:
|
|
437
|
+
texts_to_embed.append((i, text))
|
|
438
|
+
|
|
439
|
+
# Generate embeddings for uncached texts
|
|
440
|
+
if texts_to_embed:
|
|
441
|
+
uncached_texts = [t for _, t in texts_to_embed]
|
|
442
|
+
if self.use_openai:
|
|
443
|
+
new_embeddings = self._embed_openai_with_circuit_breaker(uncached_texts)
|
|
444
|
+
else:
|
|
445
|
+
new_embeddings = self._embed_local(uncached_texts)
|
|
446
|
+
|
|
447
|
+
# Store results and cache them
|
|
448
|
+
with self._cache_lock:
|
|
449
|
+
for (idx, text), embedding in zip(texts_to_embed, new_embeddings):
|
|
450
|
+
results[idx] = embedding
|
|
451
|
+
cache_key = self._get_cache_key(text)
|
|
452
|
+
if cache_key not in self._embed_cache:
|
|
453
|
+
# Evict oldest entries if at capacity
|
|
454
|
+
while len(self._embed_cache) >= self._cache_max_size:
|
|
455
|
+
self._embed_cache.popitem(last=False)
|
|
456
|
+
self._embed_cache[cache_key] = embedding.copy()
|
|
457
|
+
|
|
458
|
+
# Type assertion - all results should be filled
|
|
459
|
+
return [r for r in results if r is not None]
|
|
358
460
|
|
|
359
461
|
def clear_cache(self) -> int:
|
|
360
462
|
"""Clear embedding cache. Returns number of entries cleared."""
|
|
@@ -387,6 +489,41 @@ class EmbeddingService:
|
|
|
387
489
|
masked_error = _mask_api_key(str(e))
|
|
388
490
|
raise EmbeddingError(f"Failed to generate embeddings: {masked_error}") from e
|
|
389
491
|
|
|
492
|
+
def _embed_openai_with_circuit_breaker(self, texts: list[str]) -> list[np.ndarray]:
    """Run the OpenAI embedding call through the circuit breaker, if any.

    When no circuit breaker is configured the call goes straight to
    ``_embed_openai``. Otherwise the breaker invokes it, and an open
    circuit is translated into an EmbeddingError.

    Args:
        texts: List of texts to embed.

    Returns:
        List of embedding vectors.

    Raises:
        EmbeddingError: If the circuit is open. Errors raised by the
            embedding call itself propagate unchanged from this method.
    """
    breaker = self._circuit_breaker
    if breaker is None:
        # No breaker configured: call the API wrapper directly.
        return self._embed_openai(texts)

    try:
        return breaker.call(self._embed_openai, texts)
    except CircuitOpenError as e:
        retry_in = e.time_until_retry
        if retry_in is None:
            log_message = "Circuit breaker is open for embedding service"
            error_message = "Embedding service temporarily unavailable (circuit open)."
        else:
            # Include the remaining wait only when the breaker reports one.
            log_message = (
                f"Circuit breaker is open for embedding service, "
                f"time until retry: {retry_in:.1f}s"
            )
            error_message = (
                f"Embedding service temporarily unavailable (circuit open). "
                f"Try again in {retry_in:.0f} seconds."
            )
        logger.warning(log_message)
        raise EmbeddingError(error_message) from e
|
|
526
|
+
|
|
390
527
|
@retry_on_api_error(max_attempts=3, backoff=1.0)
|
|
391
528
|
def _embed_openai(self, texts: list[str]) -> list[np.ndarray]:
|
|
392
529
|
"""Generate embeddings using OpenAI API with retry logic.
|
spatial_memory/core/errors.py
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
"""Custom exceptions for Spatial Memory MCP Server."""
|
|
2
2
|
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def sanitize_path_for_error(path: str | Path) -> str:
|
|
7
|
+
"""Extract only the filename from a path for safe error messages.
|
|
8
|
+
|
|
9
|
+
Prevents leaking full system paths in error messages which could
|
|
10
|
+
expose sensitive directory structure information.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
path: Full path or filename.
|
|
14
|
+
|
|
15
|
+
Returns:
|
|
16
|
+
Just the filename portion.
|
|
17
|
+
"""
|
|
18
|
+
if isinstance(path, Path):
|
|
19
|
+
return path.name
|
|
20
|
+
return Path(path).name
|
|
21
|
+
|
|
3
22
|
|
|
4
23
|
class SpatialMemoryError(Exception):
|
|
5
24
|
"""Base exception for all spatial memory errors."""
|
|
@@ -35,6 +54,37 @@ class StorageError(SpatialMemoryError):
|
|
|
35
54
|
pass
|
|
36
55
|
|
|
37
56
|
|
|
57
|
+
class PartialBatchInsertError(StorageError):
    """Signals that a batch insert failed after some records were written.

    Carries the IDs that were inserted before the failure so the caller
    can recover or roll back.
    """

    def __init__(
        self,
        message: str,
        succeeded_ids: list[str],
        total_requested: int,
        failed_batch_index: int | None = None,
    ) -> None:
        """Record the details of the partial failure.

        Args:
            message: Error description.
            succeeded_ids: IDs of successfully inserted records.
            total_requested: Total number of records requested to insert.
            failed_batch_index: Index of the batch that failed (if batched).
        """
        self.succeeded_ids = succeeded_ids
        self.total_requested = total_requested
        self.failed_batch_index = failed_batch_index

        # Append an "inserted/requested" summary to the caller's message.
        inserted_count = len(succeeded_ids)
        summary = f"Inserted {inserted_count}/{total_requested} records before failure."
        super().__init__(f"{message}. {summary}")
|
|
86
|
+
|
|
87
|
+
|
|
38
88
|
class ValidationError(SpatialMemoryError):
|
|
39
89
|
"""Raised when input validation fails."""
|
|
40
90
|
|
|
@@ -141,6 +191,10 @@ class PathSecurityError(SpatialMemoryError):
|
|
|
141
191
|
- Path outside allowed directories
|
|
142
192
|
- Symlink to disallowed location
|
|
143
193
|
- Invalid file extension
|
|
194
|
+
|
|
195
|
+
Note:
|
|
196
|
+
Error messages only include the filename, not the full path,
|
|
197
|
+
to avoid leaking system directory structure.
|
|
144
198
|
"""
|
|
145
199
|
|
|
146
200
|
def __init__(
|
|
@@ -151,12 +205,18 @@ class PathSecurityError(SpatialMemoryError):
|
|
|
151
205
|
) -> None:
|
|
152
206
|
self.path = path
|
|
153
207
|
self.violation_type = violation_type
|
|
154
|
-
|
|
208
|
+
safe_name = sanitize_path_for_error(path)
|
|
209
|
+
self.message = message or f"Path security violation ({violation_type}): {safe_name}"
|
|
155
210
|
super().__init__(self.message)
|
|
156
211
|
|
|
157
212
|
|
|
158
213
|
class FileSizeLimitError(SpatialMemoryError):
|
|
159
|
-
"""Raised when a file exceeds size limits.
|
|
214
|
+
"""Raised when a file exceeds size limits.
|
|
215
|
+
|
|
216
|
+
Note:
|
|
217
|
+
Error messages only include the filename, not the full path,
|
|
218
|
+
to avoid leaking system directory structure.
|
|
219
|
+
"""
|
|
160
220
|
|
|
161
221
|
def __init__(
|
|
162
222
|
self,
|
|
@@ -169,8 +229,9 @@ class FileSizeLimitError(SpatialMemoryError):
|
|
|
169
229
|
self.max_size_bytes = max_size_bytes
|
|
170
230
|
actual_mb = actual_size_bytes / (1024 * 1024)
|
|
171
231
|
max_mb = max_size_bytes / (1024 * 1024)
|
|
232
|
+
safe_name = sanitize_path_for_error(path)
|
|
172
233
|
super().__init__(
|
|
173
|
-
f"File exceeds size limit: {
|
|
234
|
+
f"File exceeds size limit: {safe_name} is {actual_mb:.2f}MB "
|
|
174
235
|
f"(max: {max_mb:.2f}MB)"
|
|
175
236
|
)
|
|
176
237
|
|
|
@@ -243,3 +304,14 @@ class FileLockError(SpatialMemoryError):
|
|
|
243
304
|
self.timeout = timeout
|
|
244
305
|
self.message = message or f"Failed to acquire file lock at {lock_path} after {timeout}s"
|
|
245
306
|
super().__init__(self.message)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# =============================================================================
|
|
310
|
+
# Migration Error
|
|
311
|
+
# =============================================================================
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
class MigrationError(SpatialMemoryError):
    """Raised when a database migration fails.

    Adds no behavior of its own; it exists so callers can catch migration
    failures separately from other SpatialMemoryError subclasses.
    """
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Filesystem detection utilities for identifying network filesystems.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to detect if a path is on a network filesystem
|
|
4
|
+
(NFS, SMB/CIFS) where file-based locking may not work reliably.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import platform
|
|
12
|
+
import subprocess
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FilesystemType(Enum):
    """Filesystem categories that detection can report."""

    # Not identified as a network filesystem.
    LOCAL = "local"

    # Specific network filesystem families.
    NFS = "nfs"
    SMB = "smb"
    CIFS = "cifs"

    # Known to be remote, but the protocol was not identified.
    NETWORK_UNKNOWN = "network_unknown"

    # Detection failed; nothing can be said about the path.
    UNKNOWN = "unknown"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def detect_filesystem_type(path: Path) -> FilesystemType:
    """Determine which kind of filesystem holds the given path.

    The path is resolved to an absolute path and handed to the
    platform-specific detector (Windows or Unix-like).

    Args:
        path: Path to check. Will resolve to absolute path.

    Returns:
        FilesystemType indicating the detected filesystem. Any exception
        raised during detection is logged at debug level and reported as
        UNKNOWN rather than propagated.
    """
    try:
        absolute = path.resolve()
        # Choose the detector by platform, then run it on the resolved path.
        detector = _detect_windows if platform.system() == "Windows" else _detect_unix
        return detector(absolute)
    except Exception as e:
        logger.debug(f"Filesystem detection failed for {path}: {e}")
        return FilesystemType.UNKNOWN
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _detect_windows(path: Path) -> FilesystemType:
    """Detect filesystem type on Windows.

    UNC paths (``\\\\server\\share\\...``) are classified as network paths
    directly. Drive-letter paths are classified with GetDriveTypeW.

    Slicing the first three characters of a UNC path does not produce a
    valid root, so GetDriveTypeW cannot classify it and the share would be
    reported as local; the drive component of the path is used instead.
    NOTE(review): Path.resolve() appears to return the UNC form for mapped
    network drives, which would make the UNC branch the common case -
    confirm on a Windows host.

    Args:
        path: Absolute path to classify.

    Returns:
        NETWORK_UNKNOWN for UNC paths and remote drives, LOCAL for other
        recognised drive types, UNKNOWN if the drive type cannot be
        determined or detection raises.
    """
    try:
        import ctypes

        # GetDriveTypeW return codes used below.
        drive_unknown = 0
        drive_no_root_dir = 1
        drive_remote = 4

        drive = path.drive or str(path)[:2]

        if drive.startswith("\\\\"):
            is_extended_unc = drive[:8].upper() == "\\\\?\\UNC\\"
            is_device_prefix = drive[:4] in ("\\\\?\\", "\\\\.\\")
            if is_extended_unc or not is_device_prefix:
                logger.debug(f"Detected UNC network path: {drive}")
                return FilesystemType.NETWORK_UNKNOWN
            # "\\?\C:" style: drop the prefix to recover the drive letter.
            drive = drive[4:]

        # Ensure it ends with backslash for GetDriveTypeW
        root = drive if drive.endswith("\\") else drive + "\\"

        drive_type = ctypes.windll.kernel32.GetDriveTypeW(root)

        if drive_type == drive_remote:
            logger.debug(f"Detected remote drive: {root}")
            return FilesystemType.NETWORK_UNKNOWN
        if drive_type in (drive_unknown, drive_no_root_dir):
            # The API could not classify the root; do not claim it is local.
            logger.debug(f"Could not determine drive type for: {root}")
            return FilesystemType.UNKNOWN
        return FilesystemType.LOCAL

    except Exception as e:
        logger.debug(f"Windows filesystem detection failed: {e}")
        return FilesystemType.UNKNOWN
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _classify_fs_name(fs_name: str) -> FilesystemType | None:
    """Map a filesystem type name to a network FilesystemType, or None."""
    name = fs_name.lower()
    if name.startswith("nfs") and name != "nfsd":
        return FilesystemType.NFS
    if name == "cifs":
        return FilesystemType.CIFS
    if name.startswith("smb"):
        return FilesystemType.SMB
    if name == "fuse.sshfs":
        return FilesystemType.NETWORK_UNKNOWN
    return None


def _fs_type_from_proc_mounts(path: Path) -> FilesystemType | None:
    """Classify the most specific /proc/mounts entry containing ``path``."""
    mounts_file = "/proc/mounts"
    if not os.path.exists(mounts_file):
        return None

    target = str(path)
    best_mount = ""
    best_type = ""
    with open(mounts_file, encoding="utf-8", errors="replace") as f:
        for line in f:
            parts = line.split()
            if len(parts) < 3:
                continue
            mount_point = parts[1]
            # procfs writes special characters as octal escapes.
            for escaped, char in (
                ("\\040", " "),
                ("\\011", "\t"),
                ("\\012", "\n"),
                ("\\134", "\\"),
            ):
                mount_point = mount_point.replace(escaped, char)

            # Match whole path components so "/mnt/data" does not
            # claim "/mnt/data2".
            contains = (
                mount_point == "/"
                or target == mount_point
                or target.startswith(mount_point.rstrip("/") + "/")
            )
            # Longest mount point wins; on a tie the later entry wins.
            if contains and len(mount_point) >= len(best_mount):
                best_mount = mount_point
                best_type = parts[2]

    if not best_mount:
        return None
    return _classify_fs_name(best_type)


def _detect_unix(path: Path) -> FilesystemType:
    """Detect filesystem type on Unix-like systems.

    Runs ``df -T`` first. If df is unavailable or fails, falls back to
    /proc/mounts.

    Filesystem type names are matched against whole whitespace-separated
    fields of the df data rows, not as substrings of the raw output, so a
    path such as ``/home/user/nfs_backup`` is not mistaken for NFS.

    NOTE(review): ``-T`` has a different meaning on BSD/macOS df, so this
    probe may not work there - confirm.

    Args:
        path: Absolute path to classify.

    Returns:
        NFS, CIFS, SMB or NETWORK_UNKNOWN when a network filesystem is
        recognised (or df times out), otherwise LOCAL.
    """
    try:
        result = subprocess.run(
            ["df", "-T", str(path)],
            capture_output=True,
            text=True,
            timeout=5,
        )

        if result.returncode == 0:
            # Skip the header row; join the rest in case a long device
            # name wrapped the entry onto a second line.
            data_rows = result.stdout.splitlines()[1:]
            for field in " ".join(data_rows).split():
                detected = _classify_fs_name(field)
                if detected is not None:
                    return detected
            # If none of the above, assume local
            return FilesystemType.LOCAL

    except subprocess.TimeoutExpired:
        logger.debug("df command timed out - may indicate network filesystem issue")
        return FilesystemType.NETWORK_UNKNOWN
    except FileNotFoundError:
        # df not available, try alternative
        pass
    except Exception as e:
        logger.debug(f"df command failed: {e}")

    # Fallback: try reading /proc/mounts on Linux
    try:
        detected = _fs_type_from_proc_mounts(path)
        if detected is not None:
            return detected
    except Exception as e:
        logger.debug(f"/proc/mounts check failed: {e}")

    return FilesystemType.LOCAL
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def is_network_filesystem(path: Path) -> bool:
    """Report whether a path lives on a network filesystem.

    Args:
        path: Path to check.

    Returns:
        True when detection yields NFS, SMB, CIFS, or an unidentified
        network type. False for LOCAL and for UNKNOWN (detection failed).
    """
    network_types = (
        FilesystemType.NFS,
        FilesystemType.SMB,
        FilesystemType.CIFS,
        FilesystemType.NETWORK_UNKNOWN,
    )
    detected = detect_filesystem_type(path)
    return detected in network_types
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_filesystem_warning_message(fs_type: FilesystemType, path: Path) -> str:
    """Build the warning shown when storage sits on a network filesystem.

    Args:
        fs_type: The detected filesystem type.
        path: The path that was checked.

    Returns:
        A warning string naming the filesystem type and path, describing
        the locking risk, and saying how to suppress the warning.
    """
    headline = f"WARNING: Storage path appears to be on a network filesystem ({fs_type.value}). "
    location = f"Path: {path}\n"
    risk = (
        "File-based locking does not work reliably on network filesystems. "
        "Running multiple instances against this storage may cause data corruption. "
    )
    remedy = (
        "To suppress this warning, set SPATIAL_MEMORY_ACKNOWLEDGE_NETWORK_FS_RISK=true "
        "or use a local filesystem path."
    )
    return "".join((headline, location, risk, remedy))
|