spatial-memory-mcp 1.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spatial_memory/__init__.py +97 -0
- spatial_memory/__main__.py +271 -0
- spatial_memory/adapters/__init__.py +7 -0
- spatial_memory/adapters/lancedb_repository.py +880 -0
- spatial_memory/config.py +769 -0
- spatial_memory/core/__init__.py +118 -0
- spatial_memory/core/cache.py +317 -0
- spatial_memory/core/circuit_breaker.py +297 -0
- spatial_memory/core/connection_pool.py +220 -0
- spatial_memory/core/consolidation_strategies.py +401 -0
- spatial_memory/core/database.py +3072 -0
- spatial_memory/core/db_idempotency.py +242 -0
- spatial_memory/core/db_indexes.py +576 -0
- spatial_memory/core/db_migrations.py +588 -0
- spatial_memory/core/db_search.py +512 -0
- spatial_memory/core/db_versioning.py +178 -0
- spatial_memory/core/embeddings.py +558 -0
- spatial_memory/core/errors.py +317 -0
- spatial_memory/core/file_security.py +701 -0
- spatial_memory/core/filesystem.py +178 -0
- spatial_memory/core/health.py +289 -0
- spatial_memory/core/helpers.py +79 -0
- spatial_memory/core/import_security.py +433 -0
- spatial_memory/core/lifecycle_ops.py +1067 -0
- spatial_memory/core/logging.py +194 -0
- spatial_memory/core/metrics.py +192 -0
- spatial_memory/core/models.py +660 -0
- spatial_memory/core/rate_limiter.py +326 -0
- spatial_memory/core/response_types.py +500 -0
- spatial_memory/core/security.py +588 -0
- spatial_memory/core/spatial_ops.py +430 -0
- spatial_memory/core/tracing.py +300 -0
- spatial_memory/core/utils.py +110 -0
- spatial_memory/core/validation.py +406 -0
- spatial_memory/factory.py +444 -0
- spatial_memory/migrations/__init__.py +40 -0
- spatial_memory/ports/__init__.py +11 -0
- spatial_memory/ports/repositories.py +630 -0
- spatial_memory/py.typed +0 -0
- spatial_memory/server.py +1214 -0
- spatial_memory/services/__init__.py +70 -0
- spatial_memory/services/decay_manager.py +411 -0
- spatial_memory/services/export_import.py +1031 -0
- spatial_memory/services/lifecycle.py +1139 -0
- spatial_memory/services/memory.py +412 -0
- spatial_memory/services/spatial.py +1152 -0
- spatial_memory/services/utility.py +429 -0
- spatial_memory/tools/__init__.py +5 -0
- spatial_memory/tools/definitions.py +695 -0
- spatial_memory/verify.py +140 -0
- spatial_memory_mcp-1.9.1.dist-info/METADATA +509 -0
- spatial_memory_mcp-1.9.1.dist-info/RECORD +55 -0
- spatial_memory_mcp-1.9.1.dist-info/WHEEL +4 -0
- spatial_memory_mcp-1.9.1.dist-info/entry_points.txt +2 -0
- spatial_memory_mcp-1.9.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""Centralized input validation for Spatial Memory MCP.
|
|
2
|
+
|
|
3
|
+
This module consolidates all validation logic from database.py and memory.py
|
|
4
|
+
to provide a single source of truth for input validation.
|
|
5
|
+
|
|
6
|
+
Security features:
|
|
7
|
+
- SQL injection prevention through pattern matching and escaping
|
|
8
|
+
- UUID format validation
|
|
9
|
+
- Content length validation
|
|
10
|
+
- Tag format and count validation
|
|
11
|
+
- Metadata size and serializability validation
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import re
|
|
18
|
+
import uuid
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from spatial_memory.core.errors import ValidationError
|
|
22
|
+
|
|
23
|
+
# Content validation constants
# Compared against len(content) in validate_content, i.e. a character count.
MAX_CONTENT_LENGTH = 100_000  # 100KB of text

# Tag validation constants
MAX_TAGS = 100  # Maximum number of tags per memory
MAX_TAG_LENGTH = 50  # Maximum length of a single tag

# Metadata validation constants
MAX_METADATA_SIZE = 65536  # 64KB serialized JSON

# Namespace validation pattern
# Must start with letter, followed by letters/numbers/dash/underscore, max 63 chars.
# Anchored with \Z instead of $: in Python, $ also matches just before a trailing
# newline, which would let a value such as "default\n" pass .match().
NAMESPACE_PATTERN = re.compile(r"^[a-zA-Z][a-zA-Z0-9_-]{0,62}\Z")

# Tag validation pattern
# Must start with letter or number, followed by letters/numbers/dash/underscore/dot, max 50 chars.
# Anchored with \Z for the same trailing-newline reason as NAMESPACE_PATTERN.
TAG_PATTERN = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\-.]{0,49}\Z")

# Dangerous SQL patterns for injection prevention.
# Kept as plain pattern strings; sanitize_string searches each one case-insensitively.
DANGEROUS_PATTERNS = [
    r";\s*(?:DROP|DELETE|UPDATE|INSERT|ALTER|CREATE|TRUNCATE)",
    r"--\s*$",
    r"/\*.*\*/",
    r"'\s*OR\s*'",
    r"'\s*AND\s*'",
    r"'\s*UNION\s+(?:ALL\s+)?SELECT",
    # Additional patterns for stored procedures and timing attacks
    r";\s*EXEC(?:UTE)?\s",  # EXEC/EXECUTE stored procedures
    r"WAITFOR\s+DELAY",  # Time-based SQL injection
    r"(?:xp_|sp_)\w+",  # SQL Server stored procedures
    r"0x[0-9a-fA-F]+",  # Hex-encoded strings
    r"BENCHMARK\s*\(",  # MySQL timing attack
    r"SLEEP\s*\(",  # MySQL/PostgreSQL sleep
    r"PG_SLEEP\s*\(",  # PostgreSQL specific
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def validate_uuid(value: str) -> str:
    """Validate and return a UUID string.

    The value is checked by parsing it with ``uuid.UUID``. The input is
    returned exactly as given; it is not normalized to canonical form.

    Args:
        value: The value to validate as a UUID.

    Returns:
        The validated UUID string (unchanged).

    Raises:
        ValidationError: If the value is not a valid UUID format, including
            non-string input such as ``None``, integers, or bytes.

    Examples:
        >>> validate_uuid("550e8400-e29b-41d4-a716-446655440000")
        '550e8400-e29b-41d4-a716-446655440000'
        >>> validate_uuid("not-a-uuid")
        Traceback (most recent call last):
            ...
        ValidationError: Invalid UUID format: not-a-uuid
    """
    try:
        # Parsing is the validation; the parsed object itself is discarded.
        uuid.UUID(value)
    except (ValueError, AttributeError, TypeError) as e:
        # ValueError: malformed string. AttributeError/TypeError: non-string
        # input (e.g. int, None, bytes) rejected inside uuid.UUID.
        raise ValidationError(f"Invalid UUID format: {value}") from e
    return value
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def validate_namespace(namespace: str) -> str:
    """Validate namespace format.

    Namespaces must:
    - Be a string
    - Start with a letter (a-z, A-Z)
    - Contain only letters, numbers, dash, or underscore
    - Be between 1-63 characters (DNS label compatible)
    - Not be empty

    Uses NAMESPACE_PATTERN for consistent validation across the codebase.

    Args:
        namespace: The namespace to validate.

    Returns:
        The validated namespace string (unchanged).

    Raises:
        ValidationError: If the namespace is empty, not a string, or does not
            match NAMESPACE_PATTERN in its entirety.

    Examples:
        >>> validate_namespace("default")
        'default'
        >>> validate_namespace("my-namespace_v1")
        'my-namespace_v1'
        >>> validate_namespace("Projects")
        'Projects'
        >>> validate_namespace("")
        Traceback (most recent call last):
            ...
        ValidationError: Namespace cannot be empty
    """
    if not namespace:
        raise ValidationError("Namespace cannot be empty")

    # A truthy non-string would otherwise surface as a raw TypeError from the
    # regex engine instead of a ValidationError.
    if not isinstance(namespace, str):
        raise ValidationError(f"Namespace must be a string, got {type(namespace).__name__}")

    # fullmatch (not match) so the whole value must conform: with match(), a
    # pattern ending in "$" also accepts a value that ends with a newline.
    if not NAMESPACE_PATTERN.fullmatch(namespace):
        raise ValidationError(
            f"Invalid namespace format: {namespace}. "
            "Must start with a letter, contain only letters/numbers/dash/underscore, "
            "and be max 63 characters."
        )

    return namespace
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def validate_content(content: str) -> None:
    """Check that memory content is non-blank and within the length limit.

    Rules enforced here:
    - Content must contain at least one non-whitespace character.
    - Content must not be longer than MAX_CONTENT_LENGTH characters.

    Security Note:
        This function performs no SQL-injection pattern screening. The
        module's authors state that content is never interpolated into query
        strings and that storage goes through parameterized APIs; that
        cannot be confirmed from this module alone (TODO confirm against the
        storage layer).

    Args:
        content: Content to validate.

    Raises:
        ValidationError: If content is empty, whitespace-only, or too long.

    Examples:
        >>> validate_content("This is valid content")
        >>> validate_content("")
        Traceback (most recent call last):
            ...
        ValidationError: Content cannot be empty
        >>> validate_content("x" * 100001)
        Traceback (most recent call last):
            ...
        ValidationError: Content exceeds maximum length...
    """
    # Short-circuits on falsy input before .strip() is attempted.
    has_text = bool(content) and bool(content.strip())
    if not has_text:
        raise ValidationError("Content cannot be empty")

    length = len(content)
    if length > MAX_CONTENT_LENGTH:
        raise ValidationError(
            f"Content exceeds maximum length of {MAX_CONTENT_LENGTH} characters "
            f"(got {length} characters)"
        )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def validate_importance(importance: float) -> None:
    """Check that an importance score lies in the closed interval [0.0, 1.0].

    Args:
        importance: Importance to validate.

    Raises:
        ValidationError: If importance is out of range.

    Examples:
        >>> validate_importance(0.5)
        >>> validate_importance(1.5)
        Traceback (most recent call last):
            ...
        ValidationError: Importance must be between 0.0 and 1.0
    """
    # The chained comparison is False for NaN, so NaN is rejected as well.
    in_range = 0.0 <= importance <= 1.0
    if in_range:
        return
    raise ValidationError("Importance must be between 0.0 and 1.0")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def validate_tags(tags: list[str] | None) -> list[str]:
|
|
198
|
+
"""Validate and return tags list.
|
|
199
|
+
|
|
200
|
+
Tags must:
|
|
201
|
+
- Start with a letter or number
|
|
202
|
+
- Contain only letters, numbers, dash, or underscore
|
|
203
|
+
- Be between 1-50 characters each
|
|
204
|
+
- Have at most MAX_TAGS total tags
|
|
205
|
+
|
|
206
|
+
Args:
|
|
207
|
+
tags: List of tags to validate (None is treated as empty list).
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
Validated tags list (empty list if None was provided).
|
|
211
|
+
|
|
212
|
+
Raises:
|
|
213
|
+
ValidationError: If tags are invalid.
|
|
214
|
+
|
|
215
|
+
Examples:
|
|
216
|
+
>>> validate_tags(["tag1", "tag2"])
|
|
217
|
+
['tag1', 'tag2']
|
|
218
|
+
>>> validate_tags(None)
|
|
219
|
+
[]
|
|
220
|
+
>>> validate_tags(["invalid tag"])
|
|
221
|
+
Traceback (most recent call last):
|
|
222
|
+
...
|
|
223
|
+
ValidationError: Invalid tag format...
|
|
224
|
+
"""
|
|
225
|
+
if tags is None:
|
|
226
|
+
return []
|
|
227
|
+
|
|
228
|
+
if len(tags) > MAX_TAGS:
|
|
229
|
+
raise ValidationError(f"Maximum {MAX_TAGS} tags allowed, got {len(tags)}")
|
|
230
|
+
|
|
231
|
+
validated = []
|
|
232
|
+
for tag in tags:
|
|
233
|
+
# Must be a string
|
|
234
|
+
if not isinstance(tag, str):
|
|
235
|
+
raise ValidationError(f"Tag must be a string, got {type(tag).__name__}")
|
|
236
|
+
|
|
237
|
+
# Must match pattern: start with letter/number, alphanumeric with dash/underscore/dot
|
|
238
|
+
if not TAG_PATTERN.match(tag):
|
|
239
|
+
raise ValidationError(
|
|
240
|
+
f"Invalid tag format: '{tag}'. Tags must be 1-{MAX_TAG_LENGTH} characters, "
|
|
241
|
+
"start with letter or number, and contain only letters, numbers, dash, "
|
|
242
|
+
"underscore, or dot."
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
validated.append(tag)
|
|
246
|
+
|
|
247
|
+
return validated
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# Structural limits used when walking metadata.

# Default ceiling on nesting depth; validate_metadata falls back to this value.
MAX_METADATA_DEPTH = 10

# Longest dictionary key accepted, measured with len(key).
MAX_METADATA_KEY_LENGTH = 128
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def validate_metadata(
|
|
256
|
+
metadata: dict[str, Any] | None,
|
|
257
|
+
max_depth: int | None = None,
|
|
258
|
+
validate_keys: bool = True,
|
|
259
|
+
) -> dict[str, Any]:
|
|
260
|
+
"""Validate and return metadata dict.
|
|
261
|
+
|
|
262
|
+
Metadata must:
|
|
263
|
+
- Be a dictionary
|
|
264
|
+
- Be JSON-serializable
|
|
265
|
+
- Not exceed MAX_METADATA_SIZE bytes when serialized
|
|
266
|
+
- Not exceed max_depth nesting levels (if specified)
|
|
267
|
+
- Have keys that are valid identifiers (if validate_keys=True)
|
|
268
|
+
|
|
269
|
+
Args:
|
|
270
|
+
metadata: Metadata dictionary to validate (None is treated as empty dict).
|
|
271
|
+
max_depth: Maximum nesting depth (default: MAX_METADATA_DEPTH).
|
|
272
|
+
Set to None to disable depth checking.
|
|
273
|
+
validate_keys: Whether to validate key format (default: True).
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
Validated metadata dictionary (empty dict if None was provided).
|
|
277
|
+
|
|
278
|
+
Raises:
|
|
279
|
+
ValidationError: If metadata is invalid.
|
|
280
|
+
|
|
281
|
+
Examples:
|
|
282
|
+
>>> validate_metadata({"key": "value"})
|
|
283
|
+
{'key': 'value'}
|
|
284
|
+
>>> validate_metadata(None)
|
|
285
|
+
{}
|
|
286
|
+
>>> validate_metadata("not a dict")
|
|
287
|
+
Traceback (most recent call last):
|
|
288
|
+
...
|
|
289
|
+
ValidationError: Metadata must be a dictionary...
|
|
290
|
+
"""
|
|
291
|
+
if metadata is None:
|
|
292
|
+
return {}
|
|
293
|
+
|
|
294
|
+
if not isinstance(metadata, dict):
|
|
295
|
+
raise ValidationError(f"Metadata must be a dictionary, got {type(metadata).__name__}")
|
|
296
|
+
|
|
297
|
+
# Check nesting depth and key format
|
|
298
|
+
effective_max_depth = max_depth if max_depth is not None else MAX_METADATA_DEPTH
|
|
299
|
+
_validate_metadata_structure(metadata, effective_max_depth, validate_keys, current_depth=0)
|
|
300
|
+
|
|
301
|
+
# Check serialized size (max 64KB)
|
|
302
|
+
try:
|
|
303
|
+
serialized = json.dumps(metadata)
|
|
304
|
+
if len(serialized) > MAX_METADATA_SIZE:
|
|
305
|
+
raise ValidationError(
|
|
306
|
+
f"Metadata exceeds 64KB limit ({len(serialized)} bytes)"
|
|
307
|
+
)
|
|
308
|
+
except (TypeError, ValueError) as e:
|
|
309
|
+
raise ValidationError(f"Metadata must be JSON-serializable: {e}") from e
|
|
310
|
+
|
|
311
|
+
return metadata
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _validate_metadata_structure(
|
|
315
|
+
value: Any,
|
|
316
|
+
max_depth: int,
|
|
317
|
+
validate_keys: bool,
|
|
318
|
+
current_depth: int,
|
|
319
|
+
path: str = "",
|
|
320
|
+
) -> None:
|
|
321
|
+
"""Recursively validate metadata structure.
|
|
322
|
+
|
|
323
|
+
Args:
|
|
324
|
+
value: The value to validate.
|
|
325
|
+
max_depth: Maximum allowed nesting depth.
|
|
326
|
+
validate_keys: Whether to validate dictionary key format.
|
|
327
|
+
current_depth: Current nesting level.
|
|
328
|
+
path: Dot-separated path for error messages.
|
|
329
|
+
"""
|
|
330
|
+
if current_depth > max_depth:
|
|
331
|
+
raise ValidationError(
|
|
332
|
+
f"Metadata exceeds maximum nesting depth of {max_depth}"
|
|
333
|
+
+ (f" at '{path}'" if path else "")
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
if isinstance(value, dict):
|
|
337
|
+
for key, val in value.items():
|
|
338
|
+
# Validate key format
|
|
339
|
+
if validate_keys:
|
|
340
|
+
if not isinstance(key, str):
|
|
341
|
+
raise ValidationError(
|
|
342
|
+
f"Metadata keys must be strings, got {type(key).__name__}"
|
|
343
|
+
+ (f" at '{path}'" if path else "")
|
|
344
|
+
)
|
|
345
|
+
if len(key) > MAX_METADATA_KEY_LENGTH:
|
|
346
|
+
raise ValidationError(
|
|
347
|
+
f"Metadata key '{key[:50]}...' exceeds maximum length of "
|
|
348
|
+
f"{MAX_METADATA_KEY_LENGTH} characters"
|
|
349
|
+
)
|
|
350
|
+
if not key:
|
|
351
|
+
raise ValidationError(
|
|
352
|
+
"Metadata keys cannot be empty"
|
|
353
|
+
+ (f" at '{path}'" if path else "")
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
# Recurse into nested dicts/lists
|
|
357
|
+
new_path = f"{path}.{key}" if path else key
|
|
358
|
+
_validate_metadata_structure(
|
|
359
|
+
val, max_depth, validate_keys, current_depth + 1, new_path
|
|
360
|
+
)
|
|
361
|
+
elif isinstance(value, list):
|
|
362
|
+
for i, item in enumerate(value):
|
|
363
|
+
new_path = f"{path}[{i}]" if path else f"[{i}]"
|
|
364
|
+
_validate_metadata_structure(
|
|
365
|
+
item, max_depth, validate_keys, current_depth + 1, new_path
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def sanitize_string(value: str) -> str:
    """Sanitize string for safe SQL usage.

    Prevents SQL injection by:
    1. Validating input type
    2. Detecting dangerous SQL patterns (DANGEROUS_PATTERNS)
    3. Escaping single quotes

    Args:
        value: The string value to sanitize.

    Returns:
        The value with every single quote doubled.

    Raises:
        ValidationError: If the value is not a string or matches one of the
            DANGEROUS_PATTERNS.

    Examples:
        >>> sanitize_string("hello")
        'hello'
        >>> sanitize_string("it's")
        "it''s"
        >>> sanitize_string("'; DROP TABLE users--")
        Traceback (most recent call last):
            ...
        ValidationError: Invalid characters in value...
    """
    if not isinstance(value, str):
        raise ValidationError(f"Expected string, got {type(value).__name__}")

    # Check for dangerous SQL injection patterns.
    # DOTALL lets "." span newlines so a block comment split across lines
    # ("/*\n*/") is still detected; it has no effect on patterns without ".".
    for pattern in DANGEROUS_PATTERNS:
        if re.search(pattern, value, re.IGNORECASE | re.DOTALL):
            # Only show first 50 chars in error to prevent log flooding
            raise ValidationError(f"Invalid characters in value: {value[:50]}")

    # Escape single quotes by doubling them (standard SQL escaping)
    return value.replace("'", "''")
|