spatial-memory-mcp: spatial_memory_mcp-1.0.3-py3-none-any.whl → spatial_memory_mcp-1.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spatial-memory-mcp might be problematic. (The original page linked to further details here; the link was not preserved in this text.)

Files changed (39):
  1. spatial_memory/__init__.py +97 -97
  2. spatial_memory/__main__.py +241 -2
  3. spatial_memory/adapters/lancedb_repository.py +74 -5
  4. spatial_memory/config.py +115 -2
  5. spatial_memory/core/__init__.py +35 -0
  6. spatial_memory/core/cache.py +317 -0
  7. spatial_memory/core/circuit_breaker.py +297 -0
  8. spatial_memory/core/connection_pool.py +41 -3
  9. spatial_memory/core/consolidation_strategies.py +402 -0
  10. spatial_memory/core/database.py +791 -769
  11. spatial_memory/core/db_idempotency.py +242 -0
  12. spatial_memory/core/db_indexes.py +575 -0
  13. spatial_memory/core/db_migrations.py +584 -0
  14. spatial_memory/core/db_search.py +509 -0
  15. spatial_memory/core/db_versioning.py +177 -0
  16. spatial_memory/core/embeddings.py +156 -19
  17. spatial_memory/core/errors.py +75 -3
  18. spatial_memory/core/filesystem.py +178 -0
  19. spatial_memory/core/logging.py +194 -103
  20. spatial_memory/core/models.py +4 -0
  21. spatial_memory/core/rate_limiter.py +326 -105
  22. spatial_memory/core/response_types.py +497 -0
  23. spatial_memory/core/tracing.py +300 -0
  24. spatial_memory/core/validation.py +403 -319
  25. spatial_memory/factory.py +407 -0
  26. spatial_memory/migrations/__init__.py +40 -0
  27. spatial_memory/ports/repositories.py +52 -2
  28. spatial_memory/server.py +329 -188
  29. spatial_memory/services/export_import.py +61 -43
  30. spatial_memory/services/lifecycle.py +397 -122
  31. spatial_memory/services/memory.py +81 -4
  32. spatial_memory/services/spatial.py +129 -46
  33. spatial_memory/tools/definitions.py +695 -671
  34. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/METADATA +83 -3
  35. spatial_memory_mcp-1.6.0.dist-info/RECORD +54 -0
  36. spatial_memory_mcp-1.0.3.dist-info/RECORD +0 -41
  37. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/WHEEL +0 -0
  38. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/entry_points.txt +0 -0
  39. {spatial_memory_mcp-1.0.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,319 +1,403 @@
1
- """Centralized input validation for Spatial Memory MCP.
2
-
3
- This module consolidates all validation logic from database.py and memory.py
4
- to provide a single source of truth for input validation.
5
-
6
- Security features:
7
- - SQL injection prevention through pattern matching and escaping
8
- - UUID format validation
9
- - Content length validation
10
- - Tag format and count validation
11
- - Metadata size and serializability validation
12
- """
13
-
14
- from __future__ import annotations
15
-
16
- import json
17
- import re
18
- import uuid
19
- from typing import Any
20
-
21
- from spatial_memory.core.errors import ValidationError
22
-
23
- # Content validation constants
24
- MAX_CONTENT_LENGTH = 100_000 # 100KB of text
25
-
26
- # Tag validation constants
27
- MAX_TAGS = 100 # Maximum number of tags per memory
28
- MAX_TAG_LENGTH = 50 # Maximum length of a single tag
29
-
30
- # Metadata validation constants
31
- MAX_METADATA_SIZE = 65536 # 64KB serialized JSON
32
-
33
- # Namespace validation pattern
34
- # Must start with letter, followed by letters/numbers/dash/underscore, max 63 chars
35
- NAMESPACE_PATTERN = re.compile(r"^[a-zA-Z][a-zA-Z0-9_-]{0,62}$")
36
-
37
- # Tag validation pattern
38
- # Must start with letter or number, followed by letters/numbers/dash/underscore, max 50 chars
39
- TAG_PATTERN = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_-]{0,49}$")
40
-
41
- # Dangerous SQL patterns for injection prevention
42
- DANGEROUS_PATTERNS = [
43
- r";\s*(?:DROP|DELETE|UPDATE|INSERT|ALTER|CREATE|TRUNCATE)",
44
- r"--\s*$",
45
- r"/\*.*\*/",
46
- r"'\s*OR\s*'",
47
- r"'\s*AND\s*'",
48
- r"'\s*UNION\s+(?:ALL\s+)?SELECT",
49
- # Additional patterns for stored procedures and timing attacks
50
- r";\s*EXEC(?:UTE)?\s", # EXEC/EXECUTE stored procedures
51
- r"WAITFOR\s+DELAY", # Time-based SQL injection
52
- r"(?:xp_|sp_)\w+", # SQL Server stored procedures
53
- r"0x[0-9a-fA-F]+", # Hex-encoded strings
54
- r"BENCHMARK\s*\(", # MySQL timing attack
55
- r"SLEEP\s*\(", # MySQL/PostgreSQL sleep
56
- r"PG_SLEEP\s*\(", # PostgreSQL specific
57
- ]
58
-
59
-
60
- def validate_uuid(value: str) -> str:
61
- """Validate and return a UUID string.
62
-
63
- Args:
64
- value: The value to validate as a UUID.
65
-
66
- Returns:
67
- The validated UUID string.
68
-
69
- Raises:
70
- ValidationError: If the value is not a valid UUID format.
71
-
72
- Examples:
73
- >>> validate_uuid("550e8400-e29b-41d4-a716-446655440000")
74
- '550e8400-e29b-41d4-a716-446655440000'
75
- >>> validate_uuid("not-a-uuid")
76
- Traceback (most recent call last):
77
- ...
78
- ValidationError: Invalid UUID format: not-a-uuid
79
- """
80
- try:
81
- # Attempt to parse as UUID to validate format
82
- uuid.UUID(value)
83
- return value
84
- except (ValueError, AttributeError) as e:
85
- raise ValidationError(f"Invalid UUID format: {value}") from e
86
-
87
-
88
- def validate_namespace(namespace: str) -> str:
89
- """Validate namespace format.
90
-
91
- Namespaces must:
92
- - Start with a letter
93
- - Contain only letters, numbers, dash, underscore, or dot
94
- - Be between 1-256 characters
95
- - Not be empty
96
-
97
- Args:
98
- namespace: The namespace to validate.
99
-
100
- Returns:
101
- The validated namespace string.
102
-
103
- Raises:
104
- ValidationError: If the namespace is invalid.
105
-
106
- Examples:
107
- >>> validate_namespace("default")
108
- 'default'
109
- >>> validate_namespace("my-namespace_v1.0")
110
- 'my-namespace_v1.0'
111
- >>> validate_namespace("")
112
- Traceback (most recent call last):
113
- ...
114
- ValidationError: Namespace cannot be empty
115
- """
116
- if not namespace:
117
- raise ValidationError("Namespace cannot be empty")
118
-
119
- if len(namespace) > 256:
120
- raise ValidationError("Namespace too long (max 256 characters)")
121
-
122
- # Allow alphanumeric, dash, underscore, dot
123
- if not re.match(r"^[\w\-\.]+$", namespace):
124
- raise ValidationError(f"Invalid namespace format: {namespace}")
125
-
126
- return namespace
127
-
128
-
129
- def validate_content(content: str) -> None:
130
- """Validate memory content.
131
-
132
- Content must:
133
- - Not be empty or whitespace-only
134
- - Not exceed MAX_CONTENT_LENGTH characters
135
-
136
- Args:
137
- content: Content to validate.
138
-
139
- Raises:
140
- ValidationError: If content is empty, whitespace-only, or too long.
141
-
142
- Examples:
143
- >>> validate_content("This is valid content")
144
- >>> validate_content("")
145
- Traceback (most recent call last):
146
- ...
147
- ValidationError: Content cannot be empty
148
- >>> validate_content("x" * 100001)
149
- Traceback (most recent call last):
150
- ...
151
- ValidationError: Content exceeds maximum length...
152
- """
153
- if not content or not content.strip():
154
- raise ValidationError("Content cannot be empty")
155
-
156
- if len(content) > MAX_CONTENT_LENGTH:
157
- raise ValidationError(
158
- f"Content exceeds maximum length of {MAX_CONTENT_LENGTH} characters "
159
- f"(got {len(content)} characters)"
160
- )
161
-
162
-
163
- def validate_importance(importance: float) -> None:
164
- """Validate importance value (0.0-1.0).
165
-
166
- Args:
167
- importance: Importance to validate.
168
-
169
- Raises:
170
- ValidationError: If importance is out of range.
171
-
172
- Examples:
173
- >>> validate_importance(0.5)
174
- >>> validate_importance(1.5)
175
- Traceback (most recent call last):
176
- ...
177
- ValidationError: Importance must be between 0.0 and 1.0
178
- """
179
- if not 0.0 <= importance <= 1.0:
180
- raise ValidationError("Importance must be between 0.0 and 1.0")
181
-
182
-
183
- def validate_tags(tags: list[str] | None) -> list[str]:
184
- """Validate and return tags list.
185
-
186
- Tags must:
187
- - Start with a letter or number
188
- - Contain only letters, numbers, dash, or underscore
189
- - Be between 1-50 characters each
190
- - Have at most MAX_TAGS total tags
191
-
192
- Args:
193
- tags: List of tags to validate (None is treated as empty list).
194
-
195
- Returns:
196
- Validated tags list (empty list if None was provided).
197
-
198
- Raises:
199
- ValidationError: If tags are invalid.
200
-
201
- Examples:
202
- >>> validate_tags(["tag1", "tag2"])
203
- ['tag1', 'tag2']
204
- >>> validate_tags(None)
205
- []
206
- >>> validate_tags(["invalid tag"])
207
- Traceback (most recent call last):
208
- ...
209
- ValidationError: Invalid tag format...
210
- """
211
- if tags is None:
212
- return []
213
-
214
- if len(tags) > MAX_TAGS:
215
- raise ValidationError(f"Maximum {MAX_TAGS} tags allowed, got {len(tags)}")
216
-
217
- validated = []
218
- for tag in tags:
219
- # Must be a string
220
- if not isinstance(tag, str):
221
- raise ValidationError(f"Tag must be a string, got {type(tag).__name__}")
222
-
223
- # Must match pattern: start with letter/number, alphanumeric with dash/underscore
224
- if not TAG_PATTERN.match(tag):
225
- raise ValidationError(
226
- f"Invalid tag format: '{tag}'. Tags must be 1-{MAX_TAG_LENGTH} characters, "
227
- "start with letter or number, and contain only letters, numbers, dash, "
228
- "or underscore."
229
- )
230
-
231
- validated.append(tag)
232
-
233
- return validated
234
-
235
-
236
- def validate_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]:
237
- """Validate and return metadata dict.
238
-
239
- Metadata must:
240
- - Be a dictionary
241
- - Be JSON-serializable
242
- - Not exceed MAX_METADATA_SIZE bytes when serialized
243
-
244
- Args:
245
- metadata: Metadata dictionary to validate (None is treated as empty dict).
246
-
247
- Returns:
248
- Validated metadata dictionary (empty dict if None was provided).
249
-
250
- Raises:
251
- ValidationError: If metadata is invalid.
252
-
253
- Examples:
254
- >>> validate_metadata({"key": "value"})
255
- {'key': 'value'}
256
- >>> validate_metadata(None)
257
- {}
258
- >>> validate_metadata("not a dict")
259
- Traceback (most recent call last):
260
- ...
261
- ValidationError: Metadata must be a dictionary...
262
- """
263
- if metadata is None:
264
- return {}
265
-
266
- if not isinstance(metadata, dict):
267
- raise ValidationError(f"Metadata must be a dictionary, got {type(metadata).__name__}")
268
-
269
- # Check serialized size (max 64KB)
270
- try:
271
- serialized = json.dumps(metadata)
272
- if len(serialized) > MAX_METADATA_SIZE:
273
- raise ValidationError(
274
- f"Metadata exceeds 64KB limit ({len(serialized)} bytes)"
275
- )
276
- except (TypeError, ValueError) as e:
277
- raise ValidationError(f"Metadata must be JSON-serializable: {e}") from e
278
-
279
- return metadata
280
-
281
-
282
- def sanitize_string(value: str) -> str:
283
- """Sanitize string for safe SQL usage.
284
-
285
- Prevents SQL injection by:
286
- 1. Validating input type
287
- 2. Detecting dangerous SQL patterns
288
- 3. Escaping single quotes
289
-
290
- Args:
291
- value: The string value to sanitize.
292
-
293
- Returns:
294
- Sanitized string safe for use in filter expressions.
295
-
296
- Raises:
297
- ValidationError: If the value contains invalid characters or SQL injection patterns.
298
-
299
- Examples:
300
- >>> sanitize_string("hello")
301
- 'hello'
302
- >>> sanitize_string("it's")
303
- "it''s"
304
- >>> sanitize_string("'; DROP TABLE users--")
305
- Traceback (most recent call last):
306
- ...
307
- ValidationError: Invalid characters in value...
308
- """
309
- if not isinstance(value, str):
310
- raise ValidationError(f"Expected string, got {type(value).__name__}")
311
-
312
- # Check for dangerous SQL injection patterns
313
- for pattern in DANGEROUS_PATTERNS:
314
- if re.search(pattern, value, re.IGNORECASE):
315
- # Only show first 50 chars in error to prevent log flooding
316
- raise ValidationError(f"Invalid characters in value: {value[:50]}")
317
-
318
- # Escape single quotes by doubling them (standard SQL escaping)
319
- return value.replace("'", "''")
1
+ """Centralized input validation for Spatial Memory MCP.
2
+
3
+ This module consolidates all validation logic from database.py and memory.py
4
+ to provide a single source of truth for input validation.
5
+
6
+ Security features:
7
+ - SQL injection prevention through pattern matching and escaping
8
+ - UUID format validation
9
+ - Content length validation
10
+ - Tag format and count validation
11
+ - Metadata size and serializability validation
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import re
18
+ import uuid
19
+ from typing import Any
20
+
21
+ from spatial_memory.core.errors import ValidationError
22
+
23
+ # Content validation constants
24
+ MAX_CONTENT_LENGTH = 100_000 # 100KB of text
25
+
26
+ # Tag validation constants
27
+ MAX_TAGS = 100 # Maximum number of tags per memory
28
+ MAX_TAG_LENGTH = 50 # Maximum length of a single tag
29
+
30
+ # Metadata validation constants
31
+ MAX_METADATA_SIZE = 65536 # 64KB serialized JSON
32
+
33
+ # Namespace validation pattern
34
+ # Must start with letter, followed by letters/numbers/dash/underscore, max 63 chars
35
+ NAMESPACE_PATTERN = re.compile(r"^[a-zA-Z][a-zA-Z0-9_-]{0,62}$")
36
+
37
+ # Tag validation pattern
38
+ # Must start with letter or number, followed by letters/numbers/dash/underscore/dot, max 50 chars
39
+ TAG_PATTERN = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\-.]{0,49}$")
40
+
41
+ # Dangerous SQL patterns for injection prevention
42
+ DANGEROUS_PATTERNS = [
43
+ r";\s*(?:DROP|DELETE|UPDATE|INSERT|ALTER|CREATE|TRUNCATE)",
44
+ r"--\s*$",
45
+ r"/\*.*\*/",
46
+ r"'\s*OR\s*'",
47
+ r"'\s*AND\s*'",
48
+ r"'\s*UNION\s+(?:ALL\s+)?SELECT",
49
+ # Additional patterns for stored procedures and timing attacks
50
+ r";\s*EXEC(?:UTE)?\s", # EXEC/EXECUTE stored procedures
51
+ r"WAITFOR\s+DELAY", # Time-based SQL injection
52
+ r"(?:xp_|sp_)\w+", # SQL Server stored procedures
53
+ r"0x[0-9a-fA-F]+", # Hex-encoded strings
54
+ r"BENCHMARK\s*\(", # MySQL timing attack
55
+ r"SLEEP\s*\(", # MySQL/PostgreSQL sleep
56
+ r"PG_SLEEP\s*\(", # PostgreSQL specific
57
+ ]
58
+
59
+
60
+ def validate_uuid(value: str) -> str:
61
+ """Validate and return a UUID string.
62
+
63
+ Args:
64
+ value: The value to validate as a UUID.
65
+
66
+ Returns:
67
+ The validated UUID string.
68
+
69
+ Raises:
70
+ ValidationError: If the value is not a valid UUID format.
71
+
72
+ Examples:
73
+ >>> validate_uuid("550e8400-e29b-41d4-a716-446655440000")
74
+ '550e8400-e29b-41d4-a716-446655440000'
75
+ >>> validate_uuid("not-a-uuid")
76
+ Traceback (most recent call last):
77
+ ...
78
+ ValidationError: Invalid UUID format: not-a-uuid
79
+ """
80
+ try:
81
+ # Attempt to parse as UUID to validate format
82
+ uuid.UUID(value)
83
+ return value
84
+ except (ValueError, AttributeError) as e:
85
+ raise ValidationError(f"Invalid UUID format: {value}") from e
86
+
87
+
88
+ def validate_namespace(namespace: str) -> str:
89
+ """Validate namespace format.
90
+
91
+ Namespaces must:
92
+ - Start with a letter, number, or underscore
93
+ - Contain only letters, numbers, dash, underscore, or dot
94
+ - Be between 1-256 characters
95
+ - Not be empty
96
+
97
+ Args:
98
+ namespace: The namespace to validate.
99
+
100
+ Returns:
101
+ The validated namespace string.
102
+
103
+ Raises:
104
+ ValidationError: If the namespace is invalid.
105
+
106
+ Examples:
107
+ >>> validate_namespace("default")
108
+ 'default'
109
+ >>> validate_namespace("my-namespace_v1.0")
110
+ 'my-namespace_v1.0'
111
+ >>> validate_namespace("123numeric")
112
+ '123numeric'
113
+ >>> validate_namespace("")
114
+ Traceback (most recent call last):
115
+ ...
116
+ ValidationError: Namespace cannot be empty
117
+ """
118
+ if not namespace:
119
+ raise ValidationError("Namespace cannot be empty")
120
+
121
+ if len(namespace) > 256:
122
+ raise ValidationError("Namespace too long (max 256 characters)")
123
+
124
+ # Allow alphanumeric, dash, underscore, dot
125
+ if not re.match(r"^[\w\-\.]+$", namespace):
126
+ raise ValidationError(f"Invalid namespace format: {namespace}")
127
+
128
+ return namespace
129
+
130
+
131
+ def validate_content(content: str) -> None:
132
+ """Validate memory content.
133
+
134
+ Content must:
135
+ - Not be empty or whitespace-only
136
+ - Not exceed MAX_CONTENT_LENGTH characters
137
+
138
+ Security Note:
139
+ Content is NOT validated for SQL injection patterns because:
140
+ 1. All database operations use parameterized queries (LanceDB's PyArrow-based API)
141
+ 2. Content is never interpolated into SQL strings
142
+ 3. LanceDB filter expressions use a separate DSL with proper escaping
143
+
144
+ This approach follows the principle of defense-in-depth: input validation
145
+ catches obvious issues, but the primary protection is parameterized queries.
146
+
147
+ Args:
148
+ content: Content to validate.
149
+
150
+ Raises:
151
+ ValidationError: If content is empty, whitespace-only, or too long.
152
+
153
+ Examples:
154
+ >>> validate_content("This is valid content")
155
+ >>> validate_content("")
156
+ Traceback (most recent call last):
157
+ ...
158
+ ValidationError: Content cannot be empty
159
+ >>> validate_content("x" * 100001)
160
+ Traceback (most recent call last):
161
+ ...
162
+ ValidationError: Content exceeds maximum length...
163
+ """
164
+ if not content or not content.strip():
165
+ raise ValidationError("Content cannot be empty")
166
+
167
+ if len(content) > MAX_CONTENT_LENGTH:
168
+ raise ValidationError(
169
+ f"Content exceeds maximum length of {MAX_CONTENT_LENGTH} characters "
170
+ f"(got {len(content)} characters)"
171
+ )
172
+
173
+
174
+ def validate_importance(importance: float) -> None:
175
+ """Validate importance value (0.0-1.0).
176
+
177
+ Args:
178
+ importance: Importance to validate.
179
+
180
+ Raises:
181
+ ValidationError: If importance is out of range.
182
+
183
+ Examples:
184
+ >>> validate_importance(0.5)
185
+ >>> validate_importance(1.5)
186
+ Traceback (most recent call last):
187
+ ...
188
+ ValidationError: Importance must be between 0.0 and 1.0
189
+ """
190
+ if not 0.0 <= importance <= 1.0:
191
+ raise ValidationError("Importance must be between 0.0 and 1.0")
192
+
193
+
194
+ def validate_tags(tags: list[str] | None) -> list[str]:
195
+ """Validate and return tags list.
196
+
197
+ Tags must:
198
+ - Start with a letter or number
199
+ - Contain only letters, numbers, dash, or underscore
200
+ - Be between 1-50 characters each
201
+ - Have at most MAX_TAGS total tags
202
+
203
+ Args:
204
+ tags: List of tags to validate (None is treated as empty list).
205
+
206
+ Returns:
207
+ Validated tags list (empty list if None was provided).
208
+
209
+ Raises:
210
+ ValidationError: If tags are invalid.
211
+
212
+ Examples:
213
+ >>> validate_tags(["tag1", "tag2"])
214
+ ['tag1', 'tag2']
215
+ >>> validate_tags(None)
216
+ []
217
+ >>> validate_tags(["invalid tag"])
218
+ Traceback (most recent call last):
219
+ ...
220
+ ValidationError: Invalid tag format...
221
+ """
222
+ if tags is None:
223
+ return []
224
+
225
+ if len(tags) > MAX_TAGS:
226
+ raise ValidationError(f"Maximum {MAX_TAGS} tags allowed, got {len(tags)}")
227
+
228
+ validated = []
229
+ for tag in tags:
230
+ # Must be a string
231
+ if not isinstance(tag, str):
232
+ raise ValidationError(f"Tag must be a string, got {type(tag).__name__}")
233
+
234
+ # Must match pattern: start with letter/number, alphanumeric with dash/underscore/dot
235
+ if not TAG_PATTERN.match(tag):
236
+ raise ValidationError(
237
+ f"Invalid tag format: '{tag}'. Tags must be 1-{MAX_TAG_LENGTH} characters, "
238
+ "start with letter or number, and contain only letters, numbers, dash, "
239
+ "underscore, or dot."
240
+ )
241
+
242
+ validated.append(tag)
243
+
244
+ return validated
245
+
246
+
247
+ # Metadata validation constants
248
+ MAX_METADATA_DEPTH = 10 # Maximum nesting depth for metadata
249
+ MAX_METADATA_KEY_LENGTH = 128 # Maximum length for metadata keys
250
+
251
+
252
+ def validate_metadata(
253
+ metadata: dict[str, Any] | None,
254
+ max_depth: int | None = None,
255
+ validate_keys: bool = True,
256
+ ) -> dict[str, Any]:
257
+ """Validate and return metadata dict.
258
+
259
+ Metadata must:
260
+ - Be a dictionary
261
+ - Be JSON-serializable
262
+ - Not exceed MAX_METADATA_SIZE bytes when serialized
263
+ - Not exceed max_depth nesting levels (if specified)
264
+ - Have keys that are valid identifiers (if validate_keys=True)
265
+
266
+ Args:
267
+ metadata: Metadata dictionary to validate (None is treated as empty dict).
268
+ max_depth: Maximum nesting depth (default: MAX_METADATA_DEPTH).
269
+ Set to None to disable depth checking.
270
+ validate_keys: Whether to validate key format (default: True).
271
+
272
+ Returns:
273
+ Validated metadata dictionary (empty dict if None was provided).
274
+
275
+ Raises:
276
+ ValidationError: If metadata is invalid.
277
+
278
+ Examples:
279
+ >>> validate_metadata({"key": "value"})
280
+ {'key': 'value'}
281
+ >>> validate_metadata(None)
282
+ {}
283
+ >>> validate_metadata("not a dict")
284
+ Traceback (most recent call last):
285
+ ...
286
+ ValidationError: Metadata must be a dictionary...
287
+ """
288
+ if metadata is None:
289
+ return {}
290
+
291
+ if not isinstance(metadata, dict):
292
+ raise ValidationError(f"Metadata must be a dictionary, got {type(metadata).__name__}")
293
+
294
+ # Check nesting depth and key format
295
+ effective_max_depth = max_depth if max_depth is not None else MAX_METADATA_DEPTH
296
+ _validate_metadata_structure(metadata, effective_max_depth, validate_keys, current_depth=0)
297
+
298
+ # Check serialized size (max 64KB)
299
+ try:
300
+ serialized = json.dumps(metadata)
301
+ if len(serialized) > MAX_METADATA_SIZE:
302
+ raise ValidationError(
303
+ f"Metadata exceeds 64KB limit ({len(serialized)} bytes)"
304
+ )
305
+ except (TypeError, ValueError) as e:
306
+ raise ValidationError(f"Metadata must be JSON-serializable: {e}") from e
307
+
308
+ return metadata
309
+
310
+
311
+ def _validate_metadata_structure(
312
+ value: Any,
313
+ max_depth: int,
314
+ validate_keys: bool,
315
+ current_depth: int,
316
+ path: str = "",
317
+ ) -> None:
318
+ """Recursively validate metadata structure.
319
+
320
+ Args:
321
+ value: The value to validate.
322
+ max_depth: Maximum allowed nesting depth.
323
+ validate_keys: Whether to validate dictionary key format.
324
+ current_depth: Current nesting level.
325
+ path: Dot-separated path for error messages.
326
+ """
327
+ if current_depth > max_depth:
328
+ raise ValidationError(
329
+ f"Metadata exceeds maximum nesting depth of {max_depth}"
330
+ + (f" at '{path}'" if path else "")
331
+ )
332
+
333
+ if isinstance(value, dict):
334
+ for key, val in value.items():
335
+ # Validate key format
336
+ if validate_keys:
337
+ if not isinstance(key, str):
338
+ raise ValidationError(
339
+ f"Metadata keys must be strings, got {type(key).__name__}"
340
+ + (f" at '{path}'" if path else "")
341
+ )
342
+ if len(key) > MAX_METADATA_KEY_LENGTH:
343
+ raise ValidationError(
344
+ f"Metadata key '{key[:50]}...' exceeds maximum length of "
345
+ f"{MAX_METADATA_KEY_LENGTH} characters"
346
+ )
347
+ if not key:
348
+ raise ValidationError(
349
+ "Metadata keys cannot be empty"
350
+ + (f" at '{path}'" if path else "")
351
+ )
352
+
353
+ # Recurse into nested dicts/lists
354
+ new_path = f"{path}.{key}" if path else key
355
+ _validate_metadata_structure(
356
+ val, max_depth, validate_keys, current_depth + 1, new_path
357
+ )
358
+ elif isinstance(value, list):
359
+ for i, item in enumerate(value):
360
+ new_path = f"{path}[{i}]" if path else f"[{i}]"
361
+ _validate_metadata_structure(
362
+ item, max_depth, validate_keys, current_depth + 1, new_path
363
+ )
364
+
365
+
366
+ def sanitize_string(value: str) -> str:
367
+ """Sanitize string for safe SQL usage.
368
+
369
+ Prevents SQL injection by:
370
+ 1. Validating input type
371
+ 2. Detecting dangerous SQL patterns
372
+ 3. Escaping single quotes
373
+
374
+ Args:
375
+ value: The string value to sanitize.
376
+
377
+ Returns:
378
+ Sanitized string safe for use in filter expressions.
379
+
380
+ Raises:
381
+ ValidationError: If the value contains invalid characters or SQL injection patterns.
382
+
383
+ Examples:
384
+ >>> sanitize_string("hello")
385
+ 'hello'
386
+ >>> sanitize_string("it's")
387
+ "it''s"
388
+ >>> sanitize_string("'; DROP TABLE users--")
389
+ Traceback (most recent call last):
390
+ ...
391
+ ValidationError: Invalid characters in value...
392
+ """
393
+ if not isinstance(value, str):
394
+ raise ValidationError(f"Expected string, got {type(value).__name__}")
395
+
396
+ # Check for dangerous SQL injection patterns
397
+ for pattern in DANGEROUS_PATTERNS:
398
+ if re.search(pattern, value, re.IGNORECASE):
399
+ # Only show first 50 chars in error to prevent log flooding
400
+ raise ValidationError(f"Invalid characters in value: {value[:50]}")
401
+
402
+ # Escape single quotes by doubling them (standard SQL escaping)
403
+ return value.replace("'", "''")