spatial-memory-mcp 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spatial-memory-mcp might be problematic.
- spatial_memory/__init__.py +1 -1
- spatial_memory/__main__.py +241 -2
- spatial_memory/adapters/lancedb_repository.py +74 -5
- spatial_memory/config.py +10 -2
- spatial_memory/core/__init__.py +9 -0
- spatial_memory/core/connection_pool.py +41 -3
- spatial_memory/core/consolidation_strategies.py +402 -0
- spatial_memory/core/database.py +774 -918
- spatial_memory/core/db_idempotency.py +242 -0
- spatial_memory/core/db_indexes.py +575 -0
- spatial_memory/core/db_migrations.py +584 -0
- spatial_memory/core/db_search.py +509 -0
- spatial_memory/core/db_versioning.py +177 -0
- spatial_memory/core/embeddings.py +65 -18
- spatial_memory/core/errors.py +75 -3
- spatial_memory/core/filesystem.py +178 -0
- spatial_memory/core/models.py +4 -0
- spatial_memory/core/rate_limiter.py +26 -9
- spatial_memory/core/response_types.py +497 -0
- spatial_memory/core/validation.py +86 -2
- spatial_memory/factory.py +407 -0
- spatial_memory/migrations/__init__.py +40 -0
- spatial_memory/ports/repositories.py +52 -2
- spatial_memory/server.py +131 -189
- spatial_memory/services/export_import.py +61 -43
- spatial_memory/services/lifecycle.py +397 -122
- spatial_memory/services/memory.py +2 -2
- spatial_memory/services/spatial.py +129 -46
- {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/METADATA +83 -3
- spatial_memory_mcp-1.6.0.dist-info/RECORD +54 -0
- spatial_memory_mcp-1.5.3.dist-info/RECORD +0 -44
- {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/WHEEL +0 -0
- {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/entry_points.txt +0 -0
- {spatial_memory_mcp-1.5.3.dist-info → spatial_memory_mcp-1.6.0.dist-info}/licenses/LICENSE +0 -0
spatial_memory/core/response_types.py (new file)
@@ -0,0 +1,497 @@
+"""TypedDict response types for MCP handler responses.
+
+This module provides compile-time type checking for all 22 handler responses
+in the Spatial Memory MCP server. Using TypedDicts enables mypy to catch
+type mismatches in handler implementations.
+
+Usage in server.py:
+    def _handle_recall(self, arguments: dict[str, Any]) -> RecallResponse:
+        ...
+"""
+
+from __future__ import annotations
+
+from typing import Any, TypedDict
+
+
+# =============================================================================
+# Nested TypedDicts (shared across multiple responses)
+# =============================================================================
+
+
+class MemoryResultDict(TypedDict):
+    """Memory with similarity score from search operations."""
+
+    id: str
+    content: str
+    similarity: float
+    namespace: str
+    tags: list[str]
+    importance: float
+    created_at: str  # ISO 8601 format
+    metadata: dict[str, Any]
+
+
+class MemoryReferenceDict(TypedDict):
+    """Minimal memory reference for nearby operations."""
+
+    id: str
+    content: str
+    namespace: str
+
+
+class NeighborDict(TypedDict):
+    """Neighbor memory with similarity for nearby operations."""
+
+    id: str
+    content: str
+    similarity: float
+    namespace: str
+
+
+class JourneyMemoryDict(TypedDict):
+    """Memory found along a journey path."""
+
+    id: str
+    content: str
+    similarity: float
+
+
+class JourneyStepDict(TypedDict):
+    """A step along the journey path."""
+
+    step: int
+    t: float
+    nearby_memories: list[JourneyMemoryDict]
+    distance_to_path: float
+
+
+class WanderMemoryDict(TypedDict):
+    """Memory at a wander step."""
+
+    id: str
+    content: str
+    namespace: str
+    tags: list[str]
+    similarity: float
+
+
+class WanderStepDict(TypedDict):
+    """A step in a random walk."""
+
+    step: int
+    memory: WanderMemoryDict
+    similarity_to_previous: float
+    selection_probability: float
+
+
+class RepresentativeMemoryDict(TypedDict):
+    """Representative memory for a cluster."""
+
+    id: str
+    content: str
+
+
+class SampleMemoryDict(TypedDict):
+    """Sample memory from a cluster."""
+
+    id: str
+    content: str
+    similarity: float
+
+
+class ClusterDict(TypedDict):
+    """A discovered cluster in regions analysis."""
+
+    cluster_id: int
+    size: int
+    keywords: list[str]
+    representative_memory: RepresentativeMemoryDict
+    sample_memories: list[SampleMemoryDict]
+    coherence: float
+
+
+class VisualizationNodeDict(TypedDict):
+    """A node in the visualization."""
+
+    id: str
+    x: float
+    y: float
+    label: str
+    cluster: int
+    importance: float
+
+
+class VisualizationEdgeDict(TypedDict):
+    """An edge in the visualization."""
+
+    from_id: str
+    to_id: str
+    weight: float
+
+
+class DecayedMemoryDict(TypedDict):
+    """A memory with calculated decay."""
+
+    id: str
+    content_preview: str
+    old_importance: float
+    new_importance: float
+    decay_factor: float
+    days_since_access: int
+    access_count: int
+
+
+class ReinforcedMemoryDict(TypedDict):
+    """A memory that was reinforced."""
+
+    id: str
+    content_preview: str
+    old_importance: float
+    new_importance: float
+    boost_applied: float
+
+
+class ExtractionDict(TypedDict):
+    """An extracted memory from text."""
+
+    content: str
+    confidence: float
+    pattern_matched: str
+    start_pos: int
+    end_pos: int
+    stored: bool
+    memory_id: str | None
+
+
+class ConsolidationGroupDict(TypedDict):
+    """A group of similar memories for consolidation."""
+
+    representative_id: str
+    member_ids: list[str]
+    avg_similarity: float
+    action_taken: str
+
+
+class IndexInfoDict(TypedDict):
+    """Information about a database index."""
+
+    name: str
+    index_type: str
+    column: str
+    num_indexed_rows: int
+    status: str
+
+
+class NamespaceInfoDict(TypedDict):
+    """Information about a namespace."""
+
+    name: str
+    memory_count: int
+    oldest_memory: str | None  # ISO 8601 format
+    newest_memory: str | None  # ISO 8601 format
+
+
+class HealthCheckDict(TypedDict):
+    """A single health check result."""
+
+    name: str
+    status: str
+    message: str | None
+    latency_ms: float | None
+
+
+class ImportValidationErrorDict(TypedDict):
+    """A validation error during import."""
+
+    row_number: int
+    field: str
+    error: str
+    value: str | None
+
+
+class ImportedMemoryDict(TypedDict):
+    """Information about an imported memory."""
+
+    id: str
+    content_preview: str
+    namespace: str
+
+
+class HybridMemoryDict(TypedDict):
+    """A memory matched by hybrid search."""
+
+    id: str
+    content: str
+    similarity: float
+    namespace: str
+    tags: list[str]
+    importance: float
+    created_at: str | None  # ISO 8601 format
+    metadata: dict[str, Any]
+    vector_score: float | None
+    fts_score: float | None
+
+
+# =============================================================================
+# Handler Response TypedDicts (22 total)
+# =============================================================================
+
+
+class RememberResponse(TypedDict):
+    """Response for remember handler."""
+
+    id: str
+    content: str
+    namespace: str
+    deduplicated: bool
+
+
+class RememberBatchResponse(TypedDict):
+    """Response for remember_batch handler."""
+
+    ids: list[str]
+    count: int
+
+
+class RecallResponse(TypedDict):
+    """Response for recall handler."""
+
+    memories: list[MemoryResultDict]
+    total: int
+
+
+class NearbyResponse(TypedDict):
+    """Response for nearby handler."""
+
+    reference: MemoryReferenceDict
+    neighbors: list[NeighborDict]
+
+
+class ForgetResponse(TypedDict):
+    """Response for forget handler."""
+
+    deleted: int
+    ids: list[str]
+
+
+class ForgetBatchResponse(TypedDict):
+    """Response for forget_batch handler."""
+
+    deleted: int
+    ids: list[str]
+
+
+class HealthResponse(TypedDict, total=False):
+    """Response for health handler.
+
+    Uses total=False for optional 'checks' field.
+    """
+
+    version: str
+    status: str
+    timestamp: str  # ISO 8601 format
+    ready: bool
+    alive: bool
+    checks: list[HealthCheckDict]  # Optional, only with verbose=True
+
+
+class JourneyResponse(TypedDict):
+    """Response for journey handler."""
+
+    start_id: str
+    end_id: str
+    steps: list[JourneyStepDict]
+    path_coverage: float
+
+
+class WanderResponse(TypedDict):
+    """Response for wander handler."""
+
+    start_id: str
+    steps: list[WanderStepDict]
+    total_distance: float
+
+
+class RegionsResponse(TypedDict):
+    """Response for regions handler."""
+
+    clusters: list[ClusterDict]
+    total_memories: int
+    noise_count: int
+    clustering_quality: float
+
+
+class VisualizeJsonResponse(TypedDict):
+    """Response for visualize handler with JSON format."""
+
+    nodes: list[VisualizationNodeDict]
+    edges: list[VisualizationEdgeDict]
+    bounds: dict[str, float]
+    format: str
+
+
+class VisualizeTextResponse(TypedDict):
+    """Response for visualize handler with mermaid/svg format."""
+
+    format: str
+    output: str
+    node_count: int
+
+
+# Union type for visualize response
+VisualizeResponse = VisualizeJsonResponse | VisualizeTextResponse
+
+
+class DecayResponse(TypedDict):
+    """Response for decay handler."""
+
+    memories_analyzed: int
+    memories_decayed: int
+    avg_decay_factor: float
+    decayed_memories: list[DecayedMemoryDict]
+    dry_run: bool
+
+
+class ReinforceResponse(TypedDict):
+    """Response for reinforce handler."""
+
+    memories_reinforced: int
+    avg_boost: float
+    reinforced: list[ReinforcedMemoryDict]
+    not_found: list[str]
+
+
+class ExtractResponse(TypedDict):
+    """Response for extract handler."""
+
+    candidates_found: int
+    memories_created: int
+    deduplicated_count: int
+    extractions: list[ExtractionDict]
+
+
+class ConsolidateResponse(TypedDict):
+    """Response for consolidate handler."""
+
+    groups_found: int
+    memories_merged: int
+    memories_deleted: int
+    groups: list[ConsolidationGroupDict]
+    dry_run: bool
+
+
+class StatsResponse(TypedDict):
+    """Response for stats handler."""
+
+    total_memories: int
+    memories_by_namespace: dict[str, int]
+    storage_bytes: int
+    storage_mb: float
+    estimated_vector_bytes: int
+    has_vector_index: bool
+    has_fts_index: bool
+    indices: list[IndexInfoDict]
+    num_fragments: int
+    needs_compaction: bool
+    table_version: int
+    oldest_memory_date: str | None  # ISO 8601 format
+    newest_memory_date: str | None  # ISO 8601 format
+    avg_content_length: float | None
+
+
+class NamespacesResponse(TypedDict):
+    """Response for namespaces handler."""
+
+    namespaces: list[NamespaceInfoDict]
+    total_namespaces: int
+    total_memories: int
+
+
+class DeleteNamespaceResponse(TypedDict):
+    """Response for delete_namespace handler."""
+
+    namespace: str
+    memories_deleted: int
+    success: bool
+    message: str
+    dry_run: bool
+
+
+class RenameNamespaceResponse(TypedDict):
+    """Response for rename_namespace handler."""
+
+    old_namespace: str
+    new_namespace: str
+    memories_renamed: int
+    success: bool
+    message: str
+
+
+class ExportResponse(TypedDict):
+    """Response for export_memories handler."""
+
+    format: str
+    output_path: str
+    memories_exported: int
+    file_size_bytes: int
+    file_size_mb: float
+    namespaces_included: list[str]
+    duration_seconds: float
+    compression: str | None
+
+
+class ImportResponse(TypedDict):
+    """Response for import_memories handler."""
+
+    source_path: str
+    format: str
+    total_records_in_file: int
+    memories_imported: int
+    memories_skipped: int
+    memories_failed: int
+    validation_errors: list[ImportValidationErrorDict]
+    namespace_override: str | None
+    duration_seconds: float
+    dry_run: bool
+    imported_memories: list[ImportedMemoryDict]
+
+
+class HybridRecallResponse(TypedDict):
+    """Response for hybrid_recall handler."""
+
+    query: str
+    alpha: float
+    memories: list[HybridMemoryDict]
+    total: int
+    search_type: str
+
+
+# =============================================================================
+# Type alias for any handler response
+# =============================================================================
+
+HandlerResponse = (
+    RememberResponse
+    | RememberBatchResponse
+    | RecallResponse
+    | NearbyResponse
+    | ForgetResponse
+    | ForgetBatchResponse
+    | HealthResponse
+    | JourneyResponse
+    | WanderResponse
+    | RegionsResponse
+    | VisualizeResponse
+    | DecayResponse
+    | ReinforceResponse
+    | ExtractResponse
+    | ConsolidateResponse
+    | StatsResponse
+    | NamespacesResponse
+    | DeleteNamespaceResponse
+    | RenameNamespaceResponse
+    | ExportResponse
+    | ImportResponse
+    | HybridRecallResponse
+)
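Before the validation.py changes below, a quick illustration of how these response types are meant to be used in handlers. This is a hypothetical sketch (the handler name and result construction are not from the package); it only assumes the RecallResponse and MemoryResultDict types defined in the new module above:

    from spatial_memory.core.response_types import MemoryResultDict, RecallResponse

    def _handle_recall_example(results: list[MemoryResultDict]) -> RecallResponse:
        # mypy checks the returned dict against RecallResponse: a missing key
        # or a wrongly typed value is reported at type-check time.
        return {"memories": results, "total": len(results)}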
spatial_memory/core/validation.py
@@ -89,7 +89,7 @@ def validate_namespace(namespace: str) -> str:
     """Validate namespace format.
 
     Namespaces must:
-    - Start with a letter
+    - Start with a letter, number, or underscore
     - Contain only letters, numbers, dash, underscore, or dot
     - Be between 1-256 characters
     - Not be empty
@@ -108,6 +108,8 @@ def validate_namespace(namespace: str) -> str:
         'default'
         >>> validate_namespace("my-namespace_v1.0")
         'my-namespace_v1.0'
+        >>> validate_namespace("123numeric")
+        '123numeric'
         >>> validate_namespace("")
         Traceback (most recent call last):
         ...
@@ -133,6 +135,15 @@ def validate_content(content: str) -> None:
     - Not be empty or whitespace-only
     - Not exceed MAX_CONTENT_LENGTH characters
 
+    Security Note:
+        Content is NOT validated for SQL injection patterns because:
+        1. All database operations use parameterized queries (LanceDB's PyArrow-based API)
+        2. Content is never interpolated into SQL strings
+        3. LanceDB filter expressions use a separate DSL with proper escaping
+
+        This approach follows the principle of defense-in-depth: input validation
+        catches obvious issues, but the primary protection is parameterized queries.
+
     Args:
         content: Content to validate.
 
@@ -233,16 +244,30 @@ def validate_tags(tags: list[str] | None) -> list[str]:
     return validated
 
 
-def validate_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]:
+# Metadata validation constants
+MAX_METADATA_DEPTH = 10  # Maximum nesting depth for metadata
+MAX_METADATA_KEY_LENGTH = 128  # Maximum length for metadata keys
+
+
+def validate_metadata(
+    metadata: dict[str, Any] | None,
+    max_depth: int | None = None,
+    validate_keys: bool = True,
+) -> dict[str, Any]:
     """Validate and return metadata dict.
 
     Metadata must:
     - Be a dictionary
     - Be JSON-serializable
     - Not exceed MAX_METADATA_SIZE bytes when serialized
+    - Not exceed max_depth nesting levels (if specified)
+    - Have keys that are valid identifiers (if validate_keys=True)
 
     Args:
         metadata: Metadata dictionary to validate (None is treated as empty dict).
+        max_depth: Maximum nesting depth (default: MAX_METADATA_DEPTH).
+            Set to None to disable depth checking.
+        validate_keys: Whether to validate key format (default: True).
 
     Returns:
         Validated metadata dictionary (empty dict if None was provided).
@@ -266,6 +291,10 @@ def validate_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]:
     if not isinstance(metadata, dict):
         raise ValidationError(f"Metadata must be a dictionary, got {type(metadata).__name__}")
 
+    # Check nesting depth and key format
+    effective_max_depth = max_depth if max_depth is not None else MAX_METADATA_DEPTH
+    _validate_metadata_structure(metadata, effective_max_depth, validate_keys, current_depth=0)
+
     # Check serialized size (max 64KB)
     try:
         serialized = json.dumps(metadata)
@@ -279,6 +308,61 @@ def validate_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]:
     return metadata
 
 
+def _validate_metadata_structure(
+    value: Any,
+    max_depth: int,
+    validate_keys: bool,
+    current_depth: int,
+    path: str = "",
+) -> None:
+    """Recursively validate metadata structure.
+
+    Args:
+        value: The value to validate.
+        max_depth: Maximum allowed nesting depth.
+        validate_keys: Whether to validate dictionary key format.
+        current_depth: Current nesting level.
+        path: Dot-separated path for error messages.
+    """
+    if current_depth > max_depth:
+        raise ValidationError(
+            f"Metadata exceeds maximum nesting depth of {max_depth}"
+            + (f" at '{path}'" if path else "")
+        )
+
+    if isinstance(value, dict):
+        for key, val in value.items():
+            # Validate key format
+            if validate_keys:
+                if not isinstance(key, str):
+                    raise ValidationError(
+                        f"Metadata keys must be strings, got {type(key).__name__}"
+                        + (f" at '{path}'" if path else "")
+                    )
+                if len(key) > MAX_METADATA_KEY_LENGTH:
+                    raise ValidationError(
+                        f"Metadata key '{key[:50]}...' exceeds maximum length of "
+                        f"{MAX_METADATA_KEY_LENGTH} characters"
+                    )
+                if not key:
+                    raise ValidationError(
+                        "Metadata keys cannot be empty"
+                        + (f" at '{path}'" if path else "")
+                    )
+
+            # Recurse into nested dicts/lists
+            new_path = f"{path}.{key}" if path else key
+            _validate_metadata_structure(
+                val, max_depth, validate_keys, current_depth + 1, new_path
+            )
+    elif isinstance(value, list):
+        for i, item in enumerate(value):
+            new_path = f"{path}[{i}]" if path else f"[{i}]"
+            _validate_metadata_structure(
+                item, max_depth, validate_keys, current_depth + 1, new_path
+            )
+
+
 def sanitize_string(value: str) -> str:
     """Sanitize string for safe SQL usage.
 
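A hypothetical usage sketch of the stricter validate_metadata shown above. The import location of ValidationError (spatial_memory.core.errors) is an assumption, since the diff only shows the exception being raised, and the example data is invented:

    from spatial_memory.core.validation import validate_metadata
    from spatial_memory.core.errors import ValidationError  # assumed location

    meta = {"source": "chat", "context": {"session": {"turn": 3}}}

    validate_metadata(meta)  # passes: nesting depth 3 is within the default MAX_METADATA_DEPTH of 10
    try:
        validate_metadata(meta, max_depth=1)  # the nested dicts exceed a depth limit of 1
    except ValidationError as exc:
        print(f"rejected: {exc}")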