spatial-memory-mcp 1.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spatial-memory-mcp might be problematic. Click here for more details.

Files changed (54) hide show
  1. spatial_memory/__init__.py +97 -0
  2. spatial_memory/__main__.py +270 -0
  3. spatial_memory/adapters/__init__.py +7 -0
  4. spatial_memory/adapters/lancedb_repository.py +878 -0
  5. spatial_memory/config.py +728 -0
  6. spatial_memory/core/__init__.py +118 -0
  7. spatial_memory/core/cache.py +317 -0
  8. spatial_memory/core/circuit_breaker.py +297 -0
  9. spatial_memory/core/connection_pool.py +220 -0
  10. spatial_memory/core/consolidation_strategies.py +402 -0
  11. spatial_memory/core/database.py +3069 -0
  12. spatial_memory/core/db_idempotency.py +242 -0
  13. spatial_memory/core/db_indexes.py +575 -0
  14. spatial_memory/core/db_migrations.py +584 -0
  15. spatial_memory/core/db_search.py +509 -0
  16. spatial_memory/core/db_versioning.py +177 -0
  17. spatial_memory/core/embeddings.py +557 -0
  18. spatial_memory/core/errors.py +317 -0
  19. spatial_memory/core/file_security.py +702 -0
  20. spatial_memory/core/filesystem.py +178 -0
  21. spatial_memory/core/health.py +289 -0
  22. spatial_memory/core/helpers.py +79 -0
  23. spatial_memory/core/import_security.py +432 -0
  24. spatial_memory/core/lifecycle_ops.py +1067 -0
  25. spatial_memory/core/logging.py +194 -0
  26. spatial_memory/core/metrics.py +192 -0
  27. spatial_memory/core/models.py +628 -0
  28. spatial_memory/core/rate_limiter.py +326 -0
  29. spatial_memory/core/response_types.py +497 -0
  30. spatial_memory/core/security.py +588 -0
  31. spatial_memory/core/spatial_ops.py +426 -0
  32. spatial_memory/core/tracing.py +300 -0
  33. spatial_memory/core/utils.py +110 -0
  34. spatial_memory/core/validation.py +403 -0
  35. spatial_memory/factory.py +407 -0
  36. spatial_memory/migrations/__init__.py +40 -0
  37. spatial_memory/ports/__init__.py +11 -0
  38. spatial_memory/ports/repositories.py +631 -0
  39. spatial_memory/py.typed +0 -0
  40. spatial_memory/server.py +1141 -0
  41. spatial_memory/services/__init__.py +70 -0
  42. spatial_memory/services/export_import.py +1023 -0
  43. spatial_memory/services/lifecycle.py +1120 -0
  44. spatial_memory/services/memory.py +412 -0
  45. spatial_memory/services/spatial.py +1147 -0
  46. spatial_memory/services/utility.py +409 -0
  47. spatial_memory/tools/__init__.py +5 -0
  48. spatial_memory/tools/definitions.py +695 -0
  49. spatial_memory/verify.py +140 -0
  50. spatial_memory_mcp-1.6.1.dist-info/METADATA +499 -0
  51. spatial_memory_mcp-1.6.1.dist-info/RECORD +54 -0
  52. spatial_memory_mcp-1.6.1.dist-info/WHEEL +4 -0
  53. spatial_memory_mcp-1.6.1.dist-info/entry_points.txt +2 -0
  54. spatial_memory_mcp-1.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,702 @@
1
+ """File security module for path validation and attack prevention.
2
+
3
+ This module provides security-critical path validation to prevent:
4
+ - Path traversal attacks (../, %2e%2e, etc.)
5
+ - Windows UNC path attacks
6
+ - Symlink-based escapes from allowed directories
7
+ - File size limit bypass
8
+ - Invalid file extension attacks
9
+
10
+ Security is implemented through defense-in-depth:
11
+ 1. Pattern-based detection of known attack vectors
12
+ 2. Path canonicalization to resolve symbolic elements
13
+ 3. Allowlist validation to restrict accessible directories
14
+ 4. Extension validation to limit file types
15
+ 5. Symlink resolution and validation
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import errno
21
+ import os
22
+ import re
23
+ import stat
24
+ import urllib.parse
25
+ from collections.abc import Sequence
26
+ from io import BufferedReader
27
+ from pathlib import Path
28
+ from typing import BinaryIO
29
+
30
+ from spatial_memory.core.errors import FileSizeLimitError, PathSecurityError
31
+
32
+ # =============================================================================
33
+ # Security Constants
34
+ # =============================================================================
35
+
36
+ # Regex patterns to detect path traversal attempts
37
+ # These patterns detect various encoding schemes used to bypass filters
38
# Each entry pairs a regex source with the flags it is compiled with.
# Percent-encoded forms are matched case-insensitively because hex digits
# in URL escapes may be written in either case.
PATH_TRAVERSAL_PATTERNS: list[re.Pattern[str]] = [
    re.compile(expression, flags)
    for expression, flags in (
        # Basic parent directory traversal
        (r"\.\.", 0),
        # URL-encoded .. (%2e = '.')
        (r"%2e%2e", re.IGNORECASE),
        # Double URL-encoded .. (%252e = '%2e')
        (r"%252e%252e", re.IGNORECASE),
        # Windows UNC paths (\\server\share or \\?\)
        (r"^\\\\", 0),
        # Unix-style UNC paths (//server/share)
        (r"^//", 0),
        # Null byte injection, either percent-encoded or literal
        (r"%00|\x00", 0),
        # Overlong UTF-8 encoding of '.' (CVE-2000-0884 style)
        (r"%c0%ae|%c0%2e|%c1%9c", re.IGNORECASE),
    )
]

# Sensitive system directories that should never be accessible.
# These are common targets for path traversal attacks; the set is the
# union of the per-platform groups below.
SENSITIVE_DIRECTORIES: frozenset[str] = frozenset().union(
    # Unix/Linux
    (
        "/etc",
        "/usr",
        "/bin",
        "/sbin",
        "/var/log",
        "/root",
        "/home",
        "/tmp",
        "/var/tmp",
        "/proc",
        "/sys",
        "/dev",
    ),
    # macOS specific
    (
        "/System",
        "/Library",
        "/private",
    ),
    # Windows
    (
        "C:\\Windows",
        "C:\\Program Files",
        "C:\\Program Files (x86)",
        "C:\\ProgramData",
        "C:\\Users",
        "C:\\System32",
        "C:\\SysWOW64",
    ),
)

# Valid file extensions for export/import operations.
# Only data formats are allowed - no executables or scripts.
VALID_EXTENSIONS: frozenset[str] = frozenset((".parquet", ".json", ".csv"))

# Maximum number of URL decode passes applied when looking for encoded
# traversal sequences. Three passes cover single (%2e), double (%252e)
# and triple (%25252e) encoding.
MAX_URL_DECODE_ITERATIONS: int = 3
100
+
101
+
102
+ # =============================================================================
103
+ # PathValidator Class
104
+ # =============================================================================
105
+
106
+
107
+ class PathValidator:
108
+ """Validates file paths for security constraints.
109
+
110
+ This class implements defense-in-depth path validation:
111
+ 1. Detects path traversal patterns in raw input
112
+ 2. Canonicalizes paths to resolve symbolic elements
113
+ 3. Validates against allowed directories (allowlist)
114
+ 4. Validates file extensions
115
+ 5. Detects and optionally blocks symlinks
116
+
117
+ Thread Safety: This class is thread-safe. All methods are stateless
118
+ and only read from immutable configuration.
119
+
120
+ Example:
121
+ validator = PathValidator(
122
+ allowed_export_paths=[Path("/data/exports")],
123
+ allowed_import_paths=[Path("/data/imports")],
124
+ )
125
+
126
+ # Validate export path
127
+ safe_path = validator.validate_export_path("/data/exports/backup.parquet")
128
+
129
+ # Validate import path with size check
130
+ safe_path = validator.validate_import_path(
131
+ "/data/imports/restore.json",
132
+ max_size_bytes=100 * 1024 * 1024,
133
+ )
134
+ """
135
+
136
+ def __init__(
137
+ self,
138
+ allowed_export_paths: Sequence[str | Path],
139
+ allowed_import_paths: Sequence[str | Path],
140
+ allow_symlinks: bool = False,
141
+ ) -> None:
142
+ """Initialize the PathValidator.
143
+
144
+ Args:
145
+ allowed_export_paths: Directories where exports are permitted.
146
+ allowed_import_paths: Directories where imports are permitted.
147
+ allow_symlinks: Whether to allow following symlinks. Default False
148
+ for security - symlinks can be used to escape allowed directories.
149
+ """
150
+ # Convert and resolve allowed paths to absolute paths
151
+ self._allowed_export_paths: tuple[Path, ...] = tuple(
152
+ Path(p).resolve() for p in allowed_export_paths
153
+ )
154
+ self._allowed_import_paths: tuple[Path, ...] = tuple(
155
+ Path(p).resolve() for p in allowed_import_paths
156
+ )
157
+ self._allow_symlinks = allow_symlinks
158
+
159
+ def validate_export_path(self, path: str | Path) -> Path:
160
+ """Validate a path for export operations.
161
+
162
+ Performs security checks without requiring the file to exist.
163
+ Parent directories will be created if needed during export.
164
+
165
+ Args:
166
+ path: The path to validate. Can be absolute or relative.
167
+
168
+ Returns:
169
+ Canonicalized Path object that is safe to use.
170
+
171
+ Raises:
172
+ PathSecurityError: If the path fails any security check.
173
+ ValueError: If the path is empty or invalid.
174
+ """
175
+ # Basic input validation
176
+ path_str = str(path).strip() if path else ""
177
+ if not path_str:
178
+ raise ValueError("Path cannot be empty")
179
+
180
+ # Check for null bytes
181
+ if "\x00" in path_str:
182
+ raise ValueError("Path cannot contain null bytes")
183
+
184
+ # Step 1: Detect path traversal patterns in raw input
185
+ self._check_traversal_patterns(path_str)
186
+
187
+ # Step 2: Detect UNC paths
188
+ self._check_unc_path(path_str)
189
+
190
+ # Step 3: URL decode and check again (defense in depth)
191
+ decoded = self._url_decode_path(path_str)
192
+ if decoded != path_str:
193
+ self._check_traversal_patterns(decoded)
194
+
195
+ # Step 4: Convert to Path and canonicalize
196
+ path_obj = Path(path_str)
197
+
198
+ # Resolve without strict (file doesn't need to exist for export)
199
+ # We resolve parents to detect traversal attempts
200
+ try:
201
+ # For non-existent paths, resolve what we can
202
+ if path_obj.exists():
203
+ canonical = path_obj.resolve()
204
+ else:
205
+ # Resolve existing parents, keep filename
206
+ parent = path_obj.parent
207
+ while not parent.exists() and parent != parent.parent:
208
+ parent = parent.parent
209
+ if parent.exists():
210
+ resolved_parent = parent.resolve()
211
+ # Build the rest of the path
212
+ if parent != path_obj:
213
+ relative = path_obj.relative_to(parent)
214
+ else:
215
+ relative = Path(path_obj.name)
216
+ canonical = resolved_parent / relative
217
+ else:
218
+ canonical = path_obj.absolute()
219
+ except (OSError, ValueError) as e:
220
+ raise PathSecurityError(
221
+ path=path_str,
222
+ violation_type="path_resolution_failed",
223
+ message=f"Failed to resolve path: {e}",
224
+ )
225
+
226
+ # Step 5: Check for traversal in canonical path (defense in depth)
227
+ canonical_str = str(canonical)
228
+ if ".." in canonical_str:
229
+ raise PathSecurityError(
230
+ path=path_str,
231
+ violation_type="traversal_attempt",
232
+ message=f"Path contains traversal after canonicalization: {path_str}",
233
+ )
234
+
235
+ # Step 6: Validate extension
236
+ self._validate_extension(canonical)
237
+
238
+ # Step 7: Check symlink (if path exists)
239
+ if canonical.exists() and not self._allow_symlinks:
240
+ self._check_symlink(canonical, path_str)
241
+
242
+ # Step 8: Validate against allowlist
243
+ self._validate_allowlist(canonical, self._allowed_export_paths, path_str)
244
+
245
+ return canonical
246
+
247
+ def validate_import_path(self, path: str | Path, max_size_bytes: int) -> Path:
248
+ """Validate a path for import operations.
249
+
250
+ Performs all security checks and additionally verifies:
251
+ - File exists
252
+ - File is not a directory
253
+ - File size is within limits
254
+
255
+ Args:
256
+ path: The path to validate. Can be absolute or relative.
257
+ max_size_bytes: Maximum allowed file size in bytes.
258
+
259
+ Returns:
260
+ Canonicalized Path object that is safe to use.
261
+
262
+ Raises:
263
+ PathSecurityError: If the path fails any security check.
264
+ FileSizeLimitError: If the file exceeds the size limit.
265
+ ValueError: If the path is empty or invalid.
266
+ """
267
+ # Basic input validation
268
+ path_str = str(path).strip() if path else ""
269
+ if not path_str:
270
+ raise ValueError("Path cannot be empty")
271
+
272
+ # Check for null bytes
273
+ if "\x00" in path_str:
274
+ raise ValueError("Path cannot contain null bytes")
275
+
276
+ # Step 1: Detect path traversal patterns in raw input
277
+ self._check_traversal_patterns(path_str)
278
+
279
+ # Step 2: Detect UNC paths
280
+ self._check_unc_path(path_str)
281
+
282
+ # Step 3: URL decode and check again
283
+ decoded = self._url_decode_path(path_str)
284
+ if decoded != path_str:
285
+ self._check_traversal_patterns(decoded)
286
+
287
+ # Step 4: Convert to Path
288
+ path_obj = Path(path_str)
289
+
290
+ # Step 5: Check file exists
291
+ if not path_obj.exists():
292
+ raise PathSecurityError(
293
+ path=path_str,
294
+ violation_type="file_not_found",
295
+ message=f"File does not exist: {path_str}",
296
+ )
297
+
298
+ # Step 6: Check it's a file, not a directory
299
+ if path_obj.is_dir():
300
+ raise PathSecurityError(
301
+ path=path_str,
302
+ violation_type="not_a_file",
303
+ message=f"Path is a directory, not a file: {path_str}",
304
+ )
305
+
306
+ # Step 7: Canonicalize (resolve symlinks unless blocked)
307
+ try:
308
+ canonical = path_obj.resolve(strict=True)
309
+ except (OSError, RuntimeError) as e:
310
+ raise PathSecurityError(
311
+ path=path_str,
312
+ violation_type="path_resolution_failed",
313
+ message=f"Failed to resolve path: {e}",
314
+ )
315
+
316
+ # Step 8: Check for traversal in canonical path
317
+ canonical_str = str(canonical)
318
+ if ".." in canonical_str:
319
+ raise PathSecurityError(
320
+ path=path_str,
321
+ violation_type="traversal_attempt",
322
+ message=f"Path contains traversal after canonicalization: {path_str}",
323
+ )
324
+
325
+ # Step 9: Validate extension
326
+ self._validate_extension(canonical)
327
+
328
+ # Step 10: Check symlink
329
+ if not self._allow_symlinks:
330
+ self._check_symlink(path_obj, path_str)
331
+
332
+ # Step 11: Validate against allowlist
333
+ self._validate_allowlist(canonical, self._allowed_import_paths, path_str)
334
+
335
+ # Step 12: Check file size
336
+ try:
337
+ file_size = canonical.stat().st_size
338
+ except OSError as e:
339
+ raise PathSecurityError(
340
+ path=path_str,
341
+ violation_type="stat_failed",
342
+ message=f"Failed to get file size: {e}",
343
+ )
344
+
345
+ if file_size > max_size_bytes:
346
+ raise FileSizeLimitError(
347
+ path=path_str,
348
+ actual_size_bytes=file_size,
349
+ max_size_bytes=max_size_bytes,
350
+ )
351
+
352
+ return canonical
353
+
354
+ def validate_and_open_import_file(
355
+ self, path: str | Path, max_size_bytes: int
356
+ ) -> tuple[Path, BinaryIO]:
357
+ """Atomically validate and open a file for import.
358
+
359
+ This method prevents TOCTOU (Time-of-Check-Time-of-Use) race conditions
360
+ by opening the file FIRST, then validating properties on the open file
361
+ descriptor. The caller MUST use the returned file handle for reading.
362
+
363
+ Args:
364
+ path: The path to validate and open.
365
+ max_size_bytes: Maximum allowed file size in bytes.
366
+
367
+ Returns:
368
+ Tuple of (canonical_path, open_file_handle). The file handle is
369
+ opened in binary read mode. Caller is responsible for closing it.
370
+
371
+ Raises:
372
+ PathSecurityError: If the path fails any security check.
373
+ FileSizeLimitError: If the file exceeds the size limit.
374
+ ValueError: If the path is empty or invalid.
375
+ """
376
+ # Basic input validation
377
+ path_str = str(path).strip() if path else ""
378
+ if not path_str:
379
+ raise ValueError("Path cannot be empty")
380
+
381
+ if "\x00" in path_str:
382
+ raise ValueError("Path cannot contain null bytes")
383
+
384
+ # Step 1: Detect path traversal patterns in raw input
385
+ self._check_traversal_patterns(path_str)
386
+
387
+ # Step 2: Detect UNC paths
388
+ self._check_unc_path(path_str)
389
+
390
+ # Step 3: URL decode and check again
391
+ decoded = self._url_decode_path(path_str)
392
+ if decoded != path_str:
393
+ self._check_traversal_patterns(decoded)
394
+
395
+ # Step 4: Validate extension BEFORE opening
396
+ path_obj = Path(path_str)
397
+ self._validate_extension(path_obj)
398
+
399
+ # Step 5: Check basic allowlist BEFORE opening (path-based check)
400
+ # We'll re-verify after opening but this catches obvious violations early
401
+ try:
402
+ preliminary_canonical = path_obj.resolve()
403
+ except (OSError, RuntimeError):
404
+ # Can't resolve - will fail when we try to open
405
+ pass
406
+ else:
407
+ # Quick check that we're in allowed territory
408
+ in_allowed = False
409
+ for allowed in self._allowed_import_paths:
410
+ try:
411
+ if preliminary_canonical.is_relative_to(allowed):
412
+ in_allowed = True
413
+ break
414
+ except AttributeError:
415
+ try:
416
+ preliminary_canonical.relative_to(allowed)
417
+ in_allowed = True
418
+ break
419
+ except ValueError:
420
+ continue
421
+
422
+ if not in_allowed:
423
+ allowed_str = ", ".join(str(p) for p in self._allowed_import_paths)
424
+ raise PathSecurityError(
425
+ path=path_str,
426
+ violation_type="path_outside_allowlist",
427
+ message=f"Path is not in allowed directories. Allowed: {allowed_str}",
428
+ )
429
+
430
+ # Step 5.5: Pre-check symlinks BEFORE opening (defense in depth)
431
+ # This catches obvious symlinks before we open the file
432
+ if not self._allow_symlinks:
433
+ # Check the path itself and all parents
434
+ self._check_symlink(path_obj, path_str)
435
+
436
+ # Step 6: ATOMICALLY open the file using low-level os.open
437
+ # This prevents TOCTOU - all subsequent checks use the open descriptor
438
+ # On Unix, use O_NOFOLLOW to prevent opening through symlinks at OS level
439
+ flags = os.O_RDONLY
440
+ if hasattr(os, "O_NOFOLLOW") and not self._allow_symlinks:
441
+ flags |= os.O_NOFOLLOW
442
+
443
+ try:
444
+ fd = os.open(str(path_obj), flags)
445
+ except FileNotFoundError:
446
+ # FileNotFoundError is a subclass of OSError, so catch it first
447
+ raise PathSecurityError(
448
+ path=path_str,
449
+ violation_type="file_not_found",
450
+ message=f"File does not exist: {path_str}",
451
+ )
452
+ except IsADirectoryError:
453
+ raise PathSecurityError(
454
+ path=path_str,
455
+ violation_type="not_a_file",
456
+ message=f"Path is a directory, not a file: {path_str}",
457
+ )
458
+ except PermissionError as e:
459
+ raise PathSecurityError(
460
+ path=path_str,
461
+ violation_type="permission_denied",
462
+ message=f"Permission denied: {e}",
463
+ )
464
+ except OSError as e:
465
+ # O_NOFOLLOW causes ELOOP (or EMLINK on some systems) if path is a symlink
466
+ if e.errno in (errno.ELOOP, getattr(errno, "EMLINK", None)):
467
+ raise PathSecurityError(
468
+ path=path_str,
469
+ violation_type="symlink_not_allowed",
470
+ message=f"Symlinks are not allowed: {path_str}",
471
+ )
472
+ raise PathSecurityError(
473
+ path=path_str,
474
+ violation_type="open_failed",
475
+ message=f"Failed to open file: {e}",
476
+ )
477
+
478
+ # From this point, we must close fd on any error
479
+ try:
480
+ # Step 7: Get file stats from the OPEN descriptor (not the path!)
481
+ try:
482
+ fd_stat = os.fstat(fd)
483
+ except OSError as e:
484
+ raise PathSecurityError(
485
+ path=path_str,
486
+ violation_type="stat_failed",
487
+ message=f"Failed to stat file: {e}",
488
+ )
489
+
490
+ # Step 8: Verify it's a regular file (not directory, device, etc.)
491
+ if not stat.S_ISREG(fd_stat.st_mode):
492
+ raise PathSecurityError(
493
+ path=path_str,
494
+ violation_type="not_a_regular_file",
495
+ message=f"Path is not a regular file: {path_str}",
496
+ )
497
+
498
+ # Step 9: Re-check symlink status after open (detect race conditions)
499
+ # If a symlink appeared between our pre-check and open, detect it here
500
+ if not self._allow_symlinks and path_obj.is_symlink():
501
+ raise PathSecurityError(
502
+ path=path_str,
503
+ violation_type="symlink_race_detected",
504
+ message=f"Symlink detected after open (possible race condition): {path_str}",
505
+ )
506
+
507
+ # Step 10: Check file size using the open descriptor
508
+ if fd_stat.st_size > max_size_bytes:
509
+ raise FileSizeLimitError(
510
+ path=path_str,
511
+ actual_size_bytes=fd_stat.st_size,
512
+ max_size_bytes=max_size_bytes,
513
+ )
514
+
515
+ # Step 11: Resolve the canonical path for the file we actually opened
516
+ # Use /proc/self/fd on Linux or os.path.realpath
517
+ try:
518
+ canonical = path_obj.resolve(strict=True)
519
+ except (OSError, RuntimeError) as e:
520
+ raise PathSecurityError(
521
+ path=path_str,
522
+ violation_type="path_resolution_failed",
523
+ message=f"Failed to resolve path: {e}",
524
+ )
525
+
526
+ # Step 12: Final allowlist validation on canonical path
527
+ self._validate_allowlist(canonical, self._allowed_import_paths, path_str)
528
+
529
+ # Step 13: Convert fd to a Python file object
530
+ # The file object now owns the fd and will close it
531
+ file_handle: BinaryIO = os.fdopen(fd, "rb")
532
+
533
+ return canonical, file_handle
534
+
535
+ except Exception:
536
+ # Close fd on any error (before it's converted to file object)
537
+ os.close(fd)
538
+ raise
539
+
540
+ def _check_traversal_patterns(self, path_str: str) -> None:
541
+ """Check for path traversal patterns in the input string.
542
+
543
+ Args:
544
+ path_str: The path string to check.
545
+
546
+ Raises:
547
+ PathSecurityError: If a traversal pattern is detected.
548
+ """
549
+ for pattern in PATH_TRAVERSAL_PATTERNS:
550
+ if pattern.search(path_str):
551
+ raise PathSecurityError(
552
+ path=path_str,
553
+ violation_type="traversal_attempt",
554
+ message=f"Path traversal pattern detected: {path_str}",
555
+ )
556
+
557
+ def _check_unc_path(self, path_str: str) -> None:
558
+ """Check for Windows UNC paths.
559
+
560
+ Args:
561
+ path_str: The path string to check.
562
+
563
+ Raises:
564
+ PathSecurityError: If a UNC path is detected.
565
+ """
566
+ # Check for Windows UNC (\\server\share)
567
+ if path_str.startswith("\\\\"):
568
+ raise PathSecurityError(
569
+ path=path_str,
570
+ violation_type="unc_path",
571
+ message=f"UNC paths are not allowed: {path_str}",
572
+ )
573
+ # Check for \\?\ prefix (Windows extended path)
574
+ if path_str.startswith("\\\\?\\"):
575
+ raise PathSecurityError(
576
+ path=path_str,
577
+ violation_type="unc_path",
578
+ message=f"Extended UNC paths are not allowed: {path_str}",
579
+ )
580
+ # Check for Unix-style UNC (//server/share)
581
+ if path_str.startswith("//"):
582
+ raise PathSecurityError(
583
+ path=path_str,
584
+ violation_type="unc_path",
585
+ message=f"UNC-style paths are not allowed: {path_str}",
586
+ )
587
+
588
+ def _url_decode_path(self, path_str: str) -> str:
589
+ """URL decode a path string (multiple passes for double encoding).
590
+
591
+ Args:
592
+ path_str: The path string to decode.
593
+
594
+ Returns:
595
+ The decoded path string.
596
+ """
597
+ decoded = path_str
598
+ # Multiple passes to catch double/triple encoding
599
+ for _ in range(MAX_URL_DECODE_ITERATIONS):
600
+ new_decoded = urllib.parse.unquote(decoded)
601
+ if new_decoded == decoded:
602
+ break
603
+ decoded = new_decoded
604
+ return decoded
605
+
606
+ def _validate_extension(self, path: Path) -> None:
607
+ """Validate that the file has an allowed extension.
608
+
609
+ Args:
610
+ path: The path to validate.
611
+
612
+ Raises:
613
+ PathSecurityError: If the extension is not allowed.
614
+ """
615
+ # Get extension (lowercase for comparison)
616
+ ext = path.suffix.lower()
617
+
618
+ if ext not in VALID_EXTENSIONS:
619
+ allowed = ", ".join(sorted(VALID_EXTENSIONS))
620
+ raise PathSecurityError(
621
+ path=str(path),
622
+ violation_type="invalid_extension",
623
+ message=f"Invalid file extension '{ext}'. Allowed: {allowed}",
624
+ )
625
+
626
+ def _check_symlink(self, path: Path, original_path_str: str) -> None:
627
+ """Check if path or any parent is a symlink.
628
+
629
+ Args:
630
+ path: The path to check.
631
+ original_path_str: Original path string for error messages.
632
+
633
+ Raises:
634
+ PathSecurityError: If symlinks are found and not allowed.
635
+ """
636
+ # Check the path itself
637
+ if path.is_symlink():
638
+ raise PathSecurityError(
639
+ path=original_path_str,
640
+ violation_type="symlink_not_allowed",
641
+ message=f"Symlinks are not allowed: {original_path_str}",
642
+ )
643
+
644
+ # Check all parents
645
+ current = path
646
+ while current != current.parent:
647
+ current = current.parent
648
+ if current.is_symlink():
649
+ raise PathSecurityError(
650
+ path=original_path_str,
651
+ violation_type="symlink_not_allowed",
652
+ message=f"Path contains symlink in parent directory: {original_path_str}",
653
+ )
654
+
655
+ def _validate_allowlist(
656
+ self,
657
+ canonical_path: Path,
658
+ allowed_paths: tuple[Path, ...],
659
+ original_path_str: str,
660
+ ) -> None:
661
+ """Validate that the canonical path is within allowed directories.
662
+
663
+ Args:
664
+ canonical_path: The canonicalized path to validate.
665
+ allowed_paths: Tuple of allowed directory paths.
666
+ original_path_str: Original path string for error messages.
667
+
668
+ Raises:
669
+ PathSecurityError: If the path is outside all allowed directories.
670
+ """
671
+ # Check if canonical path is under any allowed directory FIRST
672
+ # If explicitly allowed, skip sensitive directory check
673
+ for allowed in allowed_paths:
674
+ try:
675
+ # Use is_relative_to for Python 3.9+
676
+ if canonical_path.is_relative_to(allowed):
677
+ return # Path is explicitly allowed, skip all other checks
678
+ except AttributeError:
679
+ # Fallback for older Python
680
+ try:
681
+ canonical_path.relative_to(allowed)
682
+ return # Path is explicitly allowed, skip all other checks
683
+ except ValueError:
684
+ continue
685
+
686
+ # Path not in allowlist - now check sensitive directories
687
+ canonical_str = str(canonical_path)
688
+ for sensitive in SENSITIVE_DIRECTORIES:
689
+ if canonical_str.startswith(sensitive):
690
+ raise PathSecurityError(
691
+ path=original_path_str,
692
+ violation_type="sensitive_directory",
693
+ message=f"Access to sensitive directory is blocked: {sensitive}",
694
+ )
695
+
696
+ # Path is not in any allowed directory
697
+ allowed_str = ", ".join(str(p) for p in allowed_paths)
698
+ raise PathSecurityError(
699
+ path=original_path_str,
700
+ violation_type="path_outside_allowlist",
701
+ message=f"Path is not in allowed directories. Allowed: {allowed_str}",
702
+ )