spatial-memory-mcp 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. spatial_memory/__init__.py +97 -0
  2. spatial_memory/__main__.py +271 -0
  3. spatial_memory/adapters/__init__.py +7 -0
  4. spatial_memory/adapters/lancedb_repository.py +880 -0
  5. spatial_memory/config.py +769 -0
  6. spatial_memory/core/__init__.py +118 -0
  7. spatial_memory/core/cache.py +317 -0
  8. spatial_memory/core/circuit_breaker.py +297 -0
  9. spatial_memory/core/connection_pool.py +220 -0
  10. spatial_memory/core/consolidation_strategies.py +401 -0
  11. spatial_memory/core/database.py +3072 -0
  12. spatial_memory/core/db_idempotency.py +242 -0
  13. spatial_memory/core/db_indexes.py +576 -0
  14. spatial_memory/core/db_migrations.py +588 -0
  15. spatial_memory/core/db_search.py +512 -0
  16. spatial_memory/core/db_versioning.py +178 -0
  17. spatial_memory/core/embeddings.py +558 -0
  18. spatial_memory/core/errors.py +317 -0
  19. spatial_memory/core/file_security.py +701 -0
  20. spatial_memory/core/filesystem.py +178 -0
  21. spatial_memory/core/health.py +289 -0
  22. spatial_memory/core/helpers.py +79 -0
  23. spatial_memory/core/import_security.py +433 -0
  24. spatial_memory/core/lifecycle_ops.py +1067 -0
  25. spatial_memory/core/logging.py +194 -0
  26. spatial_memory/core/metrics.py +192 -0
  27. spatial_memory/core/models.py +660 -0
  28. spatial_memory/core/rate_limiter.py +326 -0
  29. spatial_memory/core/response_types.py +500 -0
  30. spatial_memory/core/security.py +588 -0
  31. spatial_memory/core/spatial_ops.py +430 -0
  32. spatial_memory/core/tracing.py +300 -0
  33. spatial_memory/core/utils.py +110 -0
  34. spatial_memory/core/validation.py +406 -0
  35. spatial_memory/factory.py +444 -0
  36. spatial_memory/migrations/__init__.py +40 -0
  37. spatial_memory/ports/__init__.py +11 -0
  38. spatial_memory/ports/repositories.py +630 -0
  39. spatial_memory/py.typed +0 -0
  40. spatial_memory/server.py +1214 -0
  41. spatial_memory/services/__init__.py +70 -0
  42. spatial_memory/services/decay_manager.py +411 -0
  43. spatial_memory/services/export_import.py +1031 -0
  44. spatial_memory/services/lifecycle.py +1139 -0
  45. spatial_memory/services/memory.py +412 -0
  46. spatial_memory/services/spatial.py +1152 -0
  47. spatial_memory/services/utility.py +429 -0
  48. spatial_memory/tools/__init__.py +5 -0
  49. spatial_memory/tools/definitions.py +695 -0
  50. spatial_memory/verify.py +140 -0
  51. spatial_memory_mcp-1.9.1.dist-info/METADATA +509 -0
  52. spatial_memory_mcp-1.9.1.dist-info/RECORD +55 -0
  53. spatial_memory_mcp-1.9.1.dist-info/WHEEL +4 -0
  54. spatial_memory_mcp-1.9.1.dist-info/entry_points.txt +2 -0
  55. spatial_memory_mcp-1.9.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,701 @@
1
+ """File security module for path validation and attack prevention.
2
+
3
+ This module provides security-critical path validation to prevent:
4
+ - Path traversal attacks (../, %2e%2e, etc.)
5
+ - Windows UNC path attacks
6
+ - Symlink-based escapes from allowed directories
7
+ - File size limit bypass
8
+ - Invalid file extension attacks
9
+
10
+ Security is implemented through defense-in-depth:
11
+ 1. Pattern-based detection of known attack vectors
12
+ 2. Path canonicalization to resolve symbolic elements
13
+ 3. Allowlist validation to restrict accessible directories
14
+ 4. Extension validation to limit file types
15
+ 5. Symlink resolution and validation
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import errno
21
+ import os
22
+ import re
23
+ import stat
24
+ import urllib.parse
25
+ from collections.abc import Sequence
26
+ from pathlib import Path
27
+ from typing import BinaryIO
28
+
29
+ from spatial_memory.core.errors import FileSizeLimitError, PathSecurityError
30
+
31
+ # =============================================================================
32
+ # Security Constants
33
+ # =============================================================================
34
+
35
# Signatures of path traversal attempts, matched with ``search`` against both
# the raw input and its URL-decoded form. Each spec is (regex source, flags);
# the compiled list keeps the order of the specs.
PATH_TRAVERSAL_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, flags)
    for source, flags in (
        # Plain parent-directory reference
        (r"\.\.", 0),
        # ".." with each dot URL-encoded (%2e == '.')
        (r"%2e%2e", re.IGNORECASE),
        # ".." URL-encoded twice (%252e decodes to %2e)
        (r"%252e%252e", re.IGNORECASE),
        # Leading double backslash: Windows UNC (\\server\share) or \\?\ prefix
        (r"^\\\\", 0),
        # Leading double slash: Unix-style UNC (//server/share)
        (r"^//", 0),
        # Null byte, encoded or literal (historic attack, still rejected)
        (r"%00|\x00", 0),
        # Overlong / malformed UTF-8 byte sequences (CVE-2000-0884 style)
        (r"%c0%ae|%c0%2e|%c1%9c", re.IGNORECASE),
    )
]

# System directories that are common targets of path traversal attacks and
# must not be reachable unless explicitly allowlisted.
SENSITIVE_DIRECTORIES: frozenset[str] = frozenset(
    (
        # Unix/Linux
        "/etc",
        "/usr",
        "/bin",
        "/sbin",
        "/var/log",
        "/root",
        "/home",
        "/tmp",
        "/var/tmp",
        "/proc",
        "/sys",
        "/dev",
        # macOS
        "/System",
        "/Library",
        "/private",
        # Windows
        "C:\\Windows",
        "C:\\Program Files",
        "C:\\Program Files (x86)",
        "C:\\ProgramData",
        "C:\\Users",
        "C:\\System32",
        "C:\\SysWOW64",
    )
)

# File extensions accepted for export/import. Data formats only - no
# executables or scripts.
VALID_EXTENSIONS: frozenset[str] = frozenset((".parquet", ".json", ".csv"))

# Upper bound on URL-decode passes. Three passes unwrap single (%2e),
# double (%252e) and triple (%25252e) encoding.
MAX_URL_DECODE_ITERATIONS = 3
99
+
100
+
101
+ # =============================================================================
102
+ # PathValidator Class
103
+ # =============================================================================
104
+
105
+
106
+ class PathValidator:
107
+ """Validates file paths for security constraints.
108
+
109
+ This class implements defense-in-depth path validation:
110
+ 1. Detects path traversal patterns in raw input
111
+ 2. Canonicalizes paths to resolve symbolic elements
112
+ 3. Validates against allowed directories (allowlist)
113
+ 4. Validates file extensions
114
+ 5. Detects and optionally blocks symlinks
115
+
116
+ Thread Safety: This class is thread-safe. All methods are stateless
117
+ and only read from immutable configuration.
118
+
119
+ Example:
120
+ validator = PathValidator(
121
+ allowed_export_paths=[Path("/data/exports")],
122
+ allowed_import_paths=[Path("/data/imports")],
123
+ )
124
+
125
+ # Validate export path
126
+ safe_path = validator.validate_export_path("/data/exports/backup.parquet")
127
+
128
+ # Validate import path with size check
129
+ safe_path = validator.validate_import_path(
130
+ "/data/imports/restore.json",
131
+ max_size_bytes=100 * 1024 * 1024,
132
+ )
133
+ """
134
+
135
+ def __init__(
136
+ self,
137
+ allowed_export_paths: Sequence[str | Path],
138
+ allowed_import_paths: Sequence[str | Path],
139
+ allow_symlinks: bool = False,
140
+ ) -> None:
141
+ """Initialize the PathValidator.
142
+
143
+ Args:
144
+ allowed_export_paths: Directories where exports are permitted.
145
+ allowed_import_paths: Directories where imports are permitted.
146
+ allow_symlinks: Whether to allow following symlinks. Default False
147
+ for security - symlinks can be used to escape allowed directories.
148
+ """
149
+ # Convert and resolve allowed paths to absolute paths
150
+ self._allowed_export_paths: tuple[Path, ...] = tuple(
151
+ Path(p).resolve() for p in allowed_export_paths
152
+ )
153
+ self._allowed_import_paths: tuple[Path, ...] = tuple(
154
+ Path(p).resolve() for p in allowed_import_paths
155
+ )
156
+ self._allow_symlinks = allow_symlinks
157
+
158
+ def validate_export_path(self, path: str | Path) -> Path:
159
+ """Validate a path for export operations.
160
+
161
+ Performs security checks without requiring the file to exist.
162
+ Parent directories will be created if needed during export.
163
+
164
+ Args:
165
+ path: The path to validate. Can be absolute or relative.
166
+
167
+ Returns:
168
+ Canonicalized Path object that is safe to use.
169
+
170
+ Raises:
171
+ PathSecurityError: If the path fails any security check.
172
+ ValueError: If the path is empty or invalid.
173
+ """
174
+ # Basic input validation
175
+ path_str = str(path).strip() if path else ""
176
+ if not path_str:
177
+ raise ValueError("Path cannot be empty")
178
+
179
+ # Check for null bytes
180
+ if "\x00" in path_str:
181
+ raise ValueError("Path cannot contain null bytes")
182
+
183
+ # Step 1: Detect path traversal patterns in raw input
184
+ self._check_traversal_patterns(path_str)
185
+
186
+ # Step 2: Detect UNC paths
187
+ self._check_unc_path(path_str)
188
+
189
+ # Step 3: URL decode and check again (defense in depth)
190
+ decoded = self._url_decode_path(path_str)
191
+ if decoded != path_str:
192
+ self._check_traversal_patterns(decoded)
193
+
194
+ # Step 4: Convert to Path and canonicalize
195
+ path_obj = Path(path_str)
196
+
197
+ # Resolve without strict (file doesn't need to exist for export)
198
+ # We resolve parents to detect traversal attempts
199
+ try:
200
+ # For non-existent paths, resolve what we can
201
+ if path_obj.exists():
202
+ canonical = path_obj.resolve()
203
+ else:
204
+ # Resolve existing parents, keep filename
205
+ parent = path_obj.parent
206
+ while not parent.exists() and parent != parent.parent:
207
+ parent = parent.parent
208
+ if parent.exists():
209
+ resolved_parent = parent.resolve()
210
+ # Build the rest of the path
211
+ if parent != path_obj:
212
+ relative = path_obj.relative_to(parent)
213
+ else:
214
+ relative = Path(path_obj.name)
215
+ canonical = resolved_parent / relative
216
+ else:
217
+ canonical = path_obj.absolute()
218
+ except (OSError, ValueError) as e:
219
+ raise PathSecurityError(
220
+ path=path_str,
221
+ violation_type="path_resolution_failed",
222
+ message=f"Failed to resolve path: {e}",
223
+ )
224
+
225
+ # Step 5: Check for traversal in canonical path (defense in depth)
226
+ canonical_str = str(canonical)
227
+ if ".." in canonical_str:
228
+ raise PathSecurityError(
229
+ path=path_str,
230
+ violation_type="traversal_attempt",
231
+ message=f"Path contains traversal after canonicalization: {path_str}",
232
+ )
233
+
234
+ # Step 6: Validate extension
235
+ self._validate_extension(canonical)
236
+
237
+ # Step 7: Check symlink (if path exists)
238
+ if canonical.exists() and not self._allow_symlinks:
239
+ self._check_symlink(canonical, path_str)
240
+
241
+ # Step 8: Validate against allowlist
242
+ self._validate_allowlist(canonical, self._allowed_export_paths, path_str)
243
+
244
+ return canonical
245
+
246
+ def validate_import_path(self, path: str | Path, max_size_bytes: int) -> Path:
247
+ """Validate a path for import operations.
248
+
249
+ Performs all security checks and additionally verifies:
250
+ - File exists
251
+ - File is not a directory
252
+ - File size is within limits
253
+
254
+ Args:
255
+ path: The path to validate. Can be absolute or relative.
256
+ max_size_bytes: Maximum allowed file size in bytes.
257
+
258
+ Returns:
259
+ Canonicalized Path object that is safe to use.
260
+
261
+ Raises:
262
+ PathSecurityError: If the path fails any security check.
263
+ FileSizeLimitError: If the file exceeds the size limit.
264
+ ValueError: If the path is empty or invalid.
265
+ """
266
+ # Basic input validation
267
+ path_str = str(path).strip() if path else ""
268
+ if not path_str:
269
+ raise ValueError("Path cannot be empty")
270
+
271
+ # Check for null bytes
272
+ if "\x00" in path_str:
273
+ raise ValueError("Path cannot contain null bytes")
274
+
275
+ # Step 1: Detect path traversal patterns in raw input
276
+ self._check_traversal_patterns(path_str)
277
+
278
+ # Step 2: Detect UNC paths
279
+ self._check_unc_path(path_str)
280
+
281
+ # Step 3: URL decode and check again
282
+ decoded = self._url_decode_path(path_str)
283
+ if decoded != path_str:
284
+ self._check_traversal_patterns(decoded)
285
+
286
+ # Step 4: Convert to Path
287
+ path_obj = Path(path_str)
288
+
289
+ # Step 5: Check file exists
290
+ if not path_obj.exists():
291
+ raise PathSecurityError(
292
+ path=path_str,
293
+ violation_type="file_not_found",
294
+ message=f"File does not exist: {path_str}",
295
+ )
296
+
297
+ # Step 6: Check it's a file, not a directory
298
+ if path_obj.is_dir():
299
+ raise PathSecurityError(
300
+ path=path_str,
301
+ violation_type="not_a_file",
302
+ message=f"Path is a directory, not a file: {path_str}",
303
+ )
304
+
305
+ # Step 7: Canonicalize (resolve symlinks unless blocked)
306
+ try:
307
+ canonical = path_obj.resolve(strict=True)
308
+ except (OSError, RuntimeError) as e:
309
+ raise PathSecurityError(
310
+ path=path_str,
311
+ violation_type="path_resolution_failed",
312
+ message=f"Failed to resolve path: {e}",
313
+ )
314
+
315
+ # Step 8: Check for traversal in canonical path
316
+ canonical_str = str(canonical)
317
+ if ".." in canonical_str:
318
+ raise PathSecurityError(
319
+ path=path_str,
320
+ violation_type="traversal_attempt",
321
+ message=f"Path contains traversal after canonicalization: {path_str}",
322
+ )
323
+
324
+ # Step 9: Validate extension
325
+ self._validate_extension(canonical)
326
+
327
+ # Step 10: Check symlink
328
+ if not self._allow_symlinks:
329
+ self._check_symlink(path_obj, path_str)
330
+
331
+ # Step 11: Validate against allowlist
332
+ self._validate_allowlist(canonical, self._allowed_import_paths, path_str)
333
+
334
+ # Step 12: Check file size
335
+ try:
336
+ file_size = canonical.stat().st_size
337
+ except OSError as e:
338
+ raise PathSecurityError(
339
+ path=path_str,
340
+ violation_type="stat_failed",
341
+ message=f"Failed to get file size: {e}",
342
+ )
343
+
344
+ if file_size > max_size_bytes:
345
+ raise FileSizeLimitError(
346
+ path=path_str,
347
+ actual_size_bytes=file_size,
348
+ max_size_bytes=max_size_bytes,
349
+ )
350
+
351
+ return canonical
352
+
353
+ def validate_and_open_import_file(
354
+ self, path: str | Path, max_size_bytes: int
355
+ ) -> tuple[Path, BinaryIO]:
356
+ """Atomically validate and open a file for import.
357
+
358
+ This method prevents TOCTOU (Time-of-Check-Time-of-Use) race conditions
359
+ by opening the file FIRST, then validating properties on the open file
360
+ descriptor. The caller MUST use the returned file handle for reading.
361
+
362
+ Args:
363
+ path: The path to validate and open.
364
+ max_size_bytes: Maximum allowed file size in bytes.
365
+
366
+ Returns:
367
+ Tuple of (canonical_path, open_file_handle). The file handle is
368
+ opened in binary read mode. Caller is responsible for closing it.
369
+
370
+ Raises:
371
+ PathSecurityError: If the path fails any security check.
372
+ FileSizeLimitError: If the file exceeds the size limit.
373
+ ValueError: If the path is empty or invalid.
374
+ """
375
+ # Basic input validation
376
+ path_str = str(path).strip() if path else ""
377
+ if not path_str:
378
+ raise ValueError("Path cannot be empty")
379
+
380
+ if "\x00" in path_str:
381
+ raise ValueError("Path cannot contain null bytes")
382
+
383
+ # Step 1: Detect path traversal patterns in raw input
384
+ self._check_traversal_patterns(path_str)
385
+
386
+ # Step 2: Detect UNC paths
387
+ self._check_unc_path(path_str)
388
+
389
+ # Step 3: URL decode and check again
390
+ decoded = self._url_decode_path(path_str)
391
+ if decoded != path_str:
392
+ self._check_traversal_patterns(decoded)
393
+
394
+ # Step 4: Validate extension BEFORE opening
395
+ path_obj = Path(path_str)
396
+ self._validate_extension(path_obj)
397
+
398
+ # Step 5: Check basic allowlist BEFORE opening (path-based check)
399
+ # We'll re-verify after opening but this catches obvious violations early
400
+ try:
401
+ preliminary_canonical = path_obj.resolve()
402
+ except (OSError, RuntimeError):
403
+ # Can't resolve - will fail when we try to open
404
+ pass
405
+ else:
406
+ # Quick check that we're in allowed territory
407
+ in_allowed = False
408
+ for allowed in self._allowed_import_paths:
409
+ try:
410
+ if preliminary_canonical.is_relative_to(allowed):
411
+ in_allowed = True
412
+ break
413
+ except AttributeError:
414
+ try:
415
+ preliminary_canonical.relative_to(allowed)
416
+ in_allowed = True
417
+ break
418
+ except ValueError:
419
+ continue
420
+
421
+ if not in_allowed:
422
+ allowed_str = ", ".join(str(p) for p in self._allowed_import_paths)
423
+ raise PathSecurityError(
424
+ path=path_str,
425
+ violation_type="path_outside_allowlist",
426
+ message=f"Path is not in allowed directories. Allowed: {allowed_str}",
427
+ )
428
+
429
+ # Step 5.5: Pre-check symlinks BEFORE opening (defense in depth)
430
+ # This catches obvious symlinks before we open the file
431
+ if not self._allow_symlinks:
432
+ # Check the path itself and all parents
433
+ self._check_symlink(path_obj, path_str)
434
+
435
+ # Step 6: ATOMICALLY open the file using low-level os.open
436
+ # This prevents TOCTOU - all subsequent checks use the open descriptor
437
+ # On Unix, use O_NOFOLLOW to prevent opening through symlinks at OS level
438
+ flags = os.O_RDONLY
439
+ if hasattr(os, "O_NOFOLLOW") and not self._allow_symlinks:
440
+ flags |= os.O_NOFOLLOW
441
+
442
+ try:
443
+ fd = os.open(str(path_obj), flags)
444
+ except FileNotFoundError:
445
+ # FileNotFoundError is a subclass of OSError, so catch it first
446
+ raise PathSecurityError(
447
+ path=path_str,
448
+ violation_type="file_not_found",
449
+ message=f"File does not exist: {path_str}",
450
+ )
451
+ except IsADirectoryError:
452
+ raise PathSecurityError(
453
+ path=path_str,
454
+ violation_type="not_a_file",
455
+ message=f"Path is a directory, not a file: {path_str}",
456
+ )
457
+ except PermissionError as e:
458
+ raise PathSecurityError(
459
+ path=path_str,
460
+ violation_type="permission_denied",
461
+ message=f"Permission denied: {e}",
462
+ )
463
+ except OSError as e:
464
+ # O_NOFOLLOW causes ELOOP (or EMLINK on some systems) if path is a symlink
465
+ if e.errno in (errno.ELOOP, getattr(errno, "EMLINK", None)):
466
+ raise PathSecurityError(
467
+ path=path_str,
468
+ violation_type="symlink_not_allowed",
469
+ message=f"Symlinks are not allowed: {path_str}",
470
+ )
471
+ raise PathSecurityError(
472
+ path=path_str,
473
+ violation_type="open_failed",
474
+ message=f"Failed to open file: {e}",
475
+ )
476
+
477
+ # From this point, we must close fd on any error
478
+ try:
479
+ # Step 7: Get file stats from the OPEN descriptor (not the path!)
480
+ try:
481
+ fd_stat = os.fstat(fd)
482
+ except OSError as e:
483
+ raise PathSecurityError(
484
+ path=path_str,
485
+ violation_type="stat_failed",
486
+ message=f"Failed to stat file: {e}",
487
+ )
488
+
489
+ # Step 8: Verify it's a regular file (not directory, device, etc.)
490
+ if not stat.S_ISREG(fd_stat.st_mode):
491
+ raise PathSecurityError(
492
+ path=path_str,
493
+ violation_type="not_a_regular_file",
494
+ message=f"Path is not a regular file: {path_str}",
495
+ )
496
+
497
+ # Step 9: Re-check symlink status after open (detect race conditions)
498
+ # If a symlink appeared between our pre-check and open, detect it here
499
+ if not self._allow_symlinks and path_obj.is_symlink():
500
+ raise PathSecurityError(
501
+ path=path_str,
502
+ violation_type="symlink_race_detected",
503
+ message=f"Symlink detected after open (possible race condition): {path_str}",
504
+ )
505
+
506
+ # Step 10: Check file size using the open descriptor
507
+ if fd_stat.st_size > max_size_bytes:
508
+ raise FileSizeLimitError(
509
+ path=path_str,
510
+ actual_size_bytes=fd_stat.st_size,
511
+ max_size_bytes=max_size_bytes,
512
+ )
513
+
514
+ # Step 11: Resolve the canonical path for the file we actually opened
515
+ # Use /proc/self/fd on Linux or os.path.realpath
516
+ try:
517
+ canonical = path_obj.resolve(strict=True)
518
+ except (OSError, RuntimeError) as e:
519
+ raise PathSecurityError(
520
+ path=path_str,
521
+ violation_type="path_resolution_failed",
522
+ message=f"Failed to resolve path: {e}",
523
+ )
524
+
525
+ # Step 12: Final allowlist validation on canonical path
526
+ self._validate_allowlist(canonical, self._allowed_import_paths, path_str)
527
+
528
+ # Step 13: Convert fd to a Python file object
529
+ # The file object now owns the fd and will close it
530
+ file_handle: BinaryIO = os.fdopen(fd, "rb")
531
+
532
+ return canonical, file_handle
533
+
534
+ except Exception:
535
+ # Close fd on any error (before it's converted to file object)
536
+ os.close(fd)
537
+ raise
538
+
539
+ def _check_traversal_patterns(self, path_str: str) -> None:
540
+ """Check for path traversal patterns in the input string.
541
+
542
+ Args:
543
+ path_str: The path string to check.
544
+
545
+ Raises:
546
+ PathSecurityError: If a traversal pattern is detected.
547
+ """
548
+ for pattern in PATH_TRAVERSAL_PATTERNS:
549
+ if pattern.search(path_str):
550
+ raise PathSecurityError(
551
+ path=path_str,
552
+ violation_type="traversal_attempt",
553
+ message=f"Path traversal pattern detected: {path_str}",
554
+ )
555
+
556
+ def _check_unc_path(self, path_str: str) -> None:
557
+ """Check for Windows UNC paths.
558
+
559
+ Args:
560
+ path_str: The path string to check.
561
+
562
+ Raises:
563
+ PathSecurityError: If a UNC path is detected.
564
+ """
565
+ # Check for Windows UNC (\\server\share)
566
+ if path_str.startswith("\\\\"):
567
+ raise PathSecurityError(
568
+ path=path_str,
569
+ violation_type="unc_path",
570
+ message=f"UNC paths are not allowed: {path_str}",
571
+ )
572
+ # Check for \\?\ prefix (Windows extended path)
573
+ if path_str.startswith("\\\\?\\"):
574
+ raise PathSecurityError(
575
+ path=path_str,
576
+ violation_type="unc_path",
577
+ message=f"Extended UNC paths are not allowed: {path_str}",
578
+ )
579
+ # Check for Unix-style UNC (//server/share)
580
+ if path_str.startswith("//"):
581
+ raise PathSecurityError(
582
+ path=path_str,
583
+ violation_type="unc_path",
584
+ message=f"UNC-style paths are not allowed: {path_str}",
585
+ )
586
+
587
+ def _url_decode_path(self, path_str: str) -> str:
588
+ """URL decode a path string (multiple passes for double encoding).
589
+
590
+ Args:
591
+ path_str: The path string to decode.
592
+
593
+ Returns:
594
+ The decoded path string.
595
+ """
596
+ decoded = path_str
597
+ # Multiple passes to catch double/triple encoding
598
+ for _ in range(MAX_URL_DECODE_ITERATIONS):
599
+ new_decoded = urllib.parse.unquote(decoded)
600
+ if new_decoded == decoded:
601
+ break
602
+ decoded = new_decoded
603
+ return decoded
604
+
605
+ def _validate_extension(self, path: Path) -> None:
606
+ """Validate that the file has an allowed extension.
607
+
608
+ Args:
609
+ path: The path to validate.
610
+
611
+ Raises:
612
+ PathSecurityError: If the extension is not allowed.
613
+ """
614
+ # Get extension (lowercase for comparison)
615
+ ext = path.suffix.lower()
616
+
617
+ if ext not in VALID_EXTENSIONS:
618
+ allowed = ", ".join(sorted(VALID_EXTENSIONS))
619
+ raise PathSecurityError(
620
+ path=str(path),
621
+ violation_type="invalid_extension",
622
+ message=f"Invalid file extension '{ext}'. Allowed: {allowed}",
623
+ )
624
+
625
+ def _check_symlink(self, path: Path, original_path_str: str) -> None:
626
+ """Check if path or any parent is a symlink.
627
+
628
+ Args:
629
+ path: The path to check.
630
+ original_path_str: Original path string for error messages.
631
+
632
+ Raises:
633
+ PathSecurityError: If symlinks are found and not allowed.
634
+ """
635
+ # Check the path itself
636
+ if path.is_symlink():
637
+ raise PathSecurityError(
638
+ path=original_path_str,
639
+ violation_type="symlink_not_allowed",
640
+ message=f"Symlinks are not allowed: {original_path_str}",
641
+ )
642
+
643
+ # Check all parents
644
+ current = path
645
+ while current != current.parent:
646
+ current = current.parent
647
+ if current.is_symlink():
648
+ raise PathSecurityError(
649
+ path=original_path_str,
650
+ violation_type="symlink_not_allowed",
651
+ message=f"Path contains symlink in parent directory: {original_path_str}",
652
+ )
653
+
654
+ def _validate_allowlist(
655
+ self,
656
+ canonical_path: Path,
657
+ allowed_paths: tuple[Path, ...],
658
+ original_path_str: str,
659
+ ) -> None:
660
+ """Validate that the canonical path is within allowed directories.
661
+
662
+ Args:
663
+ canonical_path: The canonicalized path to validate.
664
+ allowed_paths: Tuple of allowed directory paths.
665
+ original_path_str: Original path string for error messages.
666
+
667
+ Raises:
668
+ PathSecurityError: If the path is outside all allowed directories.
669
+ """
670
+ # Check if canonical path is under any allowed directory FIRST
671
+ # If explicitly allowed, skip sensitive directory check
672
+ for allowed in allowed_paths:
673
+ try:
674
+ # Use is_relative_to for Python 3.9+
675
+ if canonical_path.is_relative_to(allowed):
676
+ return # Path is explicitly allowed, skip all other checks
677
+ except AttributeError:
678
+ # Fallback for older Python
679
+ try:
680
+ canonical_path.relative_to(allowed)
681
+ return # Path is explicitly allowed, skip all other checks
682
+ except ValueError:
683
+ continue
684
+
685
+ # Path not in allowlist - now check sensitive directories
686
+ canonical_str = str(canonical_path)
687
+ for sensitive in SENSITIVE_DIRECTORIES:
688
+ if canonical_str.startswith(sensitive):
689
+ raise PathSecurityError(
690
+ path=original_path_str,
691
+ violation_type="sensitive_directory",
692
+ message=f"Access to sensitive directory is blocked: {sensitive}",
693
+ )
694
+
695
+ # Path is not in any allowed directory
696
+ allowed_str = ", ".join(str(p) for p in allowed_paths)
697
+ raise PathSecurityError(
698
+ path=original_path_str,
699
+ violation_type="path_outside_allowlist",
700
+ message=f"Path is not in allowed directories. Allowed: {allowed_str}",
701
+ )