spatial-memory-mcp 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spatial-memory-mcp might be problematic. Click here for more details.
- spatial_memory/__init__.py +97 -0
- spatial_memory/__main__.py +270 -0
- spatial_memory/adapters/__init__.py +7 -0
- spatial_memory/adapters/lancedb_repository.py +878 -0
- spatial_memory/config.py +728 -0
- spatial_memory/core/__init__.py +118 -0
- spatial_memory/core/cache.py +317 -0
- spatial_memory/core/circuit_breaker.py +297 -0
- spatial_memory/core/connection_pool.py +220 -0
- spatial_memory/core/consolidation_strategies.py +402 -0
- spatial_memory/core/database.py +3069 -0
- spatial_memory/core/db_idempotency.py +242 -0
- spatial_memory/core/db_indexes.py +575 -0
- spatial_memory/core/db_migrations.py +584 -0
- spatial_memory/core/db_search.py +509 -0
- spatial_memory/core/db_versioning.py +177 -0
- spatial_memory/core/embeddings.py +557 -0
- spatial_memory/core/errors.py +317 -0
- spatial_memory/core/file_security.py +702 -0
- spatial_memory/core/filesystem.py +178 -0
- spatial_memory/core/health.py +289 -0
- spatial_memory/core/helpers.py +79 -0
- spatial_memory/core/import_security.py +432 -0
- spatial_memory/core/lifecycle_ops.py +1067 -0
- spatial_memory/core/logging.py +194 -0
- spatial_memory/core/metrics.py +192 -0
- spatial_memory/core/models.py +628 -0
- spatial_memory/core/rate_limiter.py +326 -0
- spatial_memory/core/response_types.py +497 -0
- spatial_memory/core/security.py +588 -0
- spatial_memory/core/spatial_ops.py +426 -0
- spatial_memory/core/tracing.py +300 -0
- spatial_memory/core/utils.py +110 -0
- spatial_memory/core/validation.py +403 -0
- spatial_memory/factory.py +407 -0
- spatial_memory/migrations/__init__.py +40 -0
- spatial_memory/ports/__init__.py +11 -0
- spatial_memory/ports/repositories.py +631 -0
- spatial_memory/py.typed +0 -0
- spatial_memory/server.py +1141 -0
- spatial_memory/services/__init__.py +70 -0
- spatial_memory/services/export_import.py +1023 -0
- spatial_memory/services/lifecycle.py +1120 -0
- spatial_memory/services/memory.py +412 -0
- spatial_memory/services/spatial.py +1147 -0
- spatial_memory/services/utility.py +409 -0
- spatial_memory/tools/__init__.py +5 -0
- spatial_memory/tools/definitions.py +695 -0
- spatial_memory/verify.py +140 -0
- spatial_memory_mcp-1.6.1.dist-info/METADATA +499 -0
- spatial_memory_mcp-1.6.1.dist-info/RECORD +54 -0
- spatial_memory_mcp-1.6.1.dist-info/WHEEL +4 -0
- spatial_memory_mcp-1.6.1.dist-info/entry_points.txt +2 -0
- spatial_memory_mcp-1.6.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
"""File security module for path validation and attack prevention.
|
|
2
|
+
|
|
3
|
+
This module provides security-critical path validation to prevent:
|
|
4
|
+
- Path traversal attacks (../, %2e%2e, etc.)
|
|
5
|
+
- Windows UNC path attacks
|
|
6
|
+
- Symlink-based escapes from allowed directories
|
|
7
|
+
- File size limit bypass
|
|
8
|
+
- Invalid file extension attacks
|
|
9
|
+
|
|
10
|
+
Security is implemented through defense-in-depth:
|
|
11
|
+
1. Pattern-based detection of known attack vectors
|
|
12
|
+
2. Path canonicalization to resolve symbolic elements
|
|
13
|
+
3. Allowlist validation to restrict accessible directories
|
|
14
|
+
4. Extension validation to limit file types
|
|
15
|
+
5. Symlink resolution and validation
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import errno
|
|
21
|
+
import os
|
|
22
|
+
import re
|
|
23
|
+
import stat
|
|
24
|
+
import urllib.parse
|
|
25
|
+
from collections.abc import Sequence
|
|
26
|
+
from io import BufferedReader
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import BinaryIO
|
|
29
|
+
|
|
30
|
+
from spatial_memory.core.errors import FileSizeLimitError, PathSecurityError
|
|
31
|
+
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# Security Constants
|
|
34
|
+
# =============================================================================
|
|
35
|
+
|
|
36
|
+
# Compiled regexes that flag path traversal attempts in a path string.
# Each entry is (pattern source, flags); the percent-encoded forms are matched
# case-insensitively because hex digits in URL encoding may be upper or lower.
PATH_TRAVERSAL_PATTERNS: list[re.Pattern[str]] = [
    re.compile(expression, flags)
    for expression, flags in (
        # Basic parent directory traversal
        (r"\.\.", 0),
        # URL-encoded .. (%2e = '.')
        (r"%2e%2e", re.IGNORECASE),
        # Double URL-encoded .. (%252e = '%2e')
        (r"%252e%252e", re.IGNORECASE),
        # Windows UNC paths (\\server\share or \\?\)
        (r"^\\\\", 0),
        # Unix-style UNC paths (//server/share)
        (r"^//", 0),
        # Null byte injection, either percent-encoded or literal
        (r"%00|\x00", 0),
        # Overlong UTF-8 encoding of '.' (CVE-2000-0884 style)
        (r"%c0%ae|%c0%2e|%c1%9c", re.IGNORECASE),
    )
]
|
|
54
|
+
|
|
55
|
+
# System directories that are reported as "sensitive" when a path falls
# outside the allowlist. Grouped by platform; the union is a single frozenset.
SENSITIVE_DIRECTORIES: frozenset[str] = (
    # Unix/Linux sensitive directories
    frozenset(
        (
            "/etc",
            "/usr",
            "/bin",
            "/sbin",
            "/var/log",
            "/root",
            "/home",
            "/tmp",
            "/var/tmp",
            "/proc",
            "/sys",
            "/dev",
        )
    )
    # macOS specific
    | frozenset(
        (
            "/System",
            "/Library",
            "/private",
        )
    )
    # Windows sensitive directories
    | frozenset(
        (
            "C:\\Windows",
            "C:\\Program Files",
            "C:\\Program Files (x86)",
            "C:\\ProgramData",
            "C:\\Users",
            "C:\\System32",
            "C:\\SysWOW64",
        )
    )
)
|
|
86
|
+
|
|
87
|
+
# File extensions accepted for export/import operations.
# Only data formats are listed - no executables or scripts.
VALID_EXTENSIONS: frozenset[str] = frozenset((".parquet", ".json", ".csv"))
|
|
96
|
+
|
|
97
|
+
# Upper bound on repeated URL-decoding passes applied to a path string.
# Three passes unwrap single (%2e), double (%252e), and triple (%25252e) encoding.
MAX_URL_DECODE_ITERATIONS: int = 3
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# =============================================================================
|
|
103
|
+
# PathValidator Class
|
|
104
|
+
# =============================================================================
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class PathValidator:
    """Validates file paths for security constraints.

    Validation is layered (defense in depth):
    1. Detects path traversal patterns in the raw and URL-decoded input
    2. Canonicalizes paths to resolve symbolic elements
    3. Validates against allowed directories (allowlist)
    4. Validates file extensions
    5. Detects and optionally blocks symlinks

    Instances only hold the configuration captured by ``__init__``; no method
    mutates that configuration after construction.

    Example:
        validator = PathValidator(
            allowed_export_paths=[Path("/data/exports")],
            allowed_import_paths=[Path("/data/imports")],
        )

        # Validate export path
        safe_path = validator.validate_export_path("/data/exports/backup.parquet")

        # Validate import path with size check
        safe_path = validator.validate_import_path(
            "/data/imports/restore.json",
            max_size_bytes=100 * 1024 * 1024,
        )
    """

    def __init__(
        self,
        allowed_export_paths: Sequence[str | Path],
        allowed_import_paths: Sequence[str | Path],
        allow_symlinks: bool = False,
    ) -> None:
        """Initialize the PathValidator.

        Args:
            allowed_export_paths: Directories where exports are permitted.
            allowed_import_paths: Directories where imports are permitted.
            allow_symlinks: Whether to allow following symlinks. Default False
                for security - symlinks can be used to escape allowed directories.
        """
        # Allowed roots are resolved once so later containment checks compare
        # canonical paths against canonical paths.
        self._allowed_export_paths: tuple[Path, ...] = tuple(
            Path(p).resolve() for p in allowed_export_paths
        )
        self._allowed_import_paths: tuple[Path, ...] = tuple(
            Path(p).resolve() for p in allowed_import_paths
        )
        self._allow_symlinks = allow_symlinks

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def validate_export_path(self, path: str | Path) -> Path:
        """Validate a path for export operations.

        Performs security checks without requiring the file to exist.

        Args:
            path: The path to validate. Can be absolute or relative.

        Returns:
            Canonicalized Path object that passed every check.

        Raises:
            PathSecurityError: If the path fails any security check.
            ValueError: If the path is empty or contains a null byte.
        """
        # Steps 1-3: string-level checks shared by all public validators.
        path_str = self._precheck_raw_path(path)
        path_obj = Path(path_str)

        # Step 4: canonicalize. RuntimeError is caught as well because
        # Path.resolve() raises it for symlink loops on Python < 3.13.
        try:
            canonical = self._resolve_export_target(path_obj)
        except (OSError, RuntimeError, ValueError) as e:
            raise PathSecurityError(
                path=path_str,
                violation_type="path_resolution_failed",
                message=f"Failed to resolve path: {e}",
            ) from e

        # Step 5: Check for traversal in canonical path (defense in depth)
        if ".." in str(canonical):
            raise PathSecurityError(
                path=path_str,
                violation_type="traversal_attempt",
                message=f"Path contains traversal after canonicalization: {path_str}",
            )

        # Step 6: Validate extension
        self._validate_extension(canonical)

        # Step 7: Check symlinks. This is deliberately NOT gated on
        # canonical.exists(): a dangling symlink reports exists() == False,
        # yet writing to it would follow the link out of the allowed directory.
        if not self._allow_symlinks:
            self._check_symlink(canonical, path_str)

        # Step 8: Validate against allowlist
        self._validate_allowlist(canonical, self._allowed_export_paths, path_str)

        return canonical

    def validate_import_path(self, path: str | Path, max_size_bytes: int) -> Path:
        """Validate a path for import operations.

        Performs all security checks and additionally verifies:
        - File exists
        - File is not a directory
        - File size is within limits

        Args:
            path: The path to validate. Can be absolute or relative.
            max_size_bytes: Maximum allowed file size in bytes.

        Returns:
            Canonicalized Path object that passed every check.

        Raises:
            PathSecurityError: If the path fails any security check.
            FileSizeLimitError: If the file exceeds the size limit.
            ValueError: If the path is empty or contains a null byte.
        """
        # Steps 1-3: string-level checks shared by all public validators.
        path_str = self._precheck_raw_path(path)

        # Step 4: Convert to Path
        path_obj = Path(path_str)

        # Step 5: Check file exists
        if not path_obj.exists():
            raise PathSecurityError(
                path=path_str,
                violation_type="file_not_found",
                message=f"File does not exist: {path_str}",
            )

        # Step 6: Check it's a file, not a directory
        if path_obj.is_dir():
            raise PathSecurityError(
                path=path_str,
                violation_type="not_a_file",
                message=f"Path is a directory, not a file: {path_str}",
            )

        # Step 7: Canonicalize (this follows symlinks; they are rejected in
        # step 10 against the un-resolved path when symlinks are disallowed)
        try:
            canonical = path_obj.resolve(strict=True)
        except (OSError, RuntimeError) as e:
            raise PathSecurityError(
                path=path_str,
                violation_type="path_resolution_failed",
                message=f"Failed to resolve path: {e}",
            ) from e

        # Step 8: Check for traversal in canonical path
        if ".." in str(canonical):
            raise PathSecurityError(
                path=path_str,
                violation_type="traversal_attempt",
                message=f"Path contains traversal after canonicalization: {path_str}",
            )

        # Step 9: Validate extension
        self._validate_extension(canonical)

        # Step 10: Check symlink on the path as given (not the resolved one)
        if not self._allow_symlinks:
            self._check_symlink(path_obj, path_str)

        # Step 11: Validate against allowlist
        self._validate_allowlist(canonical, self._allowed_import_paths, path_str)

        # Step 12: Check file size
        try:
            file_size = canonical.stat().st_size
        except OSError as e:
            raise PathSecurityError(
                path=path_str,
                violation_type="stat_failed",
                message=f"Failed to get file size: {e}",
            ) from e

        if file_size > max_size_bytes:
            raise FileSizeLimitError(
                path=path_str,
                actual_size_bytes=file_size,
                max_size_bytes=max_size_bytes,
            )

        return canonical

    def validate_and_open_import_file(
        self, path: str | Path, max_size_bytes: int
    ) -> tuple[Path, BinaryIO]:
        """Validate and open a file for import, checking the open descriptor.

        To narrow TOCTOU (Time-of-Check-Time-of-Use) races, the file type and
        size are read from the open file descriptor rather than from the path.
        The caller MUST read through the returned file handle.

        Args:
            path: The path to validate and open.
            max_size_bytes: Maximum allowed file size in bytes.

        Returns:
            Tuple of (canonical_path, open_file_handle). The file handle is
            opened in binary read mode. Caller is responsible for closing it.

        Raises:
            PathSecurityError: If the path fails any security check.
            FileSizeLimitError: If the file exceeds the size limit.
            ValueError: If the path is empty or contains a null byte.
        """
        # Steps 1-3: string-level checks shared by all public validators.
        path_str = self._precheck_raw_path(path)

        # Step 4: Validate extension BEFORE opening
        path_obj = Path(path_str)
        self._validate_extension(path_obj)

        # Step 5: Path-based allowlist check BEFORE opening, to reject obvious
        # violations early. The full check is repeated after the open.
        try:
            preliminary_canonical = path_obj.resolve()
        except (OSError, RuntimeError):
            # Unresolvable here: leave rejection to os.open() and the
            # post-open checks below.
            preliminary_canonical = None

        if preliminary_canonical is not None and not self._is_within_any(
            preliminary_canonical, self._allowed_import_paths
        ):
            allowed_str = ", ".join(str(p) for p in self._allowed_import_paths)
            raise PathSecurityError(
                path=path_str,
                violation_type="path_outside_allowlist",
                message=f"Path is not in allowed directories. Allowed: {allowed_str}",
            )

        # Step 5.5: Pre-check symlinks (path itself and all parents) BEFORE opening
        if not self._allow_symlinks:
            self._check_symlink(path_obj, path_str)

        # Step 6: Open with low-level os.open so later checks can use the
        # descriptor. Where available, O_NOFOLLOW makes the OS refuse to open
        # a symlink as the final path component.
        flags = os.O_RDONLY
        if hasattr(os, "O_NOFOLLOW") and not self._allow_symlinks:
            flags |= os.O_NOFOLLOW

        try:
            fd = os.open(str(path_obj), flags)
        except FileNotFoundError as e:
            # Subclasses of OSError must be handled before the generic clause.
            raise PathSecurityError(
                path=path_str,
                violation_type="file_not_found",
                message=f"File does not exist: {path_str}",
            ) from e
        except IsADirectoryError as e:
            raise PathSecurityError(
                path=path_str,
                violation_type="not_a_file",
                message=f"Path is a directory, not a file: {path_str}",
            ) from e
        except PermissionError as e:
            raise PathSecurityError(
                path=path_str,
                violation_type="permission_denied",
                message=f"Permission denied: {e}",
            ) from e
        except OSError as e:
            # O_NOFOLLOW causes ELOOP (or EMLINK on some systems) if path is a symlink
            if e.errno in (errno.ELOOP, getattr(errno, "EMLINK", None)):
                raise PathSecurityError(
                    path=path_str,
                    violation_type="symlink_not_allowed",
                    message=f"Symlinks are not allowed: {path_str}",
                ) from e
            raise PathSecurityError(
                path=path_str,
                violation_type="open_failed",
                message=f"Failed to open file: {e}",
            ) from e

        # From this point the raw descriptor must be closed on any failure.
        try:
            # Step 7: Get file stats from the OPEN descriptor (not the path!)
            try:
                fd_stat = os.fstat(fd)
            except OSError as e:
                raise PathSecurityError(
                    path=path_str,
                    violation_type="stat_failed",
                    message=f"Failed to stat file: {e}",
                ) from e

            # Step 8: Verify it's a regular file (not directory, device, etc.)
            if not stat.S_ISREG(fd_stat.st_mode):
                raise PathSecurityError(
                    path=path_str,
                    violation_type="not_a_regular_file",
                    message=f"Path is not a regular file: {path_str}",
                )

            # Step 9: Re-check symlink status after open; a symlink appearing
            # between the pre-check and the open is reported as a race.
            if not self._allow_symlinks and path_obj.is_symlink():
                raise PathSecurityError(
                    path=path_str,
                    violation_type="symlink_race_detected",
                    message=f"Symlink detected after open (possible race condition): {path_str}",
                )

            # Step 10: Check file size using the open descriptor
            if fd_stat.st_size > max_size_bytes:
                raise FileSizeLimitError(
                    path=path_str,
                    actual_size_bytes=fd_stat.st_size,
                    max_size_bytes=max_size_bytes,
                )

            # Step 11: Resolve the canonical path.
            # NOTE(review): this re-resolves by path, not from the descriptor,
            # so it describes the path as it is now rather than the opened file.
            try:
                canonical = path_obj.resolve(strict=True)
            except (OSError, RuntimeError) as e:
                raise PathSecurityError(
                    path=path_str,
                    violation_type="path_resolution_failed",
                    message=f"Failed to resolve path: {e}",
                ) from e

            # Step 12: Final allowlist validation on canonical path
            self._validate_allowlist(canonical, self._allowed_import_paths, path_str)

            # Step 13: Wrap the descriptor; the file object now owns the fd
            # and will close it.
            file_handle: BinaryIO = os.fdopen(fd, "rb")

            return canonical, file_handle

        except BaseException:
            # BaseException (not just Exception) so the descriptor is also
            # released on KeyboardInterrupt / SystemExit.
            os.close(fd)
            raise

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _precheck_raw_path(self, path: str | Path) -> str:
        """Run the string-level checks shared by every public validator.

        Args:
            path: The caller-supplied path.

        Returns:
            The stripped string form of ``path``.

        Raises:
            ValueError: If the path is empty or contains a null byte.
            PathSecurityError: If a traversal pattern or UNC prefix is found
                in the raw string, or a traversal pattern in its URL-decoded form.
        """
        path_str = str(path).strip() if path else ""
        if not path_str:
            raise ValueError("Path cannot be empty")

        if "\x00" in path_str:
            raise ValueError("Path cannot contain null bytes")

        # Step 1: Detect path traversal patterns in raw input
        self._check_traversal_patterns(path_str)

        # Step 2: Detect UNC paths
        self._check_unc_path(path_str)

        # Step 3: URL decode and check again (defense in depth)
        decoded = self._url_decode_path(path_str)
        if decoded != path_str:
            self._check_traversal_patterns(decoded)

        return path_str

    @staticmethod
    def _resolve_export_target(path_obj: Path) -> Path:
        """Canonicalize an export target that may not exist yet.

        An existing path is resolved directly. Otherwise the nearest existing
        ancestor is resolved and the not-yet-existing remainder is appended.

        Args:
            path_obj: The path to canonicalize.

        Returns:
            The canonical path.
        """
        if path_obj.exists():
            return path_obj.resolve()

        # Walk up until an existing ancestor (or the top of the path) is hit.
        parent = path_obj.parent
        while not parent.exists() and parent != parent.parent:
            parent = parent.parent

        if not parent.exists():
            return path_obj.absolute()

        if parent != path_obj:
            relative = path_obj.relative_to(parent)
        else:
            relative = Path(path_obj.name)
        return parent.resolve() / relative

    @staticmethod
    def _is_within_any(candidate: Path, allowed_paths: Sequence[Path]) -> bool:
        """Return True if ``candidate`` equals or lies under any allowed path.

        Uses ``relative_to`` (rather than ``is_relative_to``) so the check
        does not depend on Python 3.9+.

        Args:
            candidate: The path to test.
            allowed_paths: Directories to test against.

        Returns:
            True when at least one allowed path contains ``candidate``.
        """
        for allowed in allowed_paths:
            try:
                candidate.relative_to(allowed)
            except ValueError:
                continue
            return True
        return False

    def _check_traversal_patterns(self, path_str: str) -> None:
        """Check for path traversal patterns in the input string.

        Args:
            path_str: The path string to check.

        Raises:
            PathSecurityError: If a traversal pattern is detected.
        """
        for pattern in PATH_TRAVERSAL_PATTERNS:
            if pattern.search(path_str):
                raise PathSecurityError(
                    path=path_str,
                    violation_type="traversal_attempt",
                    message=f"Path traversal pattern detected: {path_str}",
                )

    def _check_unc_path(self, path_str: str) -> None:
        """Check for UNC-style path prefixes.

        Args:
            path_str: The path string to check.

        Raises:
            PathSecurityError: If a UNC path is detected.
        """
        # The extended prefix (\\?\) must be tested before the plain UNC
        # prefix (\\), otherwise the more specific branch can never run.
        if path_str.startswith("\\\\?\\"):
            raise PathSecurityError(
                path=path_str,
                violation_type="unc_path",
                message=f"Extended UNC paths are not allowed: {path_str}",
            )
        # Windows UNC (\\server\share)
        if path_str.startswith("\\\\"):
            raise PathSecurityError(
                path=path_str,
                violation_type="unc_path",
                message=f"UNC paths are not allowed: {path_str}",
            )
        # Unix-style UNC (//server/share)
        if path_str.startswith("//"):
            raise PathSecurityError(
                path=path_str,
                violation_type="unc_path",
                message=f"UNC-style paths are not allowed: {path_str}",
            )

    def _url_decode_path(self, path_str: str) -> str:
        """URL decode a path string (multiple passes for double encoding).

        Decoding stops as soon as a pass changes nothing, or after
        ``MAX_URL_DECODE_ITERATIONS`` passes.

        Args:
            path_str: The path string to decode.

        Returns:
            The decoded path string.
        """
        decoded = path_str
        for _ in range(MAX_URL_DECODE_ITERATIONS):
            new_decoded = urllib.parse.unquote(decoded)
            if new_decoded == decoded:
                break
            decoded = new_decoded
        return decoded

    def _validate_extension(self, path: Path) -> None:
        """Validate that the file has an allowed extension.

        Args:
            path: The path to validate.

        Raises:
            PathSecurityError: If the extension is not allowed.
        """
        # Compare case-insensitively
        ext = path.suffix.lower()

        if ext not in VALID_EXTENSIONS:
            allowed = ", ".join(sorted(VALID_EXTENSIONS))
            raise PathSecurityError(
                path=str(path),
                violation_type="invalid_extension",
                message=f"Invalid file extension '{ext}'. Allowed: {allowed}",
            )

    def _check_symlink(self, path: Path, original_path_str: str) -> None:
        """Check if path or any parent is a symlink.

        Args:
            path: The path to check.
            original_path_str: Original path string for error messages.

        Raises:
            PathSecurityError: If the path or one of its parents is a symlink.
        """
        # Check the path itself
        if path.is_symlink():
            raise PathSecurityError(
                path=original_path_str,
                violation_type="symlink_not_allowed",
                message=f"Symlinks are not allowed: {original_path_str}",
            )

        # Check every ancestor up to the top of the path
        current = path
        while current != current.parent:
            current = current.parent
            if current.is_symlink():
                raise PathSecurityError(
                    path=original_path_str,
                    violation_type="symlink_not_allowed",
                    message=f"Path contains symlink in parent directory: {original_path_str}",
                )

    def _validate_allowlist(
        self,
        canonical_path: Path,
        allowed_paths: tuple[Path, ...],
        original_path_str: str,
    ) -> None:
        """Validate that the canonical path is within allowed directories.

        Args:
            canonical_path: The canonicalized path to validate.
            allowed_paths: Tuple of allowed directory paths.
            original_path_str: Original path string for error messages.

        Raises:
            PathSecurityError: If the path is outside all allowed directories.
                The violation type is ``sensitive_directory`` when the path is
                a listed sensitive directory or lies under one, otherwise
                ``path_outside_allowlist``.
        """
        # An explicitly allowed path is accepted before (and regardless of)
        # the sensitive-directory classification below.
        if self._is_within_any(canonical_path, allowed_paths):
            return

        # Not allowed: classify the rejection. Match on a path-component
        # boundary so that e.g. "/usr2/x" is not reported as being under "/usr".
        canonical_str = str(canonical_path)
        for sensitive in SENSITIVE_DIRECTORIES:
            if canonical_str == sensitive or canonical_str.startswith(
                (sensitive + "/", sensitive + "\\")
            ):
                raise PathSecurityError(
                    path=original_path_str,
                    violation_type="sensitive_directory",
                    message=f"Access to sensitive directory is blocked: {sensitive}",
                )

        # Path is not in any allowed directory
        allowed_str = ", ".join(str(p) for p in allowed_paths)
        raise PathSecurityError(
            path=original_path_str,
            violation_type="path_outside_allowlist",
            message=f"Path is not in allowed directories. Allowed: {allowed_str}",
        )
|