splurge-dsv 2025.1.0__py3-none-any.whl → 2025.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splurge_dsv/__init__.py +84 -0
- splurge_dsv/__main__.py +15 -0
- splurge_dsv/cli.py +158 -0
- splurge_dsv/dsv_helper.py +29 -46
- splurge_dsv/exceptions.py +22 -9
- splurge_dsv/path_validator.py +102 -79
- splurge_dsv/resource_manager.py +77 -138
- splurge_dsv/string_tokenizer.py +5 -24
- splurge_dsv/text_file_helper.py +42 -64
- {splurge_dsv-2025.1.0.dist-info → splurge_dsv-2025.1.2.dist-info}/METADATA +47 -1
- splurge_dsv-2025.1.2.dist-info/RECORD +14 -0
- splurge_dsv-2025.1.0.dist-info/RECORD +0 -13
- {splurge_dsv-2025.1.0.dist-info → splurge_dsv-2025.1.2.dist-info}/WHEEL +0 -0
- {splurge_dsv-2025.1.0.dist-info → splurge_dsv-2025.1.2.dist-info}/licenses/LICENSE +0 -0
- {splurge_dsv-2025.1.0.dist-info → splurge_dsv-2025.1.2.dist-info}/top_level.txt +0 -0
splurge_dsv/path_validator.py
CHANGED
@@ -11,16 +11,13 @@ Please preserve this header and all related material when sharing!
|
|
11
11
|
This module is licensed under the MIT License.
|
12
12
|
"""
|
13
13
|
|
14
|
+
# Standard library imports
|
14
15
|
import os
|
15
16
|
import re
|
16
17
|
from pathlib import Path
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
SplurgeFileNotFoundError,
|
21
|
-
SplurgeFilePermissionError
|
22
|
-
)
|
23
|
-
|
19
|
+
# Local imports
|
20
|
+
from splurge_dsv.exceptions import SplurgeFileNotFoundError, SplurgeFilePermissionError, SplurgePathValidationError
|
24
21
|
|
25
22
|
# Module-level constants for path validation
|
26
23
|
_MAX_PATH_LENGTH = 4096 # Maximum path length for most filesystems
|
@@ -30,29 +27,62 @@ _DEFAULT_FILENAME = "unnamed_file" # Default filename when sanitization results
|
|
30
27
|
class PathValidator:
|
31
28
|
"""
|
32
29
|
Utility class for validating file paths securely.
|
33
|
-
|
30
|
+
|
34
31
|
This class provides methods to validate file paths and prevent
|
35
32
|
path traversal attacks and other security vulnerabilities.
|
36
33
|
"""
|
37
|
-
|
34
|
+
|
38
35
|
# Private constants for path validation
|
39
36
|
_PATH_TRAVERSAL_PATTERNS = [
|
40
|
-
r
|
41
|
-
r
|
42
|
-
r
|
43
|
-
r
|
37
|
+
r"\.\.", # Directory traversal
|
38
|
+
r"//+", # Multiple forward slashes (including //)
|
39
|
+
r"\\{2,}", # Two or more consecutive backslashes (not normal Windows paths)
|
40
|
+
r"~", # Home directory expansion
|
44
41
|
]
|
45
|
-
|
42
|
+
|
46
43
|
_DANGEROUS_CHARS = [
|
47
|
-
|
48
|
-
|
49
|
-
'
|
50
|
-
|
51
|
-
|
44
|
+
"<",
|
45
|
+
">",
|
46
|
+
'"',
|
47
|
+
"|",
|
48
|
+
"?",
|
49
|
+
"*", # Windows reserved characters (excluding ':' for drive letters)
|
50
|
+
"\x00",
|
51
|
+
"\x01",
|
52
|
+
"\x02",
|
53
|
+
"\x03",
|
54
|
+
"\x04",
|
55
|
+
"\x05",
|
56
|
+
"\x06",
|
57
|
+
"\x07", # Control characters
|
58
|
+
"\x08",
|
59
|
+
"\x09",
|
60
|
+
"\x0a",
|
61
|
+
"\x0b",
|
62
|
+
"\x0c",
|
63
|
+
"\x0d",
|
64
|
+
"\x0e",
|
65
|
+
"\x0f",
|
66
|
+
"\x10",
|
67
|
+
"\x11",
|
68
|
+
"\x12",
|
69
|
+
"\x13",
|
70
|
+
"\x14",
|
71
|
+
"\x15",
|
72
|
+
"\x16",
|
73
|
+
"\x17",
|
74
|
+
"\x18",
|
75
|
+
"\x19",
|
76
|
+
"\x1a",
|
77
|
+
"\x1b",
|
78
|
+
"\x1c",
|
79
|
+
"\x1d",
|
80
|
+
"\x1e",
|
81
|
+
"\x1f",
|
52
82
|
]
|
53
|
-
|
83
|
+
|
54
84
|
MAX_PATH_LENGTH = _MAX_PATH_LENGTH
|
55
|
-
|
85
|
+
|
56
86
|
@classmethod
|
57
87
|
def validate_path(
|
58
88
|
cls,
|
@@ -62,11 +92,11 @@ class PathValidator:
|
|
62
92
|
must_be_file: bool = False,
|
63
93
|
must_be_readable: bool = False,
|
64
94
|
allow_relative: bool = True,
|
65
|
-
base_directory: str | Path | None = None
|
95
|
+
base_directory: str | Path | None = None,
|
66
96
|
) -> Path:
|
67
97
|
"""
|
68
98
|
Validate a file path for security and correctness.
|
69
|
-
|
99
|
+
|
70
100
|
Args:
|
71
101
|
file_path: Path to validate
|
72
102
|
must_exist: Whether the file must exist
|
@@ -74,10 +104,10 @@ class PathValidator:
|
|
74
104
|
must_be_readable: Whether the file must be readable
|
75
105
|
allow_relative: Whether to allow relative paths
|
76
106
|
base_directory: Base directory for relative path resolution
|
77
|
-
|
107
|
+
|
78
108
|
Returns:
|
79
109
|
Normalized Path object
|
80
|
-
|
110
|
+
|
81
111
|
Raises:
|
82
112
|
SplurgePathValidationError: If path validation fails
|
83
113
|
SplurgeFileNotFoundError: If file doesn't exist when required
|
@@ -85,26 +115,25 @@ class PathValidator:
|
|
85
115
|
"""
|
86
116
|
# Convert to Path object
|
87
117
|
path = Path(file_path) if isinstance(file_path, str) else file_path
|
88
|
-
|
118
|
+
|
89
119
|
# Get the original string for validation (before Path normalization)
|
90
120
|
path_str = str(file_path) if isinstance(file_path, str) else str(path)
|
91
|
-
|
121
|
+
|
92
122
|
# Check for dangerous characters
|
93
123
|
cls._check_dangerous_characters(path_str)
|
94
|
-
|
124
|
+
|
95
125
|
# Check for path traversal patterns
|
96
126
|
cls._check_path_traversal(path_str)
|
97
|
-
|
127
|
+
|
98
128
|
# Check path length
|
99
129
|
cls._check_path_length(path_str)
|
100
|
-
|
130
|
+
|
101
131
|
# Handle relative paths
|
102
132
|
if not path.is_absolute() and not allow_relative:
|
103
133
|
raise SplurgePathValidationError(
|
104
|
-
f"Relative paths are not allowed: {path}",
|
105
|
-
details="Set allow_relative=True to allow relative paths"
|
134
|
+
f"Relative paths are not allowed: {path}", details="Set allow_relative=True to allow relative paths"
|
106
135
|
)
|
107
|
-
|
136
|
+
|
108
137
|
# Resolve path (handles symlinks and normalizes)
|
109
138
|
try:
|
110
139
|
if base_directory:
|
@@ -113,68 +142,63 @@ class PathValidator:
|
|
113
142
|
resolved_path = (base_path / path).resolve()
|
114
143
|
else:
|
115
144
|
resolved_path = path.resolve()
|
116
|
-
|
145
|
+
|
117
146
|
# Ensure resolved path is within base directory
|
118
147
|
try:
|
119
148
|
resolved_path.relative_to(base_path)
|
120
149
|
except ValueError:
|
121
150
|
raise SplurgePathValidationError(
|
122
151
|
f"Path {path} resolves outside base directory {base_directory}",
|
123
|
-
details="Path traversal detected"
|
124
|
-
)
|
152
|
+
details="Path traversal detected",
|
153
|
+
) from None
|
125
154
|
else:
|
126
155
|
resolved_path = path.resolve()
|
127
156
|
except (OSError, RuntimeError) as e:
|
128
157
|
raise SplurgePathValidationError(
|
129
|
-
f"Failed to resolve path {path}: {e}",
|
130
|
-
|
131
|
-
|
132
|
-
|
158
|
+
f"Failed to resolve path {path}: {e}", details="Check if path contains invalid characters or symlinks"
|
159
|
+
) from e
|
160
|
+
|
133
161
|
# Check if file exists
|
134
162
|
if must_exist and not resolved_path.exists():
|
135
163
|
raise SplurgeFileNotFoundError(
|
136
|
-
f"File does not exist: {resolved_path}",
|
137
|
-
details="Set must_exist=False to allow non-existent files"
|
164
|
+
f"File does not exist: {resolved_path}", details="Set must_exist=False to allow non-existent files"
|
138
165
|
)
|
139
|
-
|
166
|
+
|
140
167
|
# Check if it's a file (not directory)
|
141
168
|
if must_be_file and resolved_path.exists() and not resolved_path.is_file():
|
142
169
|
raise SplurgePathValidationError(
|
143
|
-
f"Path is not a file: {resolved_path}",
|
144
|
-
details="Path exists but is not a regular file"
|
170
|
+
f"Path is not a file: {resolved_path}", details="Path exists but is not a regular file"
|
145
171
|
)
|
146
|
-
|
172
|
+
|
147
173
|
# Check if file is readable
|
148
174
|
if must_be_readable:
|
149
175
|
if not resolved_path.exists():
|
150
176
|
raise SplurgeFileNotFoundError(
|
151
177
|
f"Cannot check readability of non-existent file: {resolved_path}",
|
152
|
-
details="File must exist to check readability"
|
178
|
+
details="File must exist to check readability",
|
153
179
|
)
|
154
|
-
|
180
|
+
|
155
181
|
if not os.access(resolved_path, os.R_OK):
|
156
182
|
raise SplurgeFilePermissionError(
|
157
|
-
f"File is not readable: {resolved_path}",
|
158
|
-
details="Check file permissions"
|
183
|
+
f"File is not readable: {resolved_path}", details="Check file permissions"
|
159
184
|
)
|
160
|
-
|
185
|
+
|
161
186
|
return resolved_path
|
162
|
-
|
187
|
+
|
163
188
|
@classmethod
|
164
189
|
def _is_valid_windows_drive_pattern(cls, path_str: str) -> bool:
|
165
190
|
"""
|
166
191
|
Check if a path string contains a valid Windows drive letter pattern.
|
167
|
-
|
192
|
+
|
168
193
|
Args:
|
169
194
|
path_str: Path string to validate
|
170
|
-
|
195
|
+
|
171
196
|
Returns:
|
172
197
|
True if the path contains a valid Windows drive letter pattern,
|
173
198
|
False otherwise
|
174
199
|
"""
|
175
200
|
# Must be C: at the end of the string, or C:\ (or C:/) followed by path
|
176
|
-
return
|
177
|
-
re.match(r'^[A-Za-z]:[\\/]', path_str))
|
201
|
+
return re.match(r"^[A-Za-z]:$", path_str) or re.match(r"^[A-Za-z]:[\\/]", path_str)
|
178
202
|
|
179
203
|
@classmethod
|
180
204
|
def _check_dangerous_characters(cls, path_str: str) -> None:
|
@@ -184,74 +208,73 @@ class PathValidator:
|
|
184
208
|
if char in path_str:
|
185
209
|
raise SplurgePathValidationError(
|
186
210
|
f"Path contains dangerous character: {repr(char)}",
|
187
|
-
details=f"Character at position {path_str.find(char)}"
|
211
|
+
details=f"Character at position {path_str.find(char)}",
|
188
212
|
)
|
189
|
-
|
213
|
+
|
190
214
|
# Special handling for colons - only allow them in Windows drive letters (e.g., C:)
|
191
|
-
if
|
215
|
+
if ":" in path_str:
|
192
216
|
if not cls._is_valid_windows_drive_pattern(path_str):
|
193
217
|
raise SplurgePathValidationError(
|
194
218
|
"Path contains colon in invalid position",
|
195
|
-
details="Colons are only allowed in Windows drive letters (e.g., C: or C:\\)"
|
219
|
+
details="Colons are only allowed in Windows drive letters (e.g., C: or C:\\)",
|
196
220
|
)
|
197
|
-
|
221
|
+
|
198
222
|
@classmethod
|
199
223
|
def _check_path_traversal(cls, path_str: str) -> None:
|
200
224
|
"""Check for path traversal patterns."""
|
201
225
|
for pattern in cls._PATH_TRAVERSAL_PATTERNS:
|
202
226
|
if re.search(pattern, path_str):
|
203
227
|
raise SplurgePathValidationError(
|
204
|
-
f"Path contains traversal pattern: {pattern}",
|
205
|
-
details="Path traversal attacks are not allowed"
|
228
|
+
f"Path contains traversal pattern: {pattern}", details="Path traversal attacks are not allowed"
|
206
229
|
)
|
207
|
-
|
230
|
+
|
208
231
|
@classmethod
|
209
232
|
def _check_path_length(cls, path_str: str) -> None:
|
210
233
|
"""Check if path length is within acceptable limits."""
|
211
234
|
if len(path_str) > cls.MAX_PATH_LENGTH:
|
212
235
|
raise SplurgePathValidationError(
|
213
236
|
f"Path is too long: {len(path_str)} characters",
|
214
|
-
details=f"Maximum allowed length is {cls.MAX_PATH_LENGTH} characters"
|
237
|
+
details=f"Maximum allowed length is {cls.MAX_PATH_LENGTH} characters",
|
215
238
|
)
|
216
|
-
|
239
|
+
|
217
240
|
@classmethod
|
218
241
|
def sanitize_filename(cls, filename: str) -> str:
|
219
242
|
"""
|
220
243
|
Sanitize a filename by removing dangerous characters.
|
221
|
-
|
244
|
+
|
222
245
|
Args:
|
223
246
|
filename: Original filename
|
224
|
-
|
247
|
+
|
225
248
|
Returns:
|
226
249
|
Sanitized filename
|
227
250
|
"""
|
228
251
|
# Remove or replace dangerous characters
|
229
252
|
sanitized = filename
|
230
|
-
|
253
|
+
|
231
254
|
# Replace Windows reserved characters
|
232
|
-
for char in [
|
233
|
-
sanitized = sanitized.replace(char,
|
234
|
-
|
255
|
+
for char in ["<", ">", ":", '"', "|", "?", "*"]:
|
256
|
+
sanitized = sanitized.replace(char, "_")
|
257
|
+
|
235
258
|
# Remove control characters
|
236
|
-
sanitized =
|
237
|
-
|
259
|
+
sanitized = "".join(char for char in sanitized if ord(char) >= 32)
|
260
|
+
|
238
261
|
# Remove leading/trailing spaces and dots
|
239
|
-
sanitized = sanitized.strip(
|
240
|
-
|
262
|
+
sanitized = sanitized.strip(" .")
|
263
|
+
|
241
264
|
# Ensure filename is not empty
|
242
265
|
if not sanitized:
|
243
266
|
sanitized = _DEFAULT_FILENAME
|
244
|
-
|
267
|
+
|
245
268
|
return sanitized
|
246
|
-
|
269
|
+
|
247
270
|
@classmethod
|
248
271
|
def is_safe_path(cls, file_path: str | Path) -> bool:
|
249
272
|
"""
|
250
273
|
Check if a path is safe without raising exceptions.
|
251
|
-
|
274
|
+
|
252
275
|
Args:
|
253
276
|
file_path: Path to check
|
254
|
-
|
277
|
+
|
255
278
|
Returns:
|
256
279
|
True if path is safe, False otherwise
|
257
280
|
"""
|