taipanstack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- taipanstack/__init__.py +53 -0
- taipanstack/config/__init__.py +25 -0
- taipanstack/config/generators.py +357 -0
- taipanstack/config/models.py +316 -0
- taipanstack/config/version_config.py +227 -0
- taipanstack/core/__init__.py +47 -0
- taipanstack/core/compat.py +329 -0
- taipanstack/core/optimizations.py +392 -0
- taipanstack/core/result.py +199 -0
- taipanstack/security/__init__.py +55 -0
- taipanstack/security/decorators.py +369 -0
- taipanstack/security/guards.py +362 -0
- taipanstack/security/sanitizers.py +321 -0
- taipanstack/security/validators.py +342 -0
- taipanstack/utils/__init__.py +24 -0
- taipanstack/utils/circuit_breaker.py +268 -0
- taipanstack/utils/filesystem.py +417 -0
- taipanstack/utils/logging.py +328 -0
- taipanstack/utils/metrics.py +272 -0
- taipanstack/utils/retry.py +300 -0
- taipanstack/utils/subprocess.py +344 -0
- taipanstack-0.1.0.dist-info/METADATA +350 -0
- taipanstack-0.1.0.dist-info/RECORD +25 -0
- taipanstack-0.1.0.dist-info/WHEEL +4 -0
- taipanstack-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Runtime guards for protection against errors and AI hallucinations.
|
|
3
|
+
|
|
4
|
+
These guards provide runtime protection against common security issues
|
|
5
|
+
and programming errors that can occur from incorrect AI-generated code.
|
|
6
|
+
All guards raise SecurityError on violation.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
from collections.abc import Sequence
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SecurityError(Exception):
    """Raised when a security guard detects a violation.

    Attributes:
        guard_name: Name of the guard that was triggered.
        message: Description of the violation (without the guard prefix).
        value: The offending value (if safe to log).

    """

    def __init__(
        self,
        message: str,
        guard_name: str = "unknown",
        value: str | None = None,
    ) -> None:
        """Initialize SecurityError.

        Args:
            message: Description of the violation.
            guard_name: Name of the guard that triggered.
            value: The offending value (sanitized).

        """
        self.guard_name = guard_name
        # The class docstring advertises ``message``; store the raw text so
        # callers do not have to strip the "[guard]" prefix from str(exc).
        self.message = message
        self.value = value
        super().__init__(f"[{guard_name}] {message}")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _has_symlink_component(candidate: Path, base_dir: Path) -> bool:
    """Return True if *candidate*, or a component of it below *base_dir*, is a symlink."""
    if candidate.is_symlink():
        return True
    try:
        relative = candidate.relative_to(base_dir)
    except ValueError:
        # Not lexically under base_dir (e.g. an absolute path spelled through
        # a different prefix); only the final component could be checked.
        return False
    current = base_dir
    # The last part is the candidate itself, which was checked above.
    for part in relative.parts[:-1]:
        current = current / part
        if current.is_symlink():
            return True
    return False


def guard_path_traversal(
    path: Path | str,
    base_dir: Path | str | None = None,
    *,
    allow_symlinks: bool = False,
) -> Path:
    """Prevent path traversal attacks.

    Ensures that the given path does not escape the base directory
    using techniques like '..' or symlinks.

    Args:
        path: The path to validate.
        base_dir: The base directory to constrain to. Defaults to cwd.
        allow_symlinks: Whether to allow symlinks (default: False).

    Returns:
        The resolved, validated path.

    Raises:
        SecurityError: If a traversal pattern is present, the path cannot be
            resolved, the resolved path lies outside ``base_dir``, or a
            symlink is encountered while ``allow_symlinks`` is False.

    Example:
        >>> guard_path_traversal("../etc/passwd", Path("/app"))
        SecurityError: [path_traversal] Path traversal pattern detected: ..

    """
    path = Path(path) if isinstance(path, str) else path
    base_dir = Path(base_dir).resolve() if base_dir else Path.cwd().resolve()

    # Check for explicit traversal patterns before resolution.
    # NOTE: these are matched as literal, case-insensitive substrings (not
    # regular expressions), so r"\.\." only matches a backslash-escaped "..".
    path_str = str(path)
    lowered = path_str.lower()
    traversal_patterns = (
        "..",
        "~",
        r"\.\.",
        "%2e%2e",  # URL encoded ..
        "%252e%252e",  # Double URL encoded
    )
    for pattern in traversal_patterns:
        if pattern in lowered:
            raise SecurityError(
                f"Path traversal pattern detected: {pattern}",
                guard_name="path_traversal",
                value=path_str[:50],  # Truncate for safety
            )

    # Keep the unresolved candidate: resolve() follows symlinks, so the
    # symlink check below has to look at the path as it was spelled.
    candidate = path if path.is_absolute() else base_dir / path
    try:
        resolved = candidate.resolve()
    except (OSError, ValueError, RuntimeError) as e:
        # RuntimeError covers symlink loops on older Python versions.
        raise SecurityError(
            f"Invalid path: {e}",
            guard_name="path_traversal",
        ) from e

    # Check if resolved path is within base_dir
    try:
        resolved.relative_to(base_dir)
    except ValueError as e:
        raise SecurityError(
            f"Path escapes base directory: {resolved} is not under {base_dir}",
            guard_name="path_traversal",
            value=str(resolved)[:100],
        ) from e

    # Check for symlinks if not allowed. Testing ``resolved`` here would never
    # fire because a resolved path no longer contains symlinks.
    if not allow_symlinks and _has_symlink_component(candidate, base_dir):
        raise SecurityError(
            "Symlinks are not allowed",
            guard_name="path_traversal",
            value=str(candidate),
        )

    return resolved
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def guard_command_injection(
    command: Sequence[str],
    *,
    allowed_commands: Sequence[str] | None = None,
) -> list[str]:
    """Prevent command injection attacks.

    Validates that command arguments don't contain shell metacharacters
    that could lead to command injection.

    Args:
        command: The command and arguments as a sequence. A bare string is
            rejected because it would be split into single characters.
        allowed_commands: Optional whitelist of allowed base commands.

    Returns:
        The validated command as a list.

    Raises:
        SecurityError: If the command is empty, is a bare string, contains a
            dangerous shell character, or is not in ``allowed_commands``.

    Example:
        >>> guard_command_injection(["echo", "hello; rm -rf /"])
        SecurityError: [command_injection] Dangerous shell character detected: command separator

    """
    if not command:
        raise SecurityError(
            "Empty command is not allowed",
            guard_name="command_injection",
        )

    # A str satisfies Sequence[str], but list("ls -l") yields characters,
    # not arguments, so refuse it instead of returning a useless list.
    if isinstance(command, str):
        raise SecurityError(
            "Command must be a sequence of arguments, not a single string",
            guard_name="command_injection",
            value=command[:50],
        )

    cmd_list = list(command)

    # Dangerous shell metacharacters. Multi-character operators come first:
    # each one contains a single-character pattern, so listing them later
    # would make their more specific description unreachable.
    dangerous_patterns: tuple[tuple[str, str], ...] = (
        ("$(", "command substitution"),
        ("${", "variable expansion"),
        (">>", "redirect append"),
        ("||", "or operator"),
        ("&&", "and operator"),
        (";", "command separator"),
        ("|", "pipe"),
        ("&", "background/and operator"),
        ("$", "variable expansion"),
        ("`", "command substitution"),
        (">", "redirect"),
        ("<", "redirect"),
        ("\n", "newline"),
        ("\r", "carriage return"),
        ("\x00", "null byte"),
    )

    for arg in cmd_list:
        for pattern, description in dangerous_patterns:
            if pattern in arg:
                raise SecurityError(
                    f"Dangerous shell character detected: {description}",
                    guard_name="command_injection",
                    value=arg[:50],
                )

    # Check against allowed commands whitelist
    if allowed_commands is not None:
        base_command = cmd_list[0]
        # Accept either the bare command name or the exact string given.
        command_name = Path(base_command).name
        if (
            command_name not in allowed_commands
            and base_command not in allowed_commands
        ):
            raise SecurityError(
                f"Command not in allowed list: {command_name}",
                guard_name="command_injection",
                value=command_name,
            )

    return cmd_list
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def guard_file_extension(
    filename: str | Path,
    *,
    allowed_extensions: Sequence[str] | None = None,
    denied_extensions: Sequence[str] | None = None,
) -> Path:
    """Validate file extension against allow/deny lists.

    The extension is compared case-insensitively and without its leading
    dot. Trailing dots and spaces in the file name are ignored when
    extracting it, so names such as ``"evil.exe."`` are still recognised.

    Args:
        filename: The filename to check.
        allowed_extensions: Extensions to allow (with or without dot).
        denied_extensions: Extensions to deny (with or without dot). When
            given, this list replaces the built-in deny list.

    Returns:
        The filename as a Path.

    Raises:
        SecurityError: If extension is not allowed or is denied.

    """
    path = Path(filename)

    # Windows drops trailing dots and spaces when the file is created, so
    # "evil.exe." and "evil.exe " both end up as "evil.exe". Strip them
    # before extracting the suffix, otherwise the deny list is bypassed.
    effective_name = path.name.rstrip(". ")
    ext = Path(effective_name).suffix.lower().lstrip(".")

    if denied_extensions is not None:
        denied = {item.lower().lstrip(".") for item in denied_extensions}
    else:
        # Default dangerous extensions
        denied = {
            # Executables
            "exe",
            "dll",
            "so",
            "dylib",
            # Scripts
            "sh",
            "bash",
            "zsh",
            "ps1",
            "bat",
            "cmd",
            # Server-side scripts
            "php",
            "jsp",
            "asp",
            "aspx",
        }

    if ext in denied:
        raise SecurityError(
            f"File extension '{ext}' is not allowed",
            guard_name="file_extension",
            value=str(path.name),
        )

    if allowed_extensions is not None:
        allowed = {item.lower().lstrip(".") for item in allowed_extensions}
        if ext not in allowed:
            raise SecurityError(
                f"File extension '{ext}' is not in allowed list",
                guard_name="file_extension",
                value=str(path.name),
            )

    return path
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def guard_env_variable(
    name: str,
    *,
    allowed_names: Sequence[str] | None = None,
    denied_names: Sequence[str] | None = None,
) -> str:
    """Read an environment variable unless it looks sensitive.

    Names are compared case-insensitively. A name is rejected outright when
    it is on the deny list, and rejected when it matches one of the built-in
    sensitive patterns unless it is also listed in ``allowed_names``.

    Args:
        name: The environment variable name.
        allowed_names: Names exempted from the sensitive-pattern check.
        denied_names: Names to deny; replaces the built-in deny list.

    Returns:
        The environment variable value if safe.

    Raises:
        SecurityError: If access is denied or the variable is not set.

    """
    upper_name = name.upper()

    if denied_names is None:
        # Default sensitive variables
        blocked = {
            "AWS_SECRET_ACCESS_KEY",
            "AWS_SESSION_TOKEN",
            "GITHUB_TOKEN",
            "GH_TOKEN",
            "GITLAB_TOKEN",
            "DATABASE_URL",
            "DB_PASSWORD",
            "PASSWORD",
            "SECRET_KEY",
            "PRIVATE_KEY",
            "API_KEY",
            "API_SECRET",
        }
    else:
        blocked = {entry.upper() for entry in denied_names}

    if upper_name in blocked:
        raise SecurityError(
            f"Access to sensitive variable '{name}' is denied",
            guard_name="env_variable",
            value=name,
        )

    sensitive_patterns = (
        r".*SECRET.*",
        r".*PASSWORD.*",
        r".*TOKEN.*",
        r".*PRIVATE.*KEY.*",
        r".*API.*KEY.*",
    )
    looks_sensitive = any(
        re.match(pattern, upper_name) for pattern in sensitive_patterns
    )
    if looks_sensitive:
        # The allow list only matters for pattern matches; the explicit deny
        # list above always wins.
        explicitly_allowed = allowed_names is not None and upper_name in {
            entry.upper() for entry in allowed_names
        }
        if not explicitly_allowed:
            raise SecurityError(
                f"Access to potentially sensitive variable '{name}' is denied",
                guard_name="env_variable",
                value=name,
            )

    # Look the variable up under the name exactly as given.
    value = os.environ.get(name)
    if value is None:
        raise SecurityError(
            f"Environment variable '{name}' is not set",
            guard_name="env_variable",
            value=name,
        )

    return value
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Input sanitizers for cleaning untrusted data.
|
|
3
|
+
|
|
4
|
+
Provides functions to sanitize strings, filenames, and paths
|
|
5
|
+
to remove potentially dangerous characters.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
import unicodedata
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
# Constants to avoid magic values (PLR2004)
|
|
15
|
+
MAX_SQL_IDENTIFIER_LENGTH = 128
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def sanitize_string(
    value: str,
    *,
    max_length: int | None = None,
    allow_html: bool = False,
    allow_unicode: bool = True,
    strip_whitespace: bool = True,
) -> str:
    """Sanitize a string by removing dangerous characters.

    Control characters other than newline, carriage return and tab are
    always removed. Unless ``allow_html`` is set, complete HTML tags are
    removed and any remaining ampersand or angle bracket is escaped as an
    HTML entity.

    Args:
        value: The string to sanitize.
        max_length: Maximum length to truncate to. Truncation happens after
            escaping, so it may cut through an escaped entity.
        allow_html: Whether to keep HTML tags (default: False).
        allow_unicode: Whether to keep non-ASCII characters.
        strip_whitespace: Whether to strip leading/trailing whitespace.

    Returns:
        The sanitized string.

    Example:
        >>> sanitize_string("<script>alert('xss')</script>Hello")
        "alert('xss')Hello"

    """
    # Local import keeps this function self-contained; only needed here.
    import html

    if not value:
        return ""

    # Strip whitespace first
    result = value.strip() if strip_whitespace else value

    # Remove control characters (category "Cc", which includes the null
    # byte) while keeping ordinary line breaks and tabs.
    result = "".join(
        ch for ch in result if ch in "\n\r\t" or unicodedata.category(ch) != "Cc"
    )

    # Handle HTML
    if not allow_html:
        # Remove complete HTML tags
        result = re.sub(r"<[^>]+>", "", result)
        # Escape what is left. The previous chained replaces mapped each
        # character to itself and therefore escaped nothing.
        result = html.escape(result, quote=False)

    # Handle unicode
    if not allow_unicode:
        result = result.encode("ascii", errors="ignore").decode("ascii")

    # Truncate if needed
    if max_length is not None and len(result) > max_length:
        result = result[:max_length]

    return result
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def sanitize_filename(
    filename: str,
    *,
    max_length: int = 255,
    replacement: str = "_",
    preserve_extension: bool = True,
) -> str:
    """Sanitize a filename to be safe for filesystem use.

    Only the final path component is used; any directory part is dropped.
    Characters that are not allowed in filenames on various OSes, or that
    are potentially dangerous (path separators, control characters), are
    replaced in both the stem and the extension.

    Args:
        filename: The filename to sanitize.
        max_length: Maximum length for the filename.
        replacement: Character to replace invalid chars with.
        preserve_extension: Keep original extension.

    Returns:
        The sanitized filename, or ``"unnamed"`` if nothing usable remains.

    Example:
        >>> sanitize_filename("my/../file<>:name.txt")
        'file_name.txt'

    """
    if not filename:
        return "unnamed"

    # Characters not allowed in filenames (Windows + Unix)
    invalid_chars = r'[<>:"/\\|?*\x00-\x1f]'

    # Get parts
    original_path = Path(filename)
    stem = original_path.stem
    suffix = original_path.suffix if preserve_extension else ""

    # The extension comes from the same untrusted input as the stem, so it
    # needs the same cleaning; trailing dots/spaces are a Windows issue.
    suffix = re.sub(invalid_chars, replacement, suffix).rstrip(". ")

    # Replace invalid characters (this already covers both path separators)
    safe_stem = re.sub(invalid_chars, replacement, stem)

    # Remove leading/trailing dots and spaces (Windows issues)
    safe_stem = safe_stem.strip(". ")

    # Collapse multiple replacement chars
    if replacement:
        safe_stem = re.sub(f"{re.escape(replacement)}+", replacement, safe_stem)
        safe_stem = safe_stem.strip(replacement)

    # Handle reserved names (Windows)
    reserved_names = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        *(f"COM{n}" for n in range(1, 10)),
        *(f"LPT{n}" for n in range(1, 10)),
    }
    if safe_stem.upper() in reserved_names:
        safe_stem = f"{replacement}{safe_stem}"

    # Handle empty result
    if not safe_stem:
        safe_stem = "unnamed"

    # Construct result
    result = f"{safe_stem}{suffix}"

    # Truncate if needed (keeping extension when there is room for it)
    if len(result) > max_length:
        available = max_length - len(suffix)
        if available > 0:
            result = f"{safe_stem[:available]}{suffix}"
        else:
            result = result[:max_length]

    return result
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def sanitize_path(
    path: str | Path,
    *,
    base_dir: Path | None = None,
    max_depth: int | None = 10,
    resolve: bool = False,
) -> Path:
    """Sanitize a path to prevent traversal and normalize it.

    ``..`` and ``.`` components are collapsed lexically (never climbing
    above the start of the path) and every remaining component is passed
    through ``sanitize_filename``. The root of an absolute path is kept
    as is.

    Args:
        path: The path to sanitize.
        base_dir: Optional base directory to constrain to. Relative paths
            are joined onto it, and the result must lie inside it.
        max_depth: Maximum directory depth allowed (None disables the check).
        resolve: Whether to return the resolved (symlink-free, absolute)
            path instead of the lexically sanitized one.

    Returns:
        The sanitized Path object.

    Raises:
        ValueError: If the path is too deep, cannot be resolved, or ends up
            outside ``base_dir``.

    """
    # Remove any null bytes before pathlib parses the string
    path = Path(str(path).replace("\x00", ""))

    # The anchor ("/" or a drive root) is not a filename: running it through
    # sanitize_filename would turn it into a bogus "unnamed" directory.
    anchor = path.anchor
    raw_parts = path.parts[1:] if anchor else path.parts

    # Remove any .. or . components manually
    parts: list[str] = []
    for part in raw_parts:
        if part == "..":
            # Step back one level; silently ignore ".." at the top.
            if parts:
                parts.pop()
        elif part != ".":
            # Sanitize each component
            safe_part = sanitize_filename(part, preserve_extension=True)
            if safe_part:  # Skip empty parts
                parts.append(safe_part)

    # Reconstruct path
    root = Path(anchor) if path.is_absolute() else Path()
    sanitized = root.joinpath(*parts)

    # Check depth (skip if max_depth is None)
    depth = len(sanitized.parts)
    if max_depth is not None and depth > max_depth:
        msg = f"Path depth {depth} exceeds maximum of {max_depth}"
        raise ValueError(msg)

    base = Path(base_dir).resolve() if base_dir is not None else None

    # Make relative paths absolute under the base directory
    if base is not None and not sanitized.is_absolute():
        sanitized = base / sanitized

    if base is None and not resolve:
        return sanitized

    try:
        resolved = sanitized.resolve()
    except (OSError, RuntimeError) as e:
        msg = f"Cannot resolve path: {e}"
        raise ValueError(msg) from e

    # Enforce the base_dir constraint on the resolved location so that
    # neither an absolute input nor a symlink can point outside of it.
    if base is not None:
        try:
            resolved.relative_to(base)
        except ValueError:
            msg = f"Path {resolved} escapes base directory {base}"
            raise ValueError(msg) from None

    return resolved if resolve else sanitized
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def sanitize_env_value(
    value: str,
    *,
    max_length: int = 4096,
    allow_multiline: bool = False,
) -> str:
    """Clean a string so it can be stored in an environment variable.

    Null bytes are always dropped. Unless ``allow_multiline`` is set, each
    newline and carriage return is replaced by a single space. The result
    is cut to at most ``max_length`` characters.

    Args:
        value: The value to sanitize.
        max_length: Maximum length allowed.
        allow_multiline: Whether to allow newlines.

    Returns:
        The sanitized value (an empty string for empty input).

    """
    if not value:
        return ""

    cleaned = value.replace("\x00", "")

    if not allow_multiline:
        # One pass that maps both line-break characters to a space.
        cleaned = cleaned.translate({ord("\n"): " ", ord("\r"): " "})

    return cleaned[:max_length] if len(cleaned) > max_length else cleaned
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def sanitize_sql_identifier(identifier: str) -> str:
    """Reduce a string to a safe SQL identifier (table/column name).

    Note: This is NOT for SQL values - use parameterized queries for those!

    Every character other than ASCII letters, digits and underscore is
    dropped. If the remainder does not start with a letter or underscore,
    an underscore is prepended. The result is cut to
    ``MAX_SQL_IDENTIFIER_LENGTH`` characters.

    Args:
        identifier: The identifier to sanitize.

    Returns:
        The sanitized identifier.

    Raises:
        ValueError: If the identifier is empty or contains no valid
            characters.

    """
    if not identifier:
        msg = "SQL identifier cannot be empty"
        raise ValueError(msg)

    # Keep only alphanumerics and underscore
    kept = "".join(re.findall(r"[a-zA-Z0-9_]", identifier))
    if not kept:
        msg = "SQL identifier contains no valid characters"
        raise ValueError(msg)

    # Must start with letter or underscore
    first = kept[0]
    if not (first.isalpha() or first == "_"):
        kept = f"_{kept}"

    # Slicing is a no-op when the identifier already fits the length limit.
    return kept[:MAX_SQL_IDENTIFIER_LENGTH]
|