taipanstack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,362 @@
1
+ """
2
+ Runtime guards for protection against errors and AI hallucinations.
3
+
4
+ These guards provide runtime protection against common security issues
5
+ and programming errors that can occur from incorrect AI-generated code.
6
+ All guards raise SecurityError on violation.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import re
13
+ from collections.abc import Sequence
14
+ from pathlib import Path
15
+
16
+
17
class SecurityError(Exception):
    """Raised when a security guard detects a violation.

    The exception text is formatted as ``[<guard_name>] <message>``. The raw
    pieces are also kept as attributes so callers can log or branch on them
    without parsing the formatted string.

    Attributes:
        message: Description of the violation, without the guard prefix.
        guard_name: Name of the guard that was triggered.
        value: The offending value (if safe to log), otherwise None.

    """

    def __init__(
        self,
        message: str,
        guard_name: str = "unknown",
        value: str | None = None,
    ) -> None:
        """Initialize SecurityError.

        Args:
            message: Description of the violation.
            guard_name: Name of the guard that triggered.
            value: The offending value (sanitized).

        """
        # Store the unprefixed message so the documented attribute exists.
        self.message = message
        self.guard_name = guard_name
        self.value = value
        super().__init__(f"[{guard_name}] {message}")
44
+
45
+
46
def _find_symlink_component(candidate: Path, base_dir: Path) -> Path | None:
    """Return the first symlinked component of *candidate* below *base_dir*."""
    try:
        below_base = candidate.relative_to(base_dir).parts
    except ValueError:
        # The candidate reaches base_dir through a differently spelled prefix,
        # so its components cannot be walked from base_dir; check the leaf only.
        return candidate if candidate.is_symlink() else None

    current = base_dir
    for part in below_base:
        current = current / part
        if current.is_symlink():
            return current
    return None


def guard_path_traversal(
    path: Path | str,
    base_dir: Path | str | None = None,
    *,
    allow_symlinks: bool = False,
) -> Path:
    """Prevent path traversal attacks.

    Ensures that the given path does not escape the base directory
    using techniques like '..' or symlinks.

    Args:
        path: The path to validate.
        base_dir: The base directory to constrain to. Defaults to cwd.
        allow_symlinks: Whether to allow symlinks (default: False).

    Returns:
        The resolved, validated path.

    Raises:
        SecurityError: If a traversal pattern is present, the path cannot be
            resolved, the resolved path is outside ``base_dir``, or a symlink
            is traversed while ``allow_symlinks`` is False.

    Example:
        >>> guard_path_traversal("../etc/passwd", Path("/app"))
        Traceback (most recent call last):
            ...
        SecurityError: [path_traversal] Path traversal pattern detected: ..

    """
    path = Path(path) if isinstance(path, str) else path
    base_dir = Path(base_dir).resolve() if base_dir else Path.cwd().resolve()

    # Check for explicit traversal patterns before resolution. These are
    # plain, case-insensitive substring tests (not regular expressions).
    path_str = str(path)
    lowered = path_str.lower()
    traversal_patterns = [
        "..",
        "~",
        r"\.\.",
        "%2e%2e",  # URL encoded ..
        "%252e%252e",  # Double URL encoded
    ]

    for pattern in traversal_patterns:
        if pattern.lower() in lowered:
            raise SecurityError(
                f"Path traversal pattern detected: {pattern}",
                guard_name="path_traversal",
                value=path_str[:50],  # Truncate for safety
            )

    # Keep the unresolved candidate: resolve() follows symlinks, so the
    # symlink check below has to look at the path as it was given.
    candidate = path if path.is_absolute() else base_dir / path

    try:
        resolved = candidate.resolve()
    except (OSError, ValueError, RuntimeError) as e:
        # RuntimeError is what older Python versions raise for symlink loops.
        raise SecurityError(
            f"Invalid path: {e}",
            guard_name="path_traversal",
        ) from e

    # Check if resolved path is within base_dir
    try:
        resolved.relative_to(base_dir)
    except ValueError as e:
        raise SecurityError(
            f"Path escapes base directory: {resolved} is not under {base_dir}",
            guard_name="path_traversal",
            value=str(resolved)[:100],
        ) from e

    # Check for symlinks if not allowed. This inspects the unresolved
    # components, because a resolved path never reports itself as a symlink.
    if not allow_symlinks:
        symlink = _find_symlink_component(candidate, base_dir)
        if symlink is not None:
            raise SecurityError(
                "Symlinks are not allowed",
                guard_name="path_traversal",
                value=str(symlink),
            )

    return resolved
125
+
126
+
127
def guard_command_injection(
    command: Sequence[str],
    *,
    allowed_commands: Sequence[str] | None = None,
) -> list[str]:
    """Prevent command injection attacks.

    Validates that command arguments don't contain shell metacharacters
    that could lead to command injection.

    Args:
        command: The command and arguments as a sequence of strings. A single
            string (e.g. ``"ls -la"``) is rejected rather than being split
            into characters.
        allowed_commands: Optional whitelist of allowed base commands. An
            entry matches either the full first argument or its basename.

    Returns:
        The validated command as a list.

    Raises:
        SecurityError: If the command is empty, is not a sequence of strings,
            contains a dangerous character, or is not in the whitelist.

    Example:
        >>> guard_command_injection(["echo", "hello; rm -rf /"])
        Traceback (most recent call last):
            ...
        SecurityError: [command_injection] Dangerous shell character detected: command separator

    """
    if not command:
        raise SecurityError(
            "Empty command is not allowed",
            guard_name="command_injection",
        )

    # A str is itself a Sequence[str]; list("ls -la") would yield single
    # characters that pass every check and produce a nonsensical argv.
    if isinstance(command, (str, bytes)):
        raise SecurityError(
            "Command must be a sequence of arguments, not a single string",
            guard_name="command_injection",
        )

    cmd_list = list(command)

    # Dangerous shell metacharacters. Multi-character operators come first so
    # they are reported by their own description instead of being shadowed by
    # their single-character prefix.
    dangerous_patterns: list[tuple[str, str]] = [
        ("$(", "command substitution"),
        ("${", "variable expansion"),
        (">>", "redirect append"),
        ("||", "or operator"),
        ("&&", "and operator"),
        (";", "command separator"),
        ("|", "pipe"),
        ("&", "background/and operator"),
        ("$", "variable expansion"),
        ("`", "command substitution"),
        (">", "redirect"),
        ("<", "redirect"),
        ("\n", "newline"),
        ("\r", "carriage return"),
        ("\x00", "null byte"),
    ]

    for arg in cmd_list:
        if not isinstance(arg, str):
            # Report a guard violation instead of failing with a TypeError on
            # the substring test below.
            raise SecurityError(
                "Command arguments must be strings",
                guard_name="command_injection",
                value=repr(arg)[:50],
            )
        for pattern, description in dangerous_patterns:
            if pattern in arg:
                raise SecurityError(
                    f"Dangerous shell character detected: {description}",
                    guard_name="command_injection",
                    value=arg[:50],
                )

    # Check against allowed commands whitelist
    if allowed_commands is not None:
        base_command = cmd_list[0]
        # Get just the command name without path
        command_name = Path(base_command).name
        cmd_not_allowed = (
            command_name not in allowed_commands
            and base_command not in allowed_commands
        )
        if cmd_not_allowed:
            raise SecurityError(
                f"Command not in allowed list: {command_name}",
                guard_name="command_injection",
                value=command_name,
            )

    return cmd_list
205
+
206
+
207
def guard_file_extension(
    filename: str | Path,
    *,
    allowed_extensions: Sequence[str] | None = None,
    denied_extensions: Sequence[str] | None = None,
) -> Path:
    """Validate file extension against allow/deny lists.

    The extension is taken from the final path component, compared
    case-insensitively and without the leading dot. Trailing dots and spaces
    are ignored when extracting it, because Windows drops them when the file
    is created (``"evil.exe."`` is stored as ``evil.exe``).

    Args:
        filename: The filename to check.
        allowed_extensions: Extensions to allow (with or without dot).
        denied_extensions: Extensions to deny (with or without dot). When
            given, this list replaces the built-in deny list.

    Returns:
        The filename as a Path.

    Raises:
        SecurityError: If extension is not allowed or is denied.

    """
    path = Path(filename)

    # Strip the trailing characters Windows would silently discard so they
    # cannot be used to hide the real extension from the checks below.
    effective_name = path.name.rstrip(". ")
    ext = Path(effective_name).suffix.lower().lstrip(".")

    # Default dangerous extensions
    default_denied = {
        "exe",
        "dll",
        "so",
        "dylib",  # Executables
        "sh",
        "bash",
        "zsh",
        "ps1",
        "bat",
        "cmd",  # Scripts
        "php",
        "jsp",
        "asp",
        "aspx",  # Server-side scripts
    }

    # Normalize extension lists
    def normalize_ext(e: str) -> str:
        return e.lower().lstrip(".")

    if denied_extensions is not None:
        denied = {normalize_ext(e) for e in denied_extensions}
    else:
        denied = default_denied

    if ext in denied:
        raise SecurityError(
            f"File extension '{ext}' is not allowed",
            guard_name="file_extension",
            value=str(path.name),
        )

    if allowed_extensions is not None:
        allowed = {normalize_ext(e) for e in allowed_extensions}
        if ext not in allowed:
            raise SecurityError(
                f"File extension '{ext}' is not in allowed list",
                guard_name="file_extension",
                value=str(path.name),
            )

    return path
274
+
275
+
276
def guard_env_variable(
    name: str,
    *,
    allowed_names: Sequence[str] | None = None,
    denied_names: Sequence[str] | None = None,
) -> str:
    """Read an environment variable unless its name looks sensitive.

    Names are compared case-insensitively. The deny list is checked first and
    cannot be overridden. ``allowed_names`` only exempts a name from the
    pattern-based heuristics (SECRET, PASSWORD, TOKEN, ...).

    Args:
        name: The environment variable name.
        allowed_names: Names exempt from the sensitive-pattern heuristics.
        denied_names: Names to deny; replaces the built-in deny list if given.

    Returns:
        The value of the environment variable.

    Raises:
        SecurityError: If the name is denied, looks sensitive without being
            explicitly allowed, or the variable is not set.

    """
    canonical = name.upper()

    if denied_names is None:
        # Built-in list of well-known credential variables.
        blocked = {
            "AWS_SECRET_ACCESS_KEY",
            "AWS_SESSION_TOKEN",
            "GITHUB_TOKEN",
            "GH_TOKEN",
            "GITLAB_TOKEN",
            "DATABASE_URL",
            "DB_PASSWORD",
            "PASSWORD",
            "SECRET_KEY",
            "PRIVATE_KEY",
            "API_KEY",
            "API_SECRET",
        }
    else:
        blocked = {entry.upper() for entry in denied_names}

    if canonical in blocked:
        raise SecurityError(
            f"Access to sensitive variable '{name}' is denied",
            guard_name="env_variable",
            value=name,
        )

    # Heuristic patterns for names that merely look like credentials.
    heuristics = (
        r".*SECRET.*",
        r".*PASSWORD.*",
        r".*TOKEN.*",
        r".*PRIVATE.*KEY.*",
        r".*API.*KEY.*",
    )
    if any(re.match(expr, canonical) for expr in heuristics):
        # The allow list is only consulted once a heuristic has matched.
        exempt = allowed_names is not None and canonical in {
            entry.upper() for entry in allowed_names
        }
        if not exempt:
            raise SecurityError(
                f"Access to potentially sensitive variable '{name}' is denied",
                guard_name="env_variable",
                value=name,
            )

    # Look the variable up under the name exactly as the caller spelled it.
    current = os.environ.get(name)
    if current is None:
        raise SecurityError(
            f"Environment variable '{name}' is not set",
            guard_name="env_variable",
            value=name,
        )
    return current
@@ -0,0 +1,321 @@
1
+ """
2
+ Input sanitizers for cleaning untrusted data.
3
+
4
+ Provides functions to sanitize strings, filenames, and paths
5
+ to remove potentially dangerous characters.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ import unicodedata
12
+ from pathlib import Path
13
+
14
+ # Constants to avoid magic values (PLR2004)
15
+ MAX_SQL_IDENTIFIER_LENGTH = 128
16
+
17
+
18
def sanitize_string(
    value: str,
    *,
    max_length: int | None = None,
    allow_html: bool = False,
    allow_unicode: bool = True,
    strip_whitespace: bool = True,
) -> str:
    """Sanitize a string by removing dangerous characters.

    Processing order: control characters are removed (newline, carriage
    return and tab are kept), surrounding whitespace is stripped, HTML tags
    are removed and the remaining ``&``, ``<``, ``>`` are escaped, non-ASCII
    characters are dropped, and finally the result is truncated.

    Args:
        value: The string to sanitize.
        max_length: Maximum length to truncate to.
        allow_html: Whether to keep HTML tags (default: False).
        allow_unicode: Whether to keep non-ASCII characters.
        strip_whitespace: Whether to strip leading/trailing whitespace.

    Returns:
        The sanitized string. When truncation would cut an escaped entity in
        half, the partial entity is dropped, so the result may be slightly
        shorter than ``max_length``.

    Example:
        >>> sanitize_string("<script>alert('xss')</script>Hello")
        "alert('xss')Hello"

    """
    if not value:
        return ""

    # Remove null bytes and other control characters first, so that
    # whitespace hidden behind them is still stripped below.
    result = "".join(
        c for c in value if unicodedata.category(c) != "Cc" or c in "\n\r\t"
    )

    if strip_whitespace:
        result = result.strip()

    # Handle HTML
    if not allow_html:
        # Remove HTML tags
        result = re.sub(r"<[^>]+>", "", result)
        # Escape what is left; "&" must go first so the entities produced
        # for "<" and ">" are not escaped a second time.
        result = result.replace("&", "&amp;")
        result = result.replace("<", "&lt;")
        result = result.replace(">", "&gt;")

    # Handle unicode
    if not allow_unicode:
        result = result.encode("ascii", errors="ignore").decode("ascii")

    # Truncate if needed
    if max_length is not None and len(result) > max_length:
        result = result[:max_length]
        if not allow_html:
            # Every "&" left at this point starts an entity written above; a
            # trailing one without its ";" was cut by the slice, so drop it.
            result = re.sub(r"&[a-z]*$", "", result)

    return result
76
+
77
+
78
def sanitize_filename(
    filename: str,
    *,
    max_length: int = 255,
    replacement: str = "_",
    preserve_extension: bool = True,
) -> str:
    """Sanitize a filename to be safe for filesystem use.

    Only the final path component is used; directory parts are discarded.
    Both the stem and the extension have the following removed or replaced:
    - Characters not allowed in filenames on various OSes
    - Potentially dangerous characters (path separators, control chars)
    - Leading/trailing dots and spaces

    Args:
        filename: The filename to sanitize.
        max_length: Maximum length for the filename.
        replacement: Character to replace invalid chars with.
        preserve_extension: Keep the (sanitized) original extension. When
            False the extension is dropped.

    Returns:
        The sanitized filename, or "unnamed" (plus extension) when nothing
        usable is left of the stem.

    Example:
        >>> sanitize_filename("my/../file<>:name.txt")
        'file_name.txt'

    """
    if not filename:
        return "unnamed"

    # Characters not allowed in filenames (Windows + Unix)
    invalid_chars = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    # Runs of the replacement text; None when invalid characters are deleted.
    repeated = (
        re.compile(f"(?:{re.escape(replacement)})+") if replacement else None
    )

    def scrub(text: str) -> str:
        # A callable repl keeps backslashes in ``replacement`` literal
        # instead of letting re.sub interpret them as escapes.
        cleaned = invalid_chars.sub(lambda _match: replacement, text)
        # Remove leading/trailing dots and spaces (Windows issues)
        cleaned = cleaned.strip(". ")
        if repeated is not None:
            # Collapse multiple replacement chars
            cleaned = repeated.sub(lambda _match: replacement, cleaned)
            cleaned = cleaned.strip(replacement)
        return cleaned

    # Get parts
    original_path = Path(filename)
    safe_stem = scrub(original_path.stem)

    # The extension gets the same cleaning as the stem so that invalid
    # characters cannot survive in it.
    safe_ext = scrub(original_path.suffix[1:]) if preserve_extension else ""
    suffix = f".{safe_ext}" if safe_ext else ""

    # Handle reserved names (Windows)
    reserved_names = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        "COM1",
        "COM2",
        "COM3",
        "COM4",
        "COM5",
        "COM6",
        "COM7",
        "COM8",
        "COM9",
        "LPT1",
        "LPT2",
        "LPT3",
        "LPT4",
        "LPT5",
        "LPT6",
        "LPT7",
        "LPT8",
        "LPT9",
    }

    if safe_stem.upper() in reserved_names:
        safe_stem = f"{replacement}{safe_stem}"

    # Handle empty result
    if not safe_stem:
        safe_stem = "unnamed"

    # Construct result
    result = f"{safe_stem}{suffix}"

    # Truncate if needed (keeping extension)
    if len(result) > max_length:
        available = max_length - len(suffix)
        if available > 0:
            result = f"{safe_stem[:available]}{suffix}"
        else:
            # The extension alone exceeds the limit; cut the whole name.
            result = result[:max_length]

    return result
177
+
178
+
179
def sanitize_path(
    path: str | Path,
    *,
    base_dir: Path | None = None,
    max_depth: int | None = 10,
    resolve: bool = False,
) -> Path:
    """Sanitize a path to prevent traversal and normalize it.

    Null bytes are removed, ``.`` and ``..`` components are collapsed
    lexically (a ``..`` at the top is dropped), and every remaining
    component is passed through ``sanitize_filename``. The root/anchor of an
    absolute path is kept as-is.

    Args:
        path: The path to sanitize.
        base_dir: Optional base directory to constrain to. Relative paths are
            joined onto it; the result must lie inside it.
        max_depth: Maximum number of path parts allowed (the root of an
            absolute path counts as one part). None disables the check.
        resolve: Whether to return the resolved path. Only has an effect
            when ``base_dir`` is given.

    Returns:
        The sanitized Path object.

    Raises:
        ValueError: If the path is too deep, cannot be resolved, or ends up
            outside ``base_dir``.

    """
    # Remove any null bytes, then let Path normalize separators.
    path = Path(str(path).replace("\x00", ""))

    # The anchor ("/" or a drive) is structural, not a filename: running it
    # through sanitize_filename would turn it into a literal "unnamed"
    # directory, so it is skipped here and re-attached below.
    anchor = path.anchor
    components = path.parts[1:] if anchor else path.parts

    # Remove any .. or . components manually
    parts: list[str] = []
    for part in components:
        if part == "..":
            # Step up one level; a ".." with nothing above it is dropped.
            if parts:
                parts.pop()
        elif part != ".":
            # Sanitize each component
            safe_part = sanitize_filename(part, preserve_extension=True)
            if safe_part:  # Skip empty parts
                parts.append(safe_part)

    # Reconstruct path
    root = Path(anchor) if path.is_absolute() else Path()
    sanitized = root.joinpath(*parts)

    # Check depth (skip if max_depth is None)
    depth = len(sanitized.parts)
    if max_depth is not None and depth > max_depth:
        msg = f"Path depth {depth} exceeds maximum of {max_depth}"
        raise ValueError(msg)

    # Constrain to base_dir if provided
    if base_dir is not None:
        base = Path(base_dir).resolve()
        # Make absolute relative to base
        candidate = sanitized if sanitized.is_absolute() else base / sanitized

        # Containment is always judged on the resolved location so that
        # neither an absolute input nor a symlink can point outside the base.
        try:
            resolved = candidate.resolve()
        except (OSError, RuntimeError) as e:
            msg = f"Cannot resolve path: {e}"
            raise ValueError(msg) from e

        try:
            resolved.relative_to(base)
        except ValueError as e:
            msg = f"Path {resolved} escapes base directory {base}"
            raise ValueError(msg) from e

        sanitized = resolved if resolve else candidate

    return sanitized
249
+
250
+
251
def sanitize_env_value(
    value: str,
    *,
    max_length: int = 4096,
    allow_multiline: bool = False,
) -> str:
    """Clean a string so it can be stored in an environment variable.

    Null bytes are dropped, line breaks are flattened to spaces unless
    multiline values are allowed, and the result is cut to ``max_length``.

    Args:
        value: The value to sanitize.
        max_length: Maximum length allowed.
        allow_multiline: Whether to allow newlines.

    Returns:
        The sanitized value ("" for an empty input).

    """
    if not value:
        return ""

    cleaned = value.replace("\x00", "")

    if not allow_multiline:
        # Each LF and CR becomes one space, so CRLF yields two spaces.
        cleaned = cleaned.translate({ord("\n"): " ", ord("\r"): " "})

    # Slicing is a no-op when the text already fits.
    return cleaned[:max_length]
285
+
286
+
287
def sanitize_sql_identifier(identifier: str) -> str:
    """Reduce a string to a safe SQL identifier (table/column name).

    Every character other than ASCII letters, digits and underscore is
    removed. A result starting with a digit is prefixed with an underscore,
    and the result is truncated to ``MAX_SQL_IDENTIFIER_LENGTH`` characters.

    Note: This is NOT for SQL values - use parameterized queries for those!

    Args:
        identifier: The identifier to sanitize.

    Returns:
        The sanitized identifier.

    Raises:
        ValueError: If the identifier is empty or contains no valid
            characters.

    """
    if not identifier:
        msg = "SQL identifier cannot be empty"
        raise ValueError(msg)

    # Only allow alphanumeric and underscore
    cleaned = re.sub(r"[^a-zA-Z0-9_]", "", identifier)
    if not cleaned:
        msg = "SQL identifier contains no valid characters"
        raise ValueError(msg)

    # After filtering, the first character is a letter, an underscore or a
    # digit; only a digit is an invalid start.
    if cleaned[0].isdigit():
        cleaned = f"_{cleaned}"

    # Most DBs limit identifiers to 128 chars; slicing is a no-op otherwise.
    return cleaned[:MAX_SQL_IDENTIFIER_LENGTH]