tree-sitter-analyzer 1.9.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Project Boundary Manager for Tree-sitter Analyzer
|
|
4
|
+
|
|
5
|
+
Provides strict project boundary control to prevent access to files
|
|
6
|
+
outside the designated project directory.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..exceptions import SecurityError
|
|
12
|
+
from ..utils import log_debug, log_info, log_warning
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ProjectBoundaryManager:
|
|
16
|
+
"""
|
|
17
|
+
Project boundary manager for access control.
|
|
18
|
+
|
|
19
|
+
This class enforces strict boundaries around project directories
|
|
20
|
+
to prevent unauthorized file access outside the project scope.
|
|
21
|
+
|
|
22
|
+
Features:
|
|
23
|
+
- Real path resolution for symlink protection
|
|
24
|
+
- Configurable allowed directories
|
|
25
|
+
- Comprehensive boundary checking
|
|
26
|
+
- Audit logging for security events
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(self, project_root: str) -> None:
|
|
30
|
+
"""
|
|
31
|
+
Initialize project boundary manager.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
project_root: Root directory of the project
|
|
35
|
+
|
|
36
|
+
Raises:
|
|
37
|
+
SecurityError: If project root is invalid
|
|
38
|
+
"""
|
|
39
|
+
if not project_root:
|
|
40
|
+
raise SecurityError("Project root cannot be empty")
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
project_path = Path(project_root)
|
|
44
|
+
|
|
45
|
+
# Handle both string and Path objects
|
|
46
|
+
if isinstance(project_root, str):
|
|
47
|
+
project_path = Path(project_root)
|
|
48
|
+
else:
|
|
49
|
+
raise SecurityError(f"Invalid project root type: {type(project_root)}")
|
|
50
|
+
|
|
51
|
+
# Ensure the path exists and is a directory
|
|
52
|
+
if not project_path.exists():
|
|
53
|
+
raise SecurityError(f"Project root does not exist: {project_root}")
|
|
54
|
+
|
|
55
|
+
if not project_path.is_dir():
|
|
56
|
+
raise SecurityError(f"Project root is not a directory: {project_root}")
|
|
57
|
+
|
|
58
|
+
# Store real path to prevent symlink attacks
|
|
59
|
+
self.project_root = str(project_path.resolve())
|
|
60
|
+
self.allowed_directories: set[str] = {self.project_root}
|
|
61
|
+
|
|
62
|
+
log_debug(
|
|
63
|
+
f"ProjectBoundaryManager initialized with root: {self.project_root}"
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
except Exception as e:
|
|
67
|
+
if isinstance(e, SecurityError):
|
|
68
|
+
raise
|
|
69
|
+
raise SecurityError(
|
|
70
|
+
f"Failed to initialize ProjectBoundaryManager: {e}"
|
|
71
|
+
) from e
|
|
72
|
+
|
|
73
|
+
def add_allowed_directory(self, directory: str) -> None:
|
|
74
|
+
"""
|
|
75
|
+
Add an additional allowed directory.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
directory: Directory path to allow access to
|
|
79
|
+
|
|
80
|
+
Raises:
|
|
81
|
+
SecurityError: If directory is invalid
|
|
82
|
+
"""
|
|
83
|
+
if not directory:
|
|
84
|
+
raise SecurityError("Directory cannot be empty")
|
|
85
|
+
|
|
86
|
+
dir_path = Path(directory)
|
|
87
|
+
if not dir_path.exists():
|
|
88
|
+
raise SecurityError(f"Directory does not exist: {directory}")
|
|
89
|
+
|
|
90
|
+
if not dir_path.is_dir():
|
|
91
|
+
raise SecurityError(f"Path is not a directory: {directory}")
|
|
92
|
+
|
|
93
|
+
real_dir = str(dir_path.resolve())
|
|
94
|
+
self.allowed_directories.add(real_dir)
|
|
95
|
+
|
|
96
|
+
log_info(f"Added allowed directory: {real_dir}")
|
|
97
|
+
|
|
98
|
+
def is_within_project(self, file_path: str) -> bool:
|
|
99
|
+
"""
|
|
100
|
+
Check if file path is within project boundaries.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
file_path: File path to check
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
True if path is within allowed boundaries
|
|
107
|
+
"""
|
|
108
|
+
try:
|
|
109
|
+
if not file_path:
|
|
110
|
+
log_warning("Empty file path provided to boundary check")
|
|
111
|
+
return False
|
|
112
|
+
|
|
113
|
+
# Resolve real path to handle symlinks
|
|
114
|
+
real_path = str(Path(file_path).resolve())
|
|
115
|
+
|
|
116
|
+
# Check against all allowed directories
|
|
117
|
+
for allowed_dir in self.allowed_directories:
|
|
118
|
+
# Use pathlib to check if path is within allowed directory
|
|
119
|
+
try:
|
|
120
|
+
Path(real_path).relative_to(Path(allowed_dir))
|
|
121
|
+
log_debug(f"File path within boundaries: {file_path}")
|
|
122
|
+
return True
|
|
123
|
+
except ValueError:
|
|
124
|
+
# Path is not within this allowed directory, continue checking
|
|
125
|
+
continue
|
|
126
|
+
|
|
127
|
+
log_warning(f"File path outside boundaries: {file_path} -> {real_path}")
|
|
128
|
+
return False
|
|
129
|
+
|
|
130
|
+
except Exception as e:
|
|
131
|
+
log_warning(f"Boundary check error for {file_path}: {e}")
|
|
132
|
+
return False
|
|
133
|
+
|
|
134
|
+
def get_relative_path(self, file_path: str) -> str | None:
|
|
135
|
+
"""
|
|
136
|
+
Get relative path from project root if within boundaries.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
file_path: File path to convert
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
Relative path from project root, or None if outside boundaries
|
|
143
|
+
"""
|
|
144
|
+
if not self.is_within_project(file_path):
|
|
145
|
+
return None
|
|
146
|
+
|
|
147
|
+
try:
|
|
148
|
+
real_path = Path(file_path).resolve()
|
|
149
|
+
try:
|
|
150
|
+
rel_path = real_path.relative_to(Path(self.project_root))
|
|
151
|
+
except ValueError:
|
|
152
|
+
# Path is not relative to project root
|
|
153
|
+
log_warning(f"Path not relative to project root: {file_path}")
|
|
154
|
+
return None
|
|
155
|
+
|
|
156
|
+
# Ensure relative path doesn't start with ..
|
|
157
|
+
if str(rel_path).startswith(".."):
|
|
158
|
+
log_warning(f"Relative path calculation failed: {rel_path}")
|
|
159
|
+
return None
|
|
160
|
+
|
|
161
|
+
return str(rel_path)
|
|
162
|
+
|
|
163
|
+
except Exception as e:
|
|
164
|
+
log_warning(f"Relative path calculation error: {e}")
|
|
165
|
+
return None
|
|
166
|
+
|
|
167
|
+
def validate_and_resolve_path(self, file_path: str) -> str | None:
|
|
168
|
+
"""
|
|
169
|
+
Validate path and return resolved absolute path if within boundaries.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
file_path: File path to validate and resolve
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
Resolved absolute path if valid, None otherwise
|
|
176
|
+
"""
|
|
177
|
+
try:
|
|
178
|
+
# Handle relative paths from project root
|
|
179
|
+
file_path_obj = Path(file_path)
|
|
180
|
+
if not file_path_obj.is_absolute():
|
|
181
|
+
full_path = Path(self.project_root) / file_path
|
|
182
|
+
else:
|
|
183
|
+
full_path = file_path_obj
|
|
184
|
+
|
|
185
|
+
# Check boundaries
|
|
186
|
+
if not self.is_within_project(str(full_path)):
|
|
187
|
+
return None
|
|
188
|
+
|
|
189
|
+
# Return real path
|
|
190
|
+
return str(full_path.resolve())
|
|
191
|
+
|
|
192
|
+
except Exception as e:
|
|
193
|
+
log_warning(f"Path validation error: {e}")
|
|
194
|
+
return None
|
|
195
|
+
|
|
196
|
+
def list_allowed_directories(self) -> set[str]:
|
|
197
|
+
"""
|
|
198
|
+
Get list of all allowed directories.
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
Set of allowed directory paths
|
|
202
|
+
"""
|
|
203
|
+
return self.allowed_directories.copy()
|
|
204
|
+
|
|
205
|
+
def is_symlink_safe(self, file_path: str) -> bool:
|
|
206
|
+
"""
|
|
207
|
+
Check if file path is safe from symlink attacks.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
file_path: File path to check
|
|
211
|
+
|
|
212
|
+
Returns:
|
|
213
|
+
True if path is safe from symlink attacks
|
|
214
|
+
"""
|
|
215
|
+
try:
|
|
216
|
+
file_path_obj = Path(file_path)
|
|
217
|
+
if not file_path_obj.exists():
|
|
218
|
+
return True # Non-existent files are safe
|
|
219
|
+
|
|
220
|
+
# If the fully resolved path is within project boundaries, we treat it as safe.
|
|
221
|
+
# This makes the check tolerant to system-level symlinks like
|
|
222
|
+
# /var -> /private/var on macOS runners.
|
|
223
|
+
resolved = str(file_path_obj.resolve())
|
|
224
|
+
if self.is_within_project(resolved):
|
|
225
|
+
return True
|
|
226
|
+
|
|
227
|
+
# Otherwise, inspect each path component symlink to ensure no hop jumps outside
|
|
228
|
+
# the allowed directories.
|
|
229
|
+
path_parts = file_path_obj.parts
|
|
230
|
+
current_path = Path()
|
|
231
|
+
|
|
232
|
+
for part in path_parts:
|
|
233
|
+
current_path = current_path / part if current_path.parts else Path(part)
|
|
234
|
+
|
|
235
|
+
if current_path.is_symlink():
|
|
236
|
+
target = str(current_path.resolve())
|
|
237
|
+
if not self.is_within_project(target):
|
|
238
|
+
log_warning(
|
|
239
|
+
f"Unsafe symlink detected: {current_path} -> {target}"
|
|
240
|
+
)
|
|
241
|
+
return False
|
|
242
|
+
|
|
243
|
+
# If no unsafe hop found, consider safe
|
|
244
|
+
return True
|
|
245
|
+
|
|
246
|
+
except Exception as e:
|
|
247
|
+
log_warning(f"Symlink safety check error: {e}")
|
|
248
|
+
return False
|
|
249
|
+
|
|
250
|
+
def audit_access(self, file_path: str, operation: str) -> None:
|
|
251
|
+
"""
|
|
252
|
+
Log file access for security auditing.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
file_path: File path being accessed
|
|
256
|
+
operation: Type of operation (read, write, analyze, etc.)
|
|
257
|
+
"""
|
|
258
|
+
is_within = self.is_within_project(file_path)
|
|
259
|
+
status = "ALLOWED" if is_within else "DENIED"
|
|
260
|
+
|
|
261
|
+
log_info(f"AUDIT: {status} {operation} access to {file_path}")
|
|
262
|
+
|
|
263
|
+
if not is_within:
|
|
264
|
+
log_warning(f"SECURITY: Unauthorized access attempt to {file_path}")
|
|
265
|
+
|
|
266
|
+
def __str__(self) -> str:
|
|
267
|
+
"""String representation of boundary manager."""
|
|
268
|
+
return f"ProjectBoundaryManager(root={self.project_root}, allowed_dirs={len(self.allowed_directories)})"
|
|
269
|
+
|
|
270
|
+
def __repr__(self) -> str:
|
|
271
|
+
"""Detailed representation of boundary manager."""
|
|
272
|
+
return (
|
|
273
|
+
f"ProjectBoundaryManager("
|
|
274
|
+
f"project_root='{self.project_root}', "
|
|
275
|
+
f"allowed_directories={self.allowed_directories}"
|
|
276
|
+
f")"
|
|
277
|
+
)
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Regex Safety Checker for Tree-sitter Analyzer
|
|
4
|
+
|
|
5
|
+
Provides ReDoS (Regular Expression Denial of Service) attack prevention
|
|
6
|
+
by analyzing regex patterns for potentially dangerous constructs.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
import time
|
|
11
|
+
|
|
12
|
+
from ..utils import log_debug, log_warning
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RegexSafetyChecker:
|
|
16
|
+
"""
|
|
17
|
+
Regex safety checker for ReDoS attack prevention.
|
|
18
|
+
|
|
19
|
+
This class analyzes regular expressions for patterns that could
|
|
20
|
+
lead to catastrophic backtracking and ReDoS attacks.
|
|
21
|
+
|
|
22
|
+
Features:
|
|
23
|
+
- Pattern complexity analysis
|
|
24
|
+
- Dangerous construct detection
|
|
25
|
+
- Execution time monitoring
|
|
26
|
+
- Safe pattern compilation
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
# Maximum allowed pattern length
|
|
30
|
+
MAX_PATTERN_LENGTH = 1000
|
|
31
|
+
|
|
32
|
+
# Maximum execution time for pattern testing (seconds)
|
|
33
|
+
MAX_EXECUTION_TIME = 1.0
|
|
34
|
+
|
|
35
|
+
# Dangerous regex patterns that can cause ReDoS
|
|
36
|
+
DANGEROUS_PATTERNS = [
|
|
37
|
+
# Nested quantifiers
|
|
38
|
+
r"\(.+\)\+", # (a+)+
|
|
39
|
+
r"\(.*\)\*", # (a*)*
|
|
40
|
+
r"\(.{0,}\)\+", # (.{0,})+
|
|
41
|
+
r"\(.+\)\{.*\}", # (a+){n,m}
|
|
42
|
+
# Alternation with overlap
|
|
43
|
+
r"\(a\|a\)\*", # (a|a)*
|
|
44
|
+
r"\([^|]*\|[^|]*\)\+", # (abc|abd)+
|
|
45
|
+
# Exponential backtracking patterns
|
|
46
|
+
r"\(.*\)\1", # (.*)\1 - backreference
|
|
47
|
+
r"\(\?\=.*\)\+", # (?=.*)+
|
|
48
|
+
r"\(\?\!.*\)\+", # (?!.*)+
|
|
49
|
+
r"\(\?\<\=.*\)\+", # (?<=.*)+
|
|
50
|
+
r"\(\?\<\!.*\)\+", # (?<!.*)+
|
|
51
|
+
# Catastrophic patterns
|
|
52
|
+
r"\([^)]*\+[^)]*\)\+", # Nested + quantifiers
|
|
53
|
+
r"\([^)]*\*[^)]*\)\*", # Nested * quantifiers
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
def __init__(self) -> None:
|
|
57
|
+
"""Initialize regex safety checker."""
|
|
58
|
+
log_debug("RegexSafetyChecker initialized")
|
|
59
|
+
|
|
60
|
+
def validate_pattern(self, pattern: str) -> tuple[bool, str]:
|
|
61
|
+
"""
|
|
62
|
+
Validate regex pattern for safety.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
pattern: Regex pattern to validate
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
Tuple of (is_safe, error_message)
|
|
69
|
+
|
|
70
|
+
Example:
|
|
71
|
+
>>> checker = RegexSafetyChecker()
|
|
72
|
+
>>> is_safe, error = checker.validate_pattern(r"hello.*world")
|
|
73
|
+
>>> assert is_safe
|
|
74
|
+
"""
|
|
75
|
+
try:
|
|
76
|
+
# Basic validation
|
|
77
|
+
if not pattern or not isinstance(pattern, str):
|
|
78
|
+
return False, "Pattern must be a non-empty string"
|
|
79
|
+
|
|
80
|
+
# Length check
|
|
81
|
+
if len(pattern) > self.MAX_PATTERN_LENGTH:
|
|
82
|
+
return (
|
|
83
|
+
False,
|
|
84
|
+
f"Pattern too long: {len(pattern)} > {self.MAX_PATTERN_LENGTH}",
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Check for dangerous patterns
|
|
88
|
+
dangerous_found = self._check_dangerous_patterns(pattern)
|
|
89
|
+
if dangerous_found:
|
|
90
|
+
return (
|
|
91
|
+
False,
|
|
92
|
+
f"Potentially dangerous regex pattern detected: {dangerous_found}",
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# Compilation check
|
|
96
|
+
compilation_error = self._check_compilation(pattern)
|
|
97
|
+
if compilation_error:
|
|
98
|
+
return False, f"Invalid regex pattern: {compilation_error}"
|
|
99
|
+
|
|
100
|
+
# Performance check
|
|
101
|
+
performance_error = self._check_performance(pattern)
|
|
102
|
+
if performance_error:
|
|
103
|
+
return False, f"Pattern performance issue: {performance_error}"
|
|
104
|
+
|
|
105
|
+
log_debug(f"Regex pattern validation passed: {pattern}")
|
|
106
|
+
return True, ""
|
|
107
|
+
|
|
108
|
+
except Exception as e:
|
|
109
|
+
log_warning(f"Regex validation error: {e}")
|
|
110
|
+
return False, f"Validation error: {str(e)}"
|
|
111
|
+
|
|
112
|
+
def _check_dangerous_patterns(self, pattern: str) -> str | None:
|
|
113
|
+
"""
|
|
114
|
+
Check for known dangerous regex patterns.
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
pattern: Pattern to check
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
Description of dangerous pattern found, or None if safe
|
|
121
|
+
"""
|
|
122
|
+
for dangerous_pattern in self.DANGEROUS_PATTERNS:
|
|
123
|
+
try:
|
|
124
|
+
if re.search(dangerous_pattern, pattern):
|
|
125
|
+
log_warning(
|
|
126
|
+
f"Dangerous pattern detected: {dangerous_pattern} in {pattern}"
|
|
127
|
+
)
|
|
128
|
+
return dangerous_pattern
|
|
129
|
+
except re.error:
|
|
130
|
+
# If the dangerous pattern itself is invalid, skip it
|
|
131
|
+
continue
|
|
132
|
+
|
|
133
|
+
return None
|
|
134
|
+
|
|
135
|
+
def _check_compilation(self, pattern: str) -> str | None:
|
|
136
|
+
"""
|
|
137
|
+
Check if pattern compiles successfully.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
pattern: Pattern to compile
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
Error message if compilation fails, None if successful
|
|
144
|
+
"""
|
|
145
|
+
try:
|
|
146
|
+
re.compile(pattern)
|
|
147
|
+
return None
|
|
148
|
+
except re.error as e:
|
|
149
|
+
log_warning(f"Regex compilation failed: {e}")
|
|
150
|
+
return str(e)
|
|
151
|
+
|
|
152
|
+
def _check_performance(self, pattern: str) -> str | None:
|
|
153
|
+
"""
|
|
154
|
+
Check pattern performance with test strings.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
pattern: Pattern to test
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
Error message if performance is poor, None if acceptable
|
|
161
|
+
"""
|
|
162
|
+
try:
|
|
163
|
+
compiled_pattern = re.compile(pattern)
|
|
164
|
+
|
|
165
|
+
# Test strings that might cause backtracking
|
|
166
|
+
test_strings = [
|
|
167
|
+
"a" * 100, # Long string of same character
|
|
168
|
+
"ab" * 50, # Alternating pattern
|
|
169
|
+
"x" * 50 + "y", # Long string with different ending
|
|
170
|
+
"a" * 30 + "b" * 30 + "c" * 30, # Mixed long string
|
|
171
|
+
]
|
|
172
|
+
|
|
173
|
+
for test_string in test_strings:
|
|
174
|
+
start_time = time.time()
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
# Test both search and match operations
|
|
178
|
+
compiled_pattern.search(test_string)
|
|
179
|
+
compiled_pattern.match(test_string)
|
|
180
|
+
|
|
181
|
+
execution_time = time.time() - start_time
|
|
182
|
+
|
|
183
|
+
if execution_time > self.MAX_EXECUTION_TIME:
|
|
184
|
+
log_warning(
|
|
185
|
+
f"Regex performance issue: {execution_time:.3f}s > {self.MAX_EXECUTION_TIME}s"
|
|
186
|
+
)
|
|
187
|
+
return f"Pattern execution too slow: {execution_time:.3f}s"
|
|
188
|
+
|
|
189
|
+
except Exception as e:
|
|
190
|
+
log_warning(f"Regex execution error: {e}")
|
|
191
|
+
return f"Pattern execution error: {str(e)}"
|
|
192
|
+
|
|
193
|
+
return None
|
|
194
|
+
|
|
195
|
+
except Exception as e:
|
|
196
|
+
log_warning(f"Performance check error: {e}")
|
|
197
|
+
return f"Performance check failed: {str(e)}"
|
|
198
|
+
|
|
199
|
+
def analyze_complexity(self, pattern: str) -> dict:
|
|
200
|
+
"""
|
|
201
|
+
Analyze regex pattern complexity.
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
pattern: Pattern to analyze
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
Dictionary with complexity metrics
|
|
208
|
+
"""
|
|
209
|
+
try:
|
|
210
|
+
metrics = {
|
|
211
|
+
"length": len(pattern),
|
|
212
|
+
"quantifiers": len(re.findall(r"[+*?{]", pattern)),
|
|
213
|
+
"groups": len(re.findall(r"\(", pattern)),
|
|
214
|
+
"alternations": len(re.findall(r"\|", pattern)),
|
|
215
|
+
"character_classes": len(re.findall(r"\[", pattern)),
|
|
216
|
+
"anchors": len(re.findall(r"[\^$]", pattern)),
|
|
217
|
+
"complexity_score": 0,
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
# Calculate complexity score
|
|
221
|
+
metrics["complexity_score"] = (
|
|
222
|
+
int(metrics["length"] * 0.1)
|
|
223
|
+
+ metrics["quantifiers"] * 2
|
|
224
|
+
+ int(metrics["groups"] * 1.5)
|
|
225
|
+
+ metrics["alternations"] * 3
|
|
226
|
+
+ metrics["character_classes"] * 1
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
return metrics
|
|
230
|
+
|
|
231
|
+
except Exception as e:
|
|
232
|
+
log_warning(f"Complexity analysis error: {e}")
|
|
233
|
+
return {"error": str(e)}
|
|
234
|
+
|
|
235
|
+
def suggest_safer_pattern(self, pattern: str) -> str | None:
|
|
236
|
+
"""
|
|
237
|
+
Suggest a safer alternative for dangerous patterns.
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
pattern: Original pattern
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
Suggested safer pattern, or None if no suggestion available
|
|
244
|
+
"""
|
|
245
|
+
# Only suggest for patterns that are actually dangerous
|
|
246
|
+
is_dangerous = self._check_dangerous_patterns(pattern)
|
|
247
|
+
if not is_dangerous:
|
|
248
|
+
return None
|
|
249
|
+
|
|
250
|
+
# Simple pattern replacements for common dangerous cases
|
|
251
|
+
replacements = {
|
|
252
|
+
r"\(.+\)\+": r"[^\\s]+", # Replace (a+)+ with [^\s]+
|
|
253
|
+
r"\(.*\)\*": r"[^\\s]*", # Replace (.*)* with [^\s]*
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
for dangerous, safer in replacements.items():
|
|
257
|
+
if re.search(dangerous, pattern):
|
|
258
|
+
suggested = re.sub(dangerous, safer, pattern)
|
|
259
|
+
log_debug(f"Suggested safer pattern: {pattern} -> {suggested}")
|
|
260
|
+
return suggested
|
|
261
|
+
|
|
262
|
+
return None
|
|
263
|
+
|
|
264
|
+
def get_safe_flags(self) -> int:
|
|
265
|
+
"""
|
|
266
|
+
Get recommended safe regex flags.
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
Combination of safe regex flags
|
|
270
|
+
"""
|
|
271
|
+
# Use flags that prevent some ReDoS attacks
|
|
272
|
+
return re.MULTILINE | re.DOTALL
|
|
273
|
+
|
|
274
|
+
def create_safe_pattern(
|
|
275
|
+
self, pattern: str, flags: int | None = None
|
|
276
|
+
) -> re.Pattern | None:
|
|
277
|
+
"""
|
|
278
|
+
Create a safely compiled regex pattern.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
pattern: Pattern to compile
|
|
282
|
+
flags: Optional regex flags
|
|
283
|
+
|
|
284
|
+
Returns:
|
|
285
|
+
Compiled pattern if safe, None if dangerous
|
|
286
|
+
"""
|
|
287
|
+
is_safe, error = self.validate_pattern(pattern)
|
|
288
|
+
if not is_safe:
|
|
289
|
+
log_warning(f"Cannot create unsafe pattern: {error}")
|
|
290
|
+
return None
|
|
291
|
+
|
|
292
|
+
try:
|
|
293
|
+
safe_flags = flags if flags is not None else self.get_safe_flags()
|
|
294
|
+
return re.compile(pattern, safe_flags)
|
|
295
|
+
except re.error as e:
|
|
296
|
+
log_warning(f"Pattern compilation failed: {e}")
|
|
297
|
+
return None
|