tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,277 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Project Boundary Manager for Tree-sitter Analyzer
4
+
5
+ Provides strict project boundary control to prevent access to files
6
+ outside the designated project directory.
7
+ """
8
+
9
+ from pathlib import Path
10
+
11
+ from ..exceptions import SecurityError
12
+ from ..utils import log_debug, log_info, log_warning
13
+
14
+
15
+ class ProjectBoundaryManager:
16
+ """
17
+ Project boundary manager for access control.
18
+
19
+ This class enforces strict boundaries around project directories
20
+ to prevent unauthorized file access outside the project scope.
21
+
22
+ Features:
23
+ - Real path resolution for symlink protection
24
+ - Configurable allowed directories
25
+ - Comprehensive boundary checking
26
+ - Audit logging for security events
27
+ """
28
+
29
+ def __init__(self, project_root: str) -> None:
30
+ """
31
+ Initialize project boundary manager.
32
+
33
+ Args:
34
+ project_root: Root directory of the project
35
+
36
+ Raises:
37
+ SecurityError: If project root is invalid
38
+ """
39
+ if not project_root:
40
+ raise SecurityError("Project root cannot be empty")
41
+
42
+ try:
43
+ project_path = Path(project_root)
44
+
45
+ # Handle both string and Path objects
46
+ if isinstance(project_root, str):
47
+ project_path = Path(project_root)
48
+ else:
49
+ raise SecurityError(f"Invalid project root type: {type(project_root)}")
50
+
51
+ # Ensure the path exists and is a directory
52
+ if not project_path.exists():
53
+ raise SecurityError(f"Project root does not exist: {project_root}")
54
+
55
+ if not project_path.is_dir():
56
+ raise SecurityError(f"Project root is not a directory: {project_root}")
57
+
58
+ # Store real path to prevent symlink attacks
59
+ self.project_root = str(project_path.resolve())
60
+ self.allowed_directories: set[str] = {self.project_root}
61
+
62
+ log_debug(
63
+ f"ProjectBoundaryManager initialized with root: {self.project_root}"
64
+ )
65
+
66
+ except Exception as e:
67
+ if isinstance(e, SecurityError):
68
+ raise
69
+ raise SecurityError(
70
+ f"Failed to initialize ProjectBoundaryManager: {e}"
71
+ ) from e
72
+
73
+ def add_allowed_directory(self, directory: str) -> None:
74
+ """
75
+ Add an additional allowed directory.
76
+
77
+ Args:
78
+ directory: Directory path to allow access to
79
+
80
+ Raises:
81
+ SecurityError: If directory is invalid
82
+ """
83
+ if not directory:
84
+ raise SecurityError("Directory cannot be empty")
85
+
86
+ dir_path = Path(directory)
87
+ if not dir_path.exists():
88
+ raise SecurityError(f"Directory does not exist: {directory}")
89
+
90
+ if not dir_path.is_dir():
91
+ raise SecurityError(f"Path is not a directory: {directory}")
92
+
93
+ real_dir = str(dir_path.resolve())
94
+ self.allowed_directories.add(real_dir)
95
+
96
+ log_info(f"Added allowed directory: {real_dir}")
97
+
98
+ def is_within_project(self, file_path: str) -> bool:
99
+ """
100
+ Check if file path is within project boundaries.
101
+
102
+ Args:
103
+ file_path: File path to check
104
+
105
+ Returns:
106
+ True if path is within allowed boundaries
107
+ """
108
+ try:
109
+ if not file_path:
110
+ log_warning("Empty file path provided to boundary check")
111
+ return False
112
+
113
+ # Resolve real path to handle symlinks
114
+ real_path = str(Path(file_path).resolve())
115
+
116
+ # Check against all allowed directories
117
+ for allowed_dir in self.allowed_directories:
118
+ # Use pathlib to check if path is within allowed directory
119
+ try:
120
+ Path(real_path).relative_to(Path(allowed_dir))
121
+ log_debug(f"File path within boundaries: {file_path}")
122
+ return True
123
+ except ValueError:
124
+ # Path is not within this allowed directory, continue checking
125
+ continue
126
+
127
+ log_warning(f"File path outside boundaries: {file_path} -> {real_path}")
128
+ return False
129
+
130
+ except Exception as e:
131
+ log_warning(f"Boundary check error for {file_path}: {e}")
132
+ return False
133
+
134
+ def get_relative_path(self, file_path: str) -> str | None:
135
+ """
136
+ Get relative path from project root if within boundaries.
137
+
138
+ Args:
139
+ file_path: File path to convert
140
+
141
+ Returns:
142
+ Relative path from project root, or None if outside boundaries
143
+ """
144
+ if not self.is_within_project(file_path):
145
+ return None
146
+
147
+ try:
148
+ real_path = Path(file_path).resolve()
149
+ try:
150
+ rel_path = real_path.relative_to(Path(self.project_root))
151
+ except ValueError:
152
+ # Path is not relative to project root
153
+ log_warning(f"Path not relative to project root: {file_path}")
154
+ return None
155
+
156
+ # Ensure relative path doesn't start with ..
157
+ if str(rel_path).startswith(".."):
158
+ log_warning(f"Relative path calculation failed: {rel_path}")
159
+ return None
160
+
161
+ return str(rel_path)
162
+
163
+ except Exception as e:
164
+ log_warning(f"Relative path calculation error: {e}")
165
+ return None
166
+
167
+ def validate_and_resolve_path(self, file_path: str) -> str | None:
168
+ """
169
+ Validate path and return resolved absolute path if within boundaries.
170
+
171
+ Args:
172
+ file_path: File path to validate and resolve
173
+
174
+ Returns:
175
+ Resolved absolute path if valid, None otherwise
176
+ """
177
+ try:
178
+ # Handle relative paths from project root
179
+ file_path_obj = Path(file_path)
180
+ if not file_path_obj.is_absolute():
181
+ full_path = Path(self.project_root) / file_path
182
+ else:
183
+ full_path = file_path_obj
184
+
185
+ # Check boundaries
186
+ if not self.is_within_project(str(full_path)):
187
+ return None
188
+
189
+ # Return real path
190
+ return str(full_path.resolve())
191
+
192
+ except Exception as e:
193
+ log_warning(f"Path validation error: {e}")
194
+ return None
195
+
196
+ def list_allowed_directories(self) -> set[str]:
197
+ """
198
+ Get list of all allowed directories.
199
+
200
+ Returns:
201
+ Set of allowed directory paths
202
+ """
203
+ return self.allowed_directories.copy()
204
+
205
+ def is_symlink_safe(self, file_path: str) -> bool:
206
+ """
207
+ Check if file path is safe from symlink attacks.
208
+
209
+ Args:
210
+ file_path: File path to check
211
+
212
+ Returns:
213
+ True if path is safe from symlink attacks
214
+ """
215
+ try:
216
+ file_path_obj = Path(file_path)
217
+ if not file_path_obj.exists():
218
+ return True # Non-existent files are safe
219
+
220
+ # If the fully resolved path is within project boundaries, we treat it as safe.
221
+ # This makes the check tolerant to system-level symlinks like
222
+ # /var -> /private/var on macOS runners.
223
+ resolved = str(file_path_obj.resolve())
224
+ if self.is_within_project(resolved):
225
+ return True
226
+
227
+ # Otherwise, inspect each path component symlink to ensure no hop jumps outside
228
+ # the allowed directories.
229
+ path_parts = file_path_obj.parts
230
+ current_path = Path()
231
+
232
+ for part in path_parts:
233
+ current_path = current_path / part if current_path.parts else Path(part)
234
+
235
+ if current_path.is_symlink():
236
+ target = str(current_path.resolve())
237
+ if not self.is_within_project(target):
238
+ log_warning(
239
+ f"Unsafe symlink detected: {current_path} -> {target}"
240
+ )
241
+ return False
242
+
243
+ # If no unsafe hop found, consider safe
244
+ return True
245
+
246
+ except Exception as e:
247
+ log_warning(f"Symlink safety check error: {e}")
248
+ return False
249
+
250
+ def audit_access(self, file_path: str, operation: str) -> None:
251
+ """
252
+ Log file access for security auditing.
253
+
254
+ Args:
255
+ file_path: File path being accessed
256
+ operation: Type of operation (read, write, analyze, etc.)
257
+ """
258
+ is_within = self.is_within_project(file_path)
259
+ status = "ALLOWED" if is_within else "DENIED"
260
+
261
+ log_info(f"AUDIT: {status} {operation} access to {file_path}")
262
+
263
+ if not is_within:
264
+ log_warning(f"SECURITY: Unauthorized access attempt to {file_path}")
265
+
266
+ def __str__(self) -> str:
267
+ """String representation of boundary manager."""
268
+ return f"ProjectBoundaryManager(root={self.project_root}, allowed_dirs={len(self.allowed_directories)})"
269
+
270
+ def __repr__(self) -> str:
271
+ """Detailed representation of boundary manager."""
272
+ return (
273
+ f"ProjectBoundaryManager("
274
+ f"project_root='{self.project_root}', "
275
+ f"allowed_directories={self.allowed_directories}"
276
+ f")"
277
+ )
@@ -0,0 +1,297 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Regex Safety Checker for Tree-sitter Analyzer
4
+
5
+ Provides ReDoS (Regular Expression Denial of Service) attack prevention
6
+ by analyzing regex patterns for potentially dangerous constructs.
7
+ """
8
+
9
+ import re
10
+ import time
11
+
12
+ from ..utils import log_debug, log_warning
13
+
14
+
15
+ class RegexSafetyChecker:
16
+ """
17
+ Regex safety checker for ReDoS attack prevention.
18
+
19
+ This class analyzes regular expressions for patterns that could
20
+ lead to catastrophic backtracking and ReDoS attacks.
21
+
22
+ Features:
23
+ - Pattern complexity analysis
24
+ - Dangerous construct detection
25
+ - Execution time monitoring
26
+ - Safe pattern compilation
27
+ """
28
+
29
+ # Maximum allowed pattern length
30
+ MAX_PATTERN_LENGTH = 1000
31
+
32
+ # Maximum execution time for pattern testing (seconds)
33
+ MAX_EXECUTION_TIME = 1.0
34
+
35
+ # Dangerous regex patterns that can cause ReDoS
36
+ DANGEROUS_PATTERNS = [
37
+ # Nested quantifiers
38
+ r"\(.+\)\+", # (a+)+
39
+ r"\(.*\)\*", # (a*)*
40
+ r"\(.{0,}\)\+", # (.{0,})+
41
+ r"\(.+\)\{.*\}", # (a+){n,m}
42
+ # Alternation with overlap
43
+ r"\(a\|a\)\*", # (a|a)*
44
+ r"\([^|]*\|[^|]*\)\+", # (abc|abd)+
45
+ # Exponential backtracking patterns
46
+ r"\(.*\)\1", # (.*)\1 - backreference
47
+ r"\(\?\=.*\)\+", # (?=.*)+
48
+ r"\(\?\!.*\)\+", # (?!.*)+
49
+ r"\(\?\<\=.*\)\+", # (?<=.*)+
50
+ r"\(\?\<\!.*\)\+", # (?<!.*)+
51
+ # Catastrophic patterns
52
+ r"\([^)]*\+[^)]*\)\+", # Nested + quantifiers
53
+ r"\([^)]*\*[^)]*\)\*", # Nested * quantifiers
54
+ ]
55
+
56
+ def __init__(self) -> None:
57
+ """Initialize regex safety checker."""
58
+ log_debug("RegexSafetyChecker initialized")
59
+
60
+ def validate_pattern(self, pattern: str) -> tuple[bool, str]:
61
+ """
62
+ Validate regex pattern for safety.
63
+
64
+ Args:
65
+ pattern: Regex pattern to validate
66
+
67
+ Returns:
68
+ Tuple of (is_safe, error_message)
69
+
70
+ Example:
71
+ >>> checker = RegexSafetyChecker()
72
+ >>> is_safe, error = checker.validate_pattern(r"hello.*world")
73
+ >>> assert is_safe
74
+ """
75
+ try:
76
+ # Basic validation
77
+ if not pattern or not isinstance(pattern, str):
78
+ return False, "Pattern must be a non-empty string"
79
+
80
+ # Length check
81
+ if len(pattern) > self.MAX_PATTERN_LENGTH:
82
+ return (
83
+ False,
84
+ f"Pattern too long: {len(pattern)} > {self.MAX_PATTERN_LENGTH}",
85
+ )
86
+
87
+ # Check for dangerous patterns
88
+ dangerous_found = self._check_dangerous_patterns(pattern)
89
+ if dangerous_found:
90
+ return (
91
+ False,
92
+ f"Potentially dangerous regex pattern detected: {dangerous_found}",
93
+ )
94
+
95
+ # Compilation check
96
+ compilation_error = self._check_compilation(pattern)
97
+ if compilation_error:
98
+ return False, f"Invalid regex pattern: {compilation_error}"
99
+
100
+ # Performance check
101
+ performance_error = self._check_performance(pattern)
102
+ if performance_error:
103
+ return False, f"Pattern performance issue: {performance_error}"
104
+
105
+ log_debug(f"Regex pattern validation passed: {pattern}")
106
+ return True, ""
107
+
108
+ except Exception as e:
109
+ log_warning(f"Regex validation error: {e}")
110
+ return False, f"Validation error: {str(e)}"
111
+
112
+ def _check_dangerous_patterns(self, pattern: str) -> str | None:
113
+ """
114
+ Check for known dangerous regex patterns.
115
+
116
+ Args:
117
+ pattern: Pattern to check
118
+
119
+ Returns:
120
+ Description of dangerous pattern found, or None if safe
121
+ """
122
+ for dangerous_pattern in self.DANGEROUS_PATTERNS:
123
+ try:
124
+ if re.search(dangerous_pattern, pattern):
125
+ log_warning(
126
+ f"Dangerous pattern detected: {dangerous_pattern} in {pattern}"
127
+ )
128
+ return dangerous_pattern
129
+ except re.error:
130
+ # If the dangerous pattern itself is invalid, skip it
131
+ continue
132
+
133
+ return None
134
+
135
+ def _check_compilation(self, pattern: str) -> str | None:
136
+ """
137
+ Check if pattern compiles successfully.
138
+
139
+ Args:
140
+ pattern: Pattern to compile
141
+
142
+ Returns:
143
+ Error message if compilation fails, None if successful
144
+ """
145
+ try:
146
+ re.compile(pattern)
147
+ return None
148
+ except re.error as e:
149
+ log_warning(f"Regex compilation failed: {e}")
150
+ return str(e)
151
+
152
+ def _check_performance(self, pattern: str) -> str | None:
153
+ """
154
+ Check pattern performance with test strings.
155
+
156
+ Args:
157
+ pattern: Pattern to test
158
+
159
+ Returns:
160
+ Error message if performance is poor, None if acceptable
161
+ """
162
+ try:
163
+ compiled_pattern = re.compile(pattern)
164
+
165
+ # Test strings that might cause backtracking
166
+ test_strings = [
167
+ "a" * 100, # Long string of same character
168
+ "ab" * 50, # Alternating pattern
169
+ "x" * 50 + "y", # Long string with different ending
170
+ "a" * 30 + "b" * 30 + "c" * 30, # Mixed long string
171
+ ]
172
+
173
+ for test_string in test_strings:
174
+ start_time = time.time()
175
+
176
+ try:
177
+ # Test both search and match operations
178
+ compiled_pattern.search(test_string)
179
+ compiled_pattern.match(test_string)
180
+
181
+ execution_time = time.time() - start_time
182
+
183
+ if execution_time > self.MAX_EXECUTION_TIME:
184
+ log_warning(
185
+ f"Regex performance issue: {execution_time:.3f}s > {self.MAX_EXECUTION_TIME}s"
186
+ )
187
+ return f"Pattern execution too slow: {execution_time:.3f}s"
188
+
189
+ except Exception as e:
190
+ log_warning(f"Regex execution error: {e}")
191
+ return f"Pattern execution error: {str(e)}"
192
+
193
+ return None
194
+
195
+ except Exception as e:
196
+ log_warning(f"Performance check error: {e}")
197
+ return f"Performance check failed: {str(e)}"
198
+
199
+ def analyze_complexity(self, pattern: str) -> dict:
200
+ """
201
+ Analyze regex pattern complexity.
202
+
203
+ Args:
204
+ pattern: Pattern to analyze
205
+
206
+ Returns:
207
+ Dictionary with complexity metrics
208
+ """
209
+ try:
210
+ metrics = {
211
+ "length": len(pattern),
212
+ "quantifiers": len(re.findall(r"[+*?{]", pattern)),
213
+ "groups": len(re.findall(r"\(", pattern)),
214
+ "alternations": len(re.findall(r"\|", pattern)),
215
+ "character_classes": len(re.findall(r"\[", pattern)),
216
+ "anchors": len(re.findall(r"[\^$]", pattern)),
217
+ "complexity_score": 0,
218
+ }
219
+
220
+ # Calculate complexity score
221
+ metrics["complexity_score"] = (
222
+ int(metrics["length"] * 0.1)
223
+ + metrics["quantifiers"] * 2
224
+ + int(metrics["groups"] * 1.5)
225
+ + metrics["alternations"] * 3
226
+ + metrics["character_classes"] * 1
227
+ )
228
+
229
+ return metrics
230
+
231
+ except Exception as e:
232
+ log_warning(f"Complexity analysis error: {e}")
233
+ return {"error": str(e)}
234
+
235
+ def suggest_safer_pattern(self, pattern: str) -> str | None:
236
+ """
237
+ Suggest a safer alternative for dangerous patterns.
238
+
239
+ Args:
240
+ pattern: Original pattern
241
+
242
+ Returns:
243
+ Suggested safer pattern, or None if no suggestion available
244
+ """
245
+ # Only suggest for patterns that are actually dangerous
246
+ is_dangerous = self._check_dangerous_patterns(pattern)
247
+ if not is_dangerous:
248
+ return None
249
+
250
+ # Simple pattern replacements for common dangerous cases
251
+ replacements = {
252
+ r"\(.+\)\+": r"[^\\s]+", # Replace (a+)+ with [^\s]+
253
+ r"\(.*\)\*": r"[^\\s]*", # Replace (.*)* with [^\s]*
254
+ }
255
+
256
+ for dangerous, safer in replacements.items():
257
+ if re.search(dangerous, pattern):
258
+ suggested = re.sub(dangerous, safer, pattern)
259
+ log_debug(f"Suggested safer pattern: {pattern} -> {suggested}")
260
+ return suggested
261
+
262
+ return None
263
+
264
+ def get_safe_flags(self) -> int:
265
+ """
266
+ Get recommended safe regex flags.
267
+
268
+ Returns:
269
+ Combination of safe regex flags
270
+ """
271
+ # Use flags that prevent some ReDoS attacks
272
+ return re.MULTILINE | re.DOTALL
273
+
274
+ def create_safe_pattern(
275
+ self, pattern: str, flags: int | None = None
276
+ ) -> re.Pattern | None:
277
+ """
278
+ Create a safely compiled regex pattern.
279
+
280
+ Args:
281
+ pattern: Pattern to compile
282
+ flags: Optional regex flags
283
+
284
+ Returns:
285
+ Compiled pattern if safe, None if dangerous
286
+ """
287
+ is_safe, error = self.validate_pattern(pattern)
288
+ if not is_safe:
289
+ log_warning(f"Cannot create unsafe pattern: {error}")
290
+ return None
291
+
292
+ try:
293
+ safe_flags = flags if flags is not None else self.get_safe_flags()
294
+ return re.compile(pattern, safe_flags)
295
+ except re.error as e:
296
+ log_warning(f"Pattern compilation failed: {e}")
297
+ return None