tree-sitter-analyzer 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/cli/commands/base_command.py +3 -1
- tree_sitter_analyzer/mcp/server.py +10 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +677 -673
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +4 -9
- tree_sitter_analyzer/project_detector.py +317 -317
- tree_sitter_analyzer/security/__init__.py +22 -22
- tree_sitter_analyzer/security/boundary_manager.py +44 -2
- tree_sitter_analyzer/security/regex_checker.py +292 -292
- tree_sitter_analyzer/security/validator.py +5 -2
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.8.3.dist-info}/METADATA +7 -6
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.8.3.dist-info}/RECORD +14 -14
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.8.3.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.8.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,292 +1,292 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Regex Safety Checker for Tree-sitter Analyzer
|
|
4
|
-
|
|
5
|
-
Provides ReDoS (Regular Expression Denial of Service) attack prevention
|
|
6
|
-
by analyzing regex patterns for potentially dangerous constructs.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import re
|
|
10
|
-
import time
|
|
11
|
-
from typing import List, Optional, Tuple
|
|
12
|
-
|
|
13
|
-
from ..exceptions import SecurityError
|
|
14
|
-
from ..utils import log_debug, log_warning
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class RegexSafetyChecker:
    """
    Regex safety checker for ReDoS attack prevention.

    This class analyzes regular expressions for patterns that could
    lead to catastrophic backtracking and ReDoS attacks.

    Features:
    - Pattern complexity analysis
    - Dangerous construct detection
    - Execution time monitoring
    - Safe pattern compilation
    """

    # Maximum allowed pattern length
    MAX_PATTERN_LENGTH = 1000

    # Maximum execution time for pattern testing (seconds)
    MAX_EXECUTION_TIME = 1.0

    # Heuristic regexes that are run against the *source text* of a candidate
    # pattern. Every entry must itself be a valid regex: metacharacters of the
    # candidate that should be matched literally have to be escaped here.
    DANGEROUS_PATTERNS = [
        # Nested quantifiers
        r'\(.+\)\+',  # (a+)+
        r'\(.*\)\*',  # (a*)*
        r'\(.{0,}\)\+',  # (.{0,})+
        r'\(.+\)\{.*\}',  # (a+){n,m}

        # Alternation with overlap
        r'\(a\|a\)\*',  # (a|a)*
        r'\([^|]*\|[^|]*\)\+',  # (abc|abd)+

        # Exponential backtracking patterns
        # The backslash of the backreference must be escaped: an unescaped \1
        # refers to a group this heuristic does not have, which makes the
        # heuristic fail to compile and the check get skipped.
        r'\(.*\)\\1',  # (.*)\1 - backreference
        r'\(\?\=.*\)\+',  # (?=.*)+
        r'\(\?\!.*\)\+',  # (?!.*)+
        r'\(\?\<\=.*\)\+',  # (?<=.*)+
        r'\(\?\<\!.*\)\+',  # (?<!.*)+

        # Catastrophic patterns
        r'\([^)]*\+[^)]*\)\+',  # Nested + quantifiers
        r'\([^)]*\*[^)]*\)\*',  # Nested * quantifiers
    ]

    def __init__(self) -> None:
        """Initialize regex safety checker."""
        log_debug("RegexSafetyChecker initialized")

    def validate_pattern(self, pattern: str) -> Tuple[bool, str]:
        """
        Validate regex pattern for safety.

        The checks run in order and stop at the first failure: basic type and
        emptiness check, length limit, dangerous-construct heuristics,
        compilation, and finally a timing probe against sample strings.

        Args:
            pattern: Regex pattern to validate

        Returns:
            Tuple of (is_safe, error_message). error_message is an empty
            string when the pattern is considered safe.

        Example:
            >>> checker = RegexSafetyChecker()
            >>> is_safe, error = checker.validate_pattern(r"hello.*world")
            >>> assert is_safe
        """
        try:
            # Basic validation
            if not pattern or not isinstance(pattern, str):
                return False, "Pattern must be a non-empty string"

            # Length check
            if len(pattern) > self.MAX_PATTERN_LENGTH:
                return False, f"Pattern too long: {len(pattern)} > {self.MAX_PATTERN_LENGTH}"

            # Check for dangerous patterns
            dangerous_found = self._check_dangerous_patterns(pattern)
            if dangerous_found:
                return False, f"Potentially dangerous regex pattern detected: {dangerous_found}"

            # Compilation check
            compilation_error = self._check_compilation(pattern)
            if compilation_error:
                return False, f"Invalid regex pattern: {compilation_error}"

            # Performance check
            performance_error = self._check_performance(pattern)
            if performance_error:
                return False, f"Pattern performance issue: {performance_error}"

            log_debug(f"Regex pattern validation passed: {pattern}")
            return True, ""

        except Exception as e:
            # Any unexpected failure is reported as "not safe" rather than raised.
            log_warning(f"Regex validation error: {e}")
            return False, f"Validation error: {e}"

    def _check_dangerous_patterns(self, pattern: str) -> Optional[str]:
        """
        Check for known dangerous regex patterns.

        Args:
            pattern: Pattern to check

        Returns:
            The first entry of DANGEROUS_PATTERNS that matches the pattern
            text, or None if no entry matches
        """
        for dangerous_pattern in self.DANGEROUS_PATTERNS:
            try:
                if re.search(dangerous_pattern, pattern):
                    log_warning(f"Dangerous pattern detected: {dangerous_pattern} in {pattern}")
                    return dangerous_pattern
            except re.error as e:
                # A heuristic that does not compile cannot protect anything;
                # report it instead of skipping it silently, then move on so
                # the remaining heuristics still run.
                log_warning(f"Invalid dangerous-pattern heuristic skipped: {dangerous_pattern} ({e})")
                continue

        return None

    def _check_compilation(self, pattern: str) -> Optional[str]:
        """
        Check if pattern compiles successfully.

        Args:
            pattern: Pattern to compile

        Returns:
            Error message if compilation fails, None if successful
        """
        try:
            re.compile(pattern)
            return None
        except re.error as e:
            log_warning(f"Regex compilation failed: {e}")
            return str(e)

    def _check_performance(self, pattern: str) -> Optional[str]:
        """
        Check pattern performance with test strings.

        Each sample string is run through both search() and match(), and the
        combined elapsed time is compared with MAX_EXECUTION_TIME. The time is
        measured after the calls return, so this detects slow patterns but
        does not interrupt a match that is already running.

        Args:
            pattern: Pattern to test

        Returns:
            Error message if performance is poor, None if acceptable
        """
        try:
            compiled_pattern = re.compile(pattern)

            # Test strings that might cause backtracking
            test_strings = [
                "a" * 100,  # Long string of same character
                "ab" * 50,  # Alternating pattern
                "x" * 50 + "y",  # Long string with different ending
                "a" * 30 + "b" * 30 + "c" * 30,  # Mixed long string
            ]

            for test_string in test_strings:
                # perf_counter is monotonic, so the measured duration cannot
                # be distorted by wall-clock adjustments.
                start_time = time.perf_counter()

                try:
                    # Test both search and match operations
                    compiled_pattern.search(test_string)
                    compiled_pattern.match(test_string)

                    execution_time = time.perf_counter() - start_time

                    if execution_time > self.MAX_EXECUTION_TIME:
                        log_warning(
                            f"Regex performance issue: {execution_time:.3f}s > {self.MAX_EXECUTION_TIME}s"
                        )
                        return f"Pattern execution too slow: {execution_time:.3f}s"

                except Exception as e:
                    log_warning(f"Regex execution error: {e}")
                    return f"Pattern execution error: {e}"

            return None

        except Exception as e:
            log_warning(f"Performance check error: {e}")
            return f"Performance check failed: {e}"

    def analyze_complexity(self, pattern: str) -> dict:
        """
        Analyze regex pattern complexity.

        The metrics are plain character counts over the pattern text; escaped
        characters and characters inside character classes are counted too.

        Args:
            pattern: Pattern to analyze

        Returns:
            Dictionary with the keys "length", "quantifiers", "groups",
            "alternations", "character_classes", "anchors" and
            "complexity_score", or {"error": message} if the analysis fails
        """
        try:
            metrics = {
                "length": len(pattern),
                "quantifiers": len(re.findall(r'[+*?{]', pattern)),
                "groups": len(re.findall(r'\(', pattern)),
                "alternations": len(re.findall(r'\|', pattern)),
                "character_classes": len(re.findall(r'\[', pattern)),
                "anchors": len(re.findall(r'[\^$]', pattern)),
                "complexity_score": 0,
            }

            # Calculate complexity score (weighted sum; anchors are reported
            # but do not contribute to the score)
            metrics["complexity_score"] = (
                metrics["length"] * 0.1
                + metrics["quantifiers"] * 2
                + metrics["groups"] * 1.5
                + metrics["alternations"] * 3
                + metrics["character_classes"] * 1
            )

            return metrics

        except Exception as e:
            log_warning(f"Complexity analysis error: {e}")
            return {"error": str(e)}

    def suggest_safer_pattern(self, pattern: str) -> Optional[str]:
        """
        Suggest a safer alternative for dangerous patterns.

        Args:
            pattern: Original pattern

        Returns:
            Suggested safer pattern, or None if no suggestion available
        """
        # Only suggest for patterns that are actually dangerous
        is_dangerous = self._check_dangerous_patterns(pattern)
        if not is_dangerous:
            return None

        # Simple pattern replacements for common dangerous cases.
        # The doubled backslash is consumed by re.sub's replacement-template
        # processing, so the text inserted into the suggestion is [^\s]+ / [^\s]*.
        replacements = {
            r'\(.+\)\+': r'[^\\s]+',  # Replace (a+)+ with [^\s]+
            r'\(.*\)\*': r'[^\\s]*',  # Replace (.*)* with [^\s]*
        }

        for dangerous, safer in replacements.items():
            if re.search(dangerous, pattern):
                suggested = re.sub(dangerous, safer, pattern)
                log_debug(f"Suggested safer pattern: {pattern} -> {suggested}")
                return suggested

        return None

    def get_safe_flags(self) -> int:
        """
        Get recommended safe regex flags.

        Returns:
            Combination of regex flags used by default in create_safe_pattern
        """
        # These flags only change how ^, $ and . match; they do not by
        # themselves bound matching time.
        return re.MULTILINE | re.DOTALL

    def create_safe_pattern(self, pattern: str, flags: Optional[int] = None) -> Optional[re.Pattern]:
        """
        Create a safely compiled regex pattern.

        Args:
            pattern: Pattern to compile
            flags: Optional regex flags; when None, get_safe_flags() is used

        Returns:
            Compiled pattern if safe, None if validation or compilation fails
        """
        is_safe, error = self.validate_pattern(pattern)
        if not is_safe:
            log_warning(f"Cannot create unsafe pattern: {error}")
            return None

        try:
            safe_flags = flags if flags is not None else self.get_safe_flags()
            return re.compile(pattern, safe_flags)
        except re.error as e:
            log_warning(f"Pattern compilation failed: {e}")
            return None
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Regex Safety Checker for Tree-sitter Analyzer
|
|
4
|
+
|
|
5
|
+
Provides ReDoS (Regular Expression Denial of Service) attack prevention
|
|
6
|
+
by analyzing regex patterns for potentially dangerous constructs.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
import time
|
|
11
|
+
from typing import List, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
from ..exceptions import SecurityError
|
|
14
|
+
from ..utils import log_debug, log_warning
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RegexSafetyChecker:
    """
    Regex safety checker for ReDoS attack prevention.

    This class analyzes regular expressions for patterns that could
    lead to catastrophic backtracking and ReDoS attacks.

    Features:
    - Pattern complexity analysis
    - Dangerous construct detection
    - Execution time monitoring
    - Safe pattern compilation
    """

    # Maximum allowed pattern length
    MAX_PATTERN_LENGTH = 1000

    # Maximum execution time for pattern testing (seconds)
    MAX_EXECUTION_TIME = 1.0

    # Heuristic regexes that are run against the *source text* of a candidate
    # pattern. Every entry must itself be a valid regex: metacharacters of the
    # candidate that should be matched literally have to be escaped here.
    DANGEROUS_PATTERNS = [
        # Nested quantifiers
        r'\(.+\)\+',  # (a+)+
        r'\(.*\)\*',  # (a*)*
        r'\(.{0,}\)\+',  # (.{0,})+
        r'\(.+\)\{.*\}',  # (a+){n,m}

        # Alternation with overlap
        r'\(a\|a\)\*',  # (a|a)*
        r'\([^|]*\|[^|]*\)\+',  # (abc|abd)+

        # Exponential backtracking patterns
        # The backslash of the backreference must be escaped: an unescaped \1
        # refers to a group this heuristic does not have, which makes the
        # heuristic fail to compile and the check get skipped.
        r'\(.*\)\\1',  # (.*)\1 - backreference
        r'\(\?\=.*\)\+',  # (?=.*)+
        r'\(\?\!.*\)\+',  # (?!.*)+
        r'\(\?\<\=.*\)\+',  # (?<=.*)+
        r'\(\?\<\!.*\)\+',  # (?<!.*)+

        # Catastrophic patterns
        r'\([^)]*\+[^)]*\)\+',  # Nested + quantifiers
        r'\([^)]*\*[^)]*\)\*',  # Nested * quantifiers
    ]

    def __init__(self) -> None:
        """Initialize regex safety checker."""
        log_debug("RegexSafetyChecker initialized")

    def validate_pattern(self, pattern: str) -> Tuple[bool, str]:
        """
        Validate regex pattern for safety.

        The checks run in order and stop at the first failure: basic type and
        emptiness check, length limit, dangerous-construct heuristics,
        compilation, and finally a timing probe against sample strings.

        Args:
            pattern: Regex pattern to validate

        Returns:
            Tuple of (is_safe, error_message). error_message is an empty
            string when the pattern is considered safe.

        Example:
            >>> checker = RegexSafetyChecker()
            >>> is_safe, error = checker.validate_pattern(r"hello.*world")
            >>> assert is_safe
        """
        try:
            # Basic validation
            if not pattern or not isinstance(pattern, str):
                return False, "Pattern must be a non-empty string"

            # Length check
            if len(pattern) > self.MAX_PATTERN_LENGTH:
                return False, f"Pattern too long: {len(pattern)} > {self.MAX_PATTERN_LENGTH}"

            # Check for dangerous patterns
            dangerous_found = self._check_dangerous_patterns(pattern)
            if dangerous_found:
                return False, f"Potentially dangerous regex pattern detected: {dangerous_found}"

            # Compilation check
            compilation_error = self._check_compilation(pattern)
            if compilation_error:
                return False, f"Invalid regex pattern: {compilation_error}"

            # Performance check
            performance_error = self._check_performance(pattern)
            if performance_error:
                return False, f"Pattern performance issue: {performance_error}"

            log_debug(f"Regex pattern validation passed: {pattern}")
            return True, ""

        except Exception as e:
            # Any unexpected failure is reported as "not safe" rather than raised.
            log_warning(f"Regex validation error: {e}")
            return False, f"Validation error: {e}"

    def _check_dangerous_patterns(self, pattern: str) -> Optional[str]:
        """
        Check for known dangerous regex patterns.

        Args:
            pattern: Pattern to check

        Returns:
            The first entry of DANGEROUS_PATTERNS that matches the pattern
            text, or None if no entry matches
        """
        for dangerous_pattern in self.DANGEROUS_PATTERNS:
            try:
                if re.search(dangerous_pattern, pattern):
                    log_warning(f"Dangerous pattern detected: {dangerous_pattern} in {pattern}")
                    return dangerous_pattern
            except re.error as e:
                # A heuristic that does not compile cannot protect anything;
                # report it instead of skipping it silently, then move on so
                # the remaining heuristics still run.
                log_warning(f"Invalid dangerous-pattern heuristic skipped: {dangerous_pattern} ({e})")
                continue

        return None

    def _check_compilation(self, pattern: str) -> Optional[str]:
        """
        Check if pattern compiles successfully.

        Args:
            pattern: Pattern to compile

        Returns:
            Error message if compilation fails, None if successful
        """
        try:
            re.compile(pattern)
            return None
        except re.error as e:
            log_warning(f"Regex compilation failed: {e}")
            return str(e)

    def _check_performance(self, pattern: str) -> Optional[str]:
        """
        Check pattern performance with test strings.

        Each sample string is run through both search() and match(), and the
        combined elapsed time is compared with MAX_EXECUTION_TIME. The time is
        measured after the calls return, so this detects slow patterns but
        does not interrupt a match that is already running.

        Args:
            pattern: Pattern to test

        Returns:
            Error message if performance is poor, None if acceptable
        """
        try:
            compiled_pattern = re.compile(pattern)

            # Test strings that might cause backtracking
            test_strings = [
                "a" * 100,  # Long string of same character
                "ab" * 50,  # Alternating pattern
                "x" * 50 + "y",  # Long string with different ending
                "a" * 30 + "b" * 30 + "c" * 30,  # Mixed long string
            ]

            for test_string in test_strings:
                # perf_counter is monotonic, so the measured duration cannot
                # be distorted by wall-clock adjustments.
                start_time = time.perf_counter()

                try:
                    # Test both search and match operations
                    compiled_pattern.search(test_string)
                    compiled_pattern.match(test_string)

                    execution_time = time.perf_counter() - start_time

                    if execution_time > self.MAX_EXECUTION_TIME:
                        log_warning(
                            f"Regex performance issue: {execution_time:.3f}s > {self.MAX_EXECUTION_TIME}s"
                        )
                        return f"Pattern execution too slow: {execution_time:.3f}s"

                except Exception as e:
                    log_warning(f"Regex execution error: {e}")
                    return f"Pattern execution error: {e}"

            return None

        except Exception as e:
            log_warning(f"Performance check error: {e}")
            return f"Performance check failed: {e}"

    def analyze_complexity(self, pattern: str) -> dict:
        """
        Analyze regex pattern complexity.

        The metrics are plain character counts over the pattern text; escaped
        characters and characters inside character classes are counted too.

        Args:
            pattern: Pattern to analyze

        Returns:
            Dictionary with the keys "length", "quantifiers", "groups",
            "alternations", "character_classes", "anchors" and
            "complexity_score", or {"error": message} if the analysis fails
        """
        try:
            metrics = {
                "length": len(pattern),
                "quantifiers": len(re.findall(r'[+*?{]', pattern)),
                "groups": len(re.findall(r'\(', pattern)),
                "alternations": len(re.findall(r'\|', pattern)),
                "character_classes": len(re.findall(r'\[', pattern)),
                "anchors": len(re.findall(r'[\^$]', pattern)),
                "complexity_score": 0,
            }

            # Calculate complexity score (weighted sum; anchors are reported
            # but do not contribute to the score)
            metrics["complexity_score"] = (
                metrics["length"] * 0.1
                + metrics["quantifiers"] * 2
                + metrics["groups"] * 1.5
                + metrics["alternations"] * 3
                + metrics["character_classes"] * 1
            )

            return metrics

        except Exception as e:
            log_warning(f"Complexity analysis error: {e}")
            return {"error": str(e)}

    def suggest_safer_pattern(self, pattern: str) -> Optional[str]:
        """
        Suggest a safer alternative for dangerous patterns.

        Args:
            pattern: Original pattern

        Returns:
            Suggested safer pattern, or None if no suggestion available
        """
        # Only suggest for patterns that are actually dangerous
        is_dangerous = self._check_dangerous_patterns(pattern)
        if not is_dangerous:
            return None

        # Simple pattern replacements for common dangerous cases.
        # The doubled backslash is consumed by re.sub's replacement-template
        # processing, so the text inserted into the suggestion is [^\s]+ / [^\s]*.
        replacements = {
            r'\(.+\)\+': r'[^\\s]+',  # Replace (a+)+ with [^\s]+
            r'\(.*\)\*': r'[^\\s]*',  # Replace (.*)* with [^\s]*
        }

        for dangerous, safer in replacements.items():
            if re.search(dangerous, pattern):
                suggested = re.sub(dangerous, safer, pattern)
                log_debug(f"Suggested safer pattern: {pattern} -> {suggested}")
                return suggested

        return None

    def get_safe_flags(self) -> int:
        """
        Get recommended safe regex flags.

        Returns:
            Combination of regex flags used by default in create_safe_pattern
        """
        # These flags only change how ^, $ and . match; they do not by
        # themselves bound matching time.
        return re.MULTILINE | re.DOTALL

    def create_safe_pattern(self, pattern: str, flags: Optional[int] = None) -> Optional[re.Pattern]:
        """
        Create a safely compiled regex pattern.

        Args:
            pattern: Pattern to compile
            flags: Optional regex flags; when None, get_safe_flags() is used

        Returns:
            Compiled pattern if safe, None if validation or compilation fails
        """
        is_safe, error = self.validate_pattern(pattern)
        if not is_safe:
            log_warning(f"Cannot create unsafe pattern: {error}")
            return None

        try:
            safe_flags = flags if flags is not None else self.get_safe_flags()
            return re.compile(pattern, safe_flags)
        except re.error as e:
            log_warning(f"Pattern compilation failed: {e}")
            return None
|
|
@@ -80,8 +80,11 @@ class SecurityValidator:
|
|
|
80
80
|
if len(file_path) > 1 and file_path[1] == ":" and os.name != 'nt':
|
|
81
81
|
return False, "Windows drive letters are not allowed on this system"
|
|
82
82
|
|
|
83
|
-
# Layer 4: Absolute path check
|
|
84
|
-
|
|
83
|
+
# Layer 4: Absolute path check (handle Windows leading slash/backslash explicitly)
|
|
84
|
+
is_abs = os.path.isabs(file_path) or (
|
|
85
|
+
os.name == 'nt' and (file_path.startswith('/') or file_path.startswith('\\'))
|
|
86
|
+
)
|
|
87
|
+
if is_abs:
|
|
85
88
|
# If we have a project root, check if the absolute path is within it
|
|
86
89
|
if self.boundary_manager and self.boundary_manager.project_root:
|
|
87
90
|
if not self.boundary_manager.is_within_project(file_path):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tree-sitter-analyzer
|
|
3
|
-
Version: 0.8.2
|
|
3
|
+
Version: 0.8.3
|
|
4
4
|
Summary: Extensible multi-language code analyzer framework using Tree-sitter with dynamic plugin architecture
|
|
5
5
|
Project-URL: Homepage, https://github.com/aimasteracc/tree-sitter-analyzer
|
|
6
6
|
Project-URL: Documentation, https://github.com/aimasteracc/tree-sitter-analyzer#readme
|
|
@@ -344,11 +344,12 @@ This project maintains **enterprise-grade quality** with comprehensive testing:
|
|
|
344
344
|
- **Zero test failures** - Complete CI/CD readiness
|
|
345
345
|
- **Cross-platform compatibility** - Windows, macOS, Linux
|
|
346
346
|
|
|
347
|
-
### 🏆 Recent Quality Achievements (v0.8.2)
|
|
348
|
-
- ✅ **Complete test suite stabilization** -
|
|
349
|
-
- ✅ **
|
|
350
|
-
- ✅ **
|
|
351
|
-
- ✅ **
|
|
347
|
+
### 🏆 Recent Quality Achievements (v0.8.2+)
|
|
348
|
+
- ✅ **Complete test suite stabilization** - All 1358 tests passing
|
|
349
|
+
- ✅ **Windows compatibility improvements** - Fixed path handling and security validation
|
|
350
|
+
- ✅ **Enhanced error messaging** - Consistent CLI and MCP error reporting
|
|
351
|
+
- ✅ **Performance optimizations** - Improved timing accuracy in analysis tools
|
|
352
|
+
- ✅ **Security framework enhancements** - Better project boundary management
|
|
352
353
|
|
|
353
354
|
### 🔧 Running Tests
|
|
354
355
|
```bash
|