tree-sitter-analyzer 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic.

Files changed (32)
  1. tree_sitter_analyzer/cli/commands/default_command.py +18 -18
  2. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -141
  3. tree_sitter_analyzer/cli/commands/query_command.py +92 -88
  4. tree_sitter_analyzer/cli/commands/table_command.py +235 -235
  5. tree_sitter_analyzer/cli/info_commands.py +121 -121
  6. tree_sitter_analyzer/cli_main.py +307 -307
  7. tree_sitter_analyzer/core/analysis_engine.py +584 -584
  8. tree_sitter_analyzer/core/cache_service.py +5 -4
  9. tree_sitter_analyzer/core/query.py +502 -502
  10. tree_sitter_analyzer/encoding_utils.py +6 -2
  11. tree_sitter_analyzer/exceptions.py +400 -406
  12. tree_sitter_analyzer/formatters/java_formatter.py +291 -291
  13. tree_sitter_analyzer/formatters/python_formatter.py +259 -259
  14. tree_sitter_analyzer/interfaces/mcp_server.py +426 -425
  15. tree_sitter_analyzer/language_detector.py +398 -398
  16. tree_sitter_analyzer/language_loader.py +224 -224
  17. tree_sitter_analyzer/languages/java_plugin.py +1202 -1202
  18. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +559 -555
  19. tree_sitter_analyzer/mcp/server.py +30 -9
  20. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +21 -4
  21. tree_sitter_analyzer/mcp/tools/table_format_tool.py +22 -4
  22. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -567
  23. tree_sitter_analyzer/models.py +470 -470
  24. tree_sitter_analyzer/security/__init__.py +22 -22
  25. tree_sitter_analyzer/security/boundary_manager.py +243 -243
  26. tree_sitter_analyzer/security/regex_checker.py +297 -292
  27. tree_sitter_analyzer/table_formatter.py +703 -652
  28. tree_sitter_analyzer/utils.py +50 -19
  29. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/METADATA +1 -1
  30. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/RECORD +32 -32
  31. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/WHEEL +0 -0
  32. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/entry_points.txt +0 -0
@@ -1,292 +1,297 @@
- #!/usr/bin/env python3
- """
- Regex Safety Checker for Tree-sitter Analyzer
-
- Provides ReDoS (Regular Expression Denial of Service) attack prevention
- by analyzing regex patterns for potentially dangerous constructs.
- """
-
- import re
- import time
- from typing import List, Optional, Tuple
-
- from ..exceptions import SecurityError
- from ..utils import log_debug, log_warning
-
-
- class RegexSafetyChecker:
-     """
-     Regex safety checker for ReDoS attack prevention.
-
-     This class analyzes regular expressions for patterns that could
-     lead to catastrophic backtracking and ReDoS attacks.
-
-     Features:
-     - Pattern complexity analysis
-     - Dangerous construct detection
-     - Execution time monitoring
-     - Safe pattern compilation
-     """
-
-     # Maximum allowed pattern length
-     MAX_PATTERN_LENGTH = 1000
-
-     # Maximum execution time for pattern testing (seconds)
-     MAX_EXECUTION_TIME = 1.0
-
-     # Dangerous regex patterns that can cause ReDoS
-     DANGEROUS_PATTERNS = [
-         # Nested quantifiers
-         r'\(.+\)\+',  # (a+)+
-         r'\(.*\)\*',  # (a*)*
-         r'\(.{0,}\)\+',  # (.{0,})+
-         r'\(.+\)\{.*\}',  # (a+){n,m}
-
-         # Alternation with overlap
-         r'\(a\|a\)\*',  # (a|a)*
-         r'\([^|]*\|[^|]*\)\+',  # (abc|abd)+
-
-         # Exponential backtracking patterns
-         r'\(.*\)\1',  # (.*)\1 - backreference
-         r'\(\?\=.*\)\+',  # (?=.*)+
-         r'\(\?\!.*\)\+',  # (?!.*)+
-         r'\(\?\<\=.*\)\+',  # (?<=.*)+
-         r'\(\?\<\!.*\)\+',  # (?<!.*)+
-
-         # Catastrophic patterns
-         r'\([^)]*\+[^)]*\)\+',  # Nested + quantifiers
-         r'\([^)]*\*[^)]*\)\*',  # Nested * quantifiers
-     ]
-
-     def __init__(self) -> None:
-         """Initialize regex safety checker."""
-         log_debug("RegexSafetyChecker initialized")
-
-     def validate_pattern(self, pattern: str) -> Tuple[bool, str]:
-         """
-         Validate regex pattern for safety.
-
-         Args:
-             pattern: Regex pattern to validate
-
-         Returns:
-             Tuple of (is_safe, error_message)
-
-         Example:
-             >>> checker = RegexSafetyChecker()
-             >>> is_safe, error = checker.validate_pattern(r"hello.*world")
-             >>> assert is_safe
-         """
-         try:
-             # Basic validation
-             if not pattern or not isinstance(pattern, str):
-                 return False, "Pattern must be a non-empty string"
-
-             # Length check
-             if len(pattern) > self.MAX_PATTERN_LENGTH:
-                 return False, f"Pattern too long: {len(pattern)} > {self.MAX_PATTERN_LENGTH}"
-
-             # Check for dangerous patterns
-             dangerous_found = self._check_dangerous_patterns(pattern)
-             if dangerous_found:
-                 return False, f"Potentially dangerous regex pattern detected: {dangerous_found}"
-
-             # Compilation check
-             compilation_error = self._check_compilation(pattern)
-             if compilation_error:
-                 return False, f"Invalid regex pattern: {compilation_error}"
-
-             # Performance check
-             performance_error = self._check_performance(pattern)
-             if performance_error:
-                 return False, f"Pattern performance issue: {performance_error}"
-
-             log_debug(f"Regex pattern validation passed: {pattern}")
-             return True, ""
-
-         except Exception as e:
-             log_warning(f"Regex validation error: {e}")
-             return False, f"Validation error: {str(e)}"
-
-     def _check_dangerous_patterns(self, pattern: str) -> Optional[str]:
-         """
-         Check for known dangerous regex patterns.
-
-         Args:
-             pattern: Pattern to check
-
-         Returns:
-             Description of dangerous pattern found, or None if safe
-         """
-         for dangerous_pattern in self.DANGEROUS_PATTERNS:
-             try:
-                 if re.search(dangerous_pattern, pattern):
-                     log_warning(f"Dangerous pattern detected: {dangerous_pattern} in {pattern}")
-                     return dangerous_pattern
-             except re.error:
-                 # If the dangerous pattern itself is invalid, skip it
-                 continue
-
-         return None
-
-     def _check_compilation(self, pattern: str) -> Optional[str]:
-         """
-         Check if pattern compiles successfully.
-
-         Args:
-             pattern: Pattern to compile
-
-         Returns:
-             Error message if compilation fails, None if successful
-         """
-         try:
-             re.compile(pattern)
-             return None
-         except re.error as e:
-             log_warning(f"Regex compilation failed: {e}")
-             return str(e)
-
-     def _check_performance(self, pattern: str) -> Optional[str]:
-         """
-         Check pattern performance with test strings.
-
-         Args:
-             pattern: Pattern to test
-
-         Returns:
-             Error message if performance is poor, None if acceptable
-         """
-         try:
-             compiled_pattern = re.compile(pattern)
-
-             # Test strings that might cause backtracking
-             test_strings = [
-                 "a" * 100,  # Long string of same character
-                 "ab" * 50,  # Alternating pattern
-                 "x" * 50 + "y",  # Long string with different ending
-                 "a" * 30 + "b" * 30 + "c" * 30,  # Mixed long string
-             ]
-
-             for test_string in test_strings:
-                 start_time = time.time()
-
-                 try:
-                     # Test both search and match operations
-                     compiled_pattern.search(test_string)
-                     compiled_pattern.match(test_string)
-
-                     execution_time = time.time() - start_time
-
-                     if execution_time > self.MAX_EXECUTION_TIME:
-                         log_warning(
-                             f"Regex performance issue: {execution_time:.3f}s > {self.MAX_EXECUTION_TIME}s"
-                         )
-                         return f"Pattern execution too slow: {execution_time:.3f}s"
-
-                 except Exception as e:
-                     log_warning(f"Regex execution error: {e}")
-                     return f"Pattern execution error: {str(e)}"
-
-             return None
-
-         except Exception as e:
-             log_warning(f"Performance check error: {e}")
-             return f"Performance check failed: {str(e)}"
-
-     def analyze_complexity(self, pattern: str) -> dict:
-         """
-         Analyze regex pattern complexity.
-
-         Args:
-             pattern: Pattern to analyze
-
-         Returns:
-             Dictionary with complexity metrics
-         """
-         try:
-             metrics = {
-                 "length": len(pattern),
-                 "quantifiers": len(re.findall(r'[+*?{]', pattern)),
-                 "groups": len(re.findall(r'\(', pattern)),
-                 "alternations": len(re.findall(r'\|', pattern)),
-                 "character_classes": len(re.findall(r'\[', pattern)),
-                 "anchors": len(re.findall(r'[\^$]', pattern)),
-                 "complexity_score": 0,
-             }
-
-             # Calculate complexity score
-             metrics["complexity_score"] = (
-                 metrics["length"] * 0.1 +
-                 metrics["quantifiers"] * 2 +
-                 metrics["groups"] * 1.5 +
-                 metrics["alternations"] * 3 +
-                 metrics["character_classes"] * 1
-             )
-
-             return metrics
-
-         except Exception as e:
-             log_warning(f"Complexity analysis error: {e}")
-             return {"error": str(e)}
-
-     def suggest_safer_pattern(self, pattern: str) -> Optional[str]:
-         """
-         Suggest a safer alternative for dangerous patterns.
-
-         Args:
-             pattern: Original pattern
-
-         Returns:
-             Suggested safer pattern, or None if no suggestion available
-         """
-         # Only suggest for patterns that are actually dangerous
-         is_dangerous = self._check_dangerous_patterns(pattern)
-         if not is_dangerous:
-             return None
-
-         # Simple pattern replacements for common dangerous cases
-         replacements = {
-             r'\(.+\)\+': r'[^\\s]+',  # Replace (a+)+ with [^\s]+
-             r'\(.*\)\*': r'[^\\s]*',  # Replace (.*)* with [^\s]*
-         }
-
-         for dangerous, safer in replacements.items():
-             if re.search(dangerous, pattern):
-                 suggested = re.sub(dangerous, safer, pattern)
-                 log_debug(f"Suggested safer pattern: {pattern} -> {suggested}")
-                 return suggested
-
-         return None
-
-     def get_safe_flags(self) -> int:
-         """
-         Get recommended safe regex flags.
-
-         Returns:
-             Combination of safe regex flags
-         """
-         # Use flags that prevent some ReDoS attacks
-         return re.MULTILINE | re.DOTALL
-
-     def create_safe_pattern(self, pattern: str, flags: Optional[int] = None) -> Optional[re.Pattern]:
-         """
-         Create a safely compiled regex pattern.
-
-         Args:
-             pattern: Pattern to compile
-             flags: Optional regex flags
-
-         Returns:
-             Compiled pattern if safe, None if dangerous
-         """
-         is_safe, error = self.validate_pattern(pattern)
-         if not is_safe:
-             log_warning(f"Cannot create unsafe pattern: {error}")
-             return None
-
-         try:
-             safe_flags = flags if flags is not None else self.get_safe_flags()
-             return re.compile(pattern, safe_flags)
-         except re.error as e:
-             log_warning(f"Pattern compilation failed: {e}")
-             return None
+ #!/usr/bin/env python3
+ """
+ Regex Safety Checker for Tree-sitter Analyzer
+
+ Provides ReDoS (Regular Expression Denial of Service) attack prevention
+ by analyzing regex patterns for potentially dangerous constructs.
+ """
+
+ import re
+ import time
+
+ from ..utils import log_debug, log_warning
+
+
+ class RegexSafetyChecker:
+     """
+     Regex safety checker for ReDoS attack prevention.
+
+     This class analyzes regular expressions for patterns that could
+     lead to catastrophic backtracking and ReDoS attacks.
+
+     Features:
+     - Pattern complexity analysis
+     - Dangerous construct detection
+     - Execution time monitoring
+     - Safe pattern compilation
+     """
+
+     # Maximum allowed pattern length
+     MAX_PATTERN_LENGTH = 1000
+
+     # Maximum execution time for pattern testing (seconds)
+     MAX_EXECUTION_TIME = 1.0
+
+     # Dangerous regex patterns that can cause ReDoS
+     DANGEROUS_PATTERNS = [
+         # Nested quantifiers
+         r"\(.+\)\+",  # (a+)+
+         r"\(.*\)\*",  # (a*)*
+         r"\(.{0,}\)\+",  # (.{0,})+
+         r"\(.+\)\{.*\}",  # (a+){n,m}
+         # Alternation with overlap
+         r"\(a\|a\)\*",  # (a|a)*
+         r"\([^|]*\|[^|]*\)\+",  # (abc|abd)+
+         # Exponential backtracking patterns
+         r"\(.*\)\1",  # (.*)\1 - backreference
+         r"\(\?\=.*\)\+",  # (?=.*)+
+         r"\(\?\!.*\)\+",  # (?!.*)+
+         r"\(\?\<\=.*\)\+",  # (?<=.*)+
+         r"\(\?\<\!.*\)\+",  # (?<!.*)+
+         # Catastrophic patterns
+         r"\([^)]*\+[^)]*\)\+",  # Nested + quantifiers
+         r"\([^)]*\*[^)]*\)\*",  # Nested * quantifiers
+     ]
+
+     def __init__(self) -> None:
+         """Initialize regex safety checker."""
+         log_debug("RegexSafetyChecker initialized")
+
+     def validate_pattern(self, pattern: str) -> tuple[bool, str]:
+         """
+         Validate regex pattern for safety.
+
+         Args:
+             pattern: Regex pattern to validate
+
+         Returns:
+             Tuple of (is_safe, error_message)
+
+         Example:
+             >>> checker = RegexSafetyChecker()
+             >>> is_safe, error = checker.validate_pattern(r"hello.*world")
+             >>> assert is_safe
+         """
+         try:
+             # Basic validation
+             if not pattern or not isinstance(pattern, str):
+                 return False, "Pattern must be a non-empty string"
+
+             # Length check
+             if len(pattern) > self.MAX_PATTERN_LENGTH:
+                 return (
+                     False,
+                     f"Pattern too long: {len(pattern)} > {self.MAX_PATTERN_LENGTH}",
+                 )
+
+             # Check for dangerous patterns
+             dangerous_found = self._check_dangerous_patterns(pattern)
+             if dangerous_found:
+                 return (
+                     False,
+                     f"Potentially dangerous regex pattern detected: {dangerous_found}",
+                 )
+
+             # Compilation check
+             compilation_error = self._check_compilation(pattern)
+             if compilation_error:
+                 return False, f"Invalid regex pattern: {compilation_error}"
+
+             # Performance check
+             performance_error = self._check_performance(pattern)
+             if performance_error:
+                 return False, f"Pattern performance issue: {performance_error}"
+
+             log_debug(f"Regex pattern validation passed: {pattern}")
+             return True, ""
+
+         except Exception as e:
+             log_warning(f"Regex validation error: {e}")
+             return False, f"Validation error: {str(e)}"
+
+     def _check_dangerous_patterns(self, pattern: str) -> str | None:
+         """
+         Check for known dangerous regex patterns.
+
+         Args:
+             pattern: Pattern to check
+
+         Returns:
+             Description of dangerous pattern found, or None if safe
+         """
+         for dangerous_pattern in self.DANGEROUS_PATTERNS:
+             try:
+                 if re.search(dangerous_pattern, pattern):
+                     log_warning(
+                         f"Dangerous pattern detected: {dangerous_pattern} in {pattern}"
+                     )
+                     return dangerous_pattern
+             except re.error:
+                 # If the dangerous pattern itself is invalid, skip it
+                 continue
+
+         return None
+
+     def _check_compilation(self, pattern: str) -> str | None:
+         """
+         Check if pattern compiles successfully.
+
+         Args:
+             pattern: Pattern to compile
+
+         Returns:
+             Error message if compilation fails, None if successful
+         """
+         try:
+             re.compile(pattern)
+             return None
+         except re.error as e:
+             log_warning(f"Regex compilation failed: {e}")
+             return str(e)
+
+     def _check_performance(self, pattern: str) -> str | None:
+         """
+         Check pattern performance with test strings.
+
+         Args:
+             pattern: Pattern to test
+
+         Returns:
+             Error message if performance is poor, None if acceptable
+         """
+         try:
+             compiled_pattern = re.compile(pattern)
+
+             # Test strings that might cause backtracking
+             test_strings = [
+                 "a" * 100,  # Long string of same character
+                 "ab" * 50,  # Alternating pattern
+                 "x" * 50 + "y",  # Long string with different ending
+                 "a" * 30 + "b" * 30 + "c" * 30,  # Mixed long string
+             ]
+
+             for test_string in test_strings:
+                 start_time = time.time()
+
+                 try:
+                     # Test both search and match operations
+                     compiled_pattern.search(test_string)
+                     compiled_pattern.match(test_string)
+
+                     execution_time = time.time() - start_time
+
+                     if execution_time > self.MAX_EXECUTION_TIME:
+                         log_warning(
+                             f"Regex performance issue: {execution_time:.3f}s > {self.MAX_EXECUTION_TIME}s"
+                         )
+                         return f"Pattern execution too slow: {execution_time:.3f}s"
+
+                 except Exception as e:
+                     log_warning(f"Regex execution error: {e}")
+                     return f"Pattern execution error: {str(e)}"
+
+             return None
+
+         except Exception as e:
+             log_warning(f"Performance check error: {e}")
+             return f"Performance check failed: {str(e)}"
+
+     def analyze_complexity(self, pattern: str) -> dict:
+         """
+         Analyze regex pattern complexity.
+
+         Args:
+             pattern: Pattern to analyze
+
+         Returns:
+             Dictionary with complexity metrics
+         """
+         try:
+             metrics = {
+                 "length": len(pattern),
+                 "quantifiers": len(re.findall(r"[+*?{]", pattern)),
+                 "groups": len(re.findall(r"\(", pattern)),
+                 "alternations": len(re.findall(r"\|", pattern)),
+                 "character_classes": len(re.findall(r"\[", pattern)),
+                 "anchors": len(re.findall(r"[\^$]", pattern)),
+                 "complexity_score": 0,
+             }
+
+             # Calculate complexity score
+             metrics["complexity_score"] = (
+                 metrics["length"] * 0.1
+                 + metrics["quantifiers"] * 2
+                 + metrics["groups"] * 1.5
+                 + metrics["alternations"] * 3
+                 + metrics["character_classes"] * 1
+             )
+
+             return metrics
+
+         except Exception as e:
+             log_warning(f"Complexity analysis error: {e}")
+             return {"error": str(e)}
+
+     def suggest_safer_pattern(self, pattern: str) -> str | None:
+         """
+         Suggest a safer alternative for dangerous patterns.
+
+         Args:
+             pattern: Original pattern
+
+         Returns:
+             Suggested safer pattern, or None if no suggestion available
+         """
+         # Only suggest for patterns that are actually dangerous
+         is_dangerous = self._check_dangerous_patterns(pattern)
+         if not is_dangerous:
+             return None
+
+         # Simple pattern replacements for common dangerous cases
+         replacements = {
+             r"\(.+\)\+": r"[^\\s]+",  # Replace (a+)+ with [^\s]+
+             r"\(.*\)\*": r"[^\\s]*",  # Replace (.*)* with [^\s]*
+         }
+
+         for dangerous, safer in replacements.items():
+             if re.search(dangerous, pattern):
+                 suggested = re.sub(dangerous, safer, pattern)
+                 log_debug(f"Suggested safer pattern: {pattern} -> {suggested}")
+                 return suggested
+
+         return None
+
+     def get_safe_flags(self) -> int:
+         """
+         Get recommended safe regex flags.
+
+         Returns:
+             Combination of safe regex flags
+         """
+         # Use flags that prevent some ReDoS attacks
+         return re.MULTILINE | re.DOTALL
+
+     def create_safe_pattern(
+         self, pattern: str, flags: int | None = None
+     ) -> re.Pattern | None:
+         """
+         Create a safely compiled regex pattern.
+
+         Args:
+             pattern: Pattern to compile
+             flags: Optional regex flags
+
+         Returns:
+             Compiled pattern if safe, None if dangerous
+         """
+         is_safe, error = self.validate_pattern(pattern)
+         if not is_safe:
+             log_warning(f"Cannot create unsafe pattern: {error}")
+             return None
+
+         try:
+             safe_flags = flags if flags is not None else self.get_safe_flags()
+             return re.compile(pattern, safe_flags)
+         except re.error as e:
+             log_warning(f"Pattern compilation failed: {e}")
+             return None
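
The public surface of RegexSafetyChecker is the same on both sides of this hunk (0.9.4 drops the unused SecurityError and typing imports, moves to built-in generics and X | None annotations, and reflows the code), so either version can be exercised as below. This is a minimal usage sketch, assuming the module is importable as tree_sitter_analyzer.security.regex_checker per the file list above; the printed values are illustrative only.

# Usage sketch for RegexSafetyChecker (illustrative; import path assumed from
# the package layout shown in the file list above).
from tree_sitter_analyzer.security.regex_checker import RegexSafetyChecker

checker = RegexSafetyChecker()

# A benign pattern passes validation and compiles with the recommended flags.
is_safe, error = checker.validate_pattern(r"hello.*world")
assert is_safe and error == ""
compiled = checker.create_safe_pattern(r"hello.*world")
assert compiled is not None and compiled.search("hello brave new world")

# A nested-quantifier pattern is rejected; a safer rewrite may be suggested.
is_safe, error = checker.validate_pattern(r"(a+)+b")
assert not is_safe
print(error)  # reports which dangerous construct was matched
print(checker.suggest_safer_pattern(r"(a+)+b"))  # e.g. a character-class rewrite

# Complexity metrics (length, quantifiers, groups, alternations, score, ...).
print(checker.analyze_complexity(r"^[A-Za-z_][A-Za-z0-9_]*$"))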