tree-sitter-analyzer 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (69)
  1. tree_sitter_analyzer/__init__.py +132 -132
  2. tree_sitter_analyzer/__main__.py +11 -11
  3. tree_sitter_analyzer/api.py +533 -533
  4. tree_sitter_analyzer/cli/__init__.py +39 -39
  5. tree_sitter_analyzer/cli/__main__.py +12 -12
  6. tree_sitter_analyzer/cli/commands/__init__.py +26 -26
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
  8. tree_sitter_analyzer/cli/commands/base_command.py +160 -160
  9. tree_sitter_analyzer/cli/commands/default_command.py +18 -18
  10. tree_sitter_analyzer/cli/commands/partial_read_command.py +141 -141
  11. tree_sitter_analyzer/cli/commands/query_command.py +81 -81
  12. tree_sitter_analyzer/cli/commands/structure_command.py +138 -138
  13. tree_sitter_analyzer/cli/commands/summary_command.py +101 -101
  14. tree_sitter_analyzer/cli/commands/table_command.py +235 -235
  15. tree_sitter_analyzer/cli/info_commands.py +121 -121
  16. tree_sitter_analyzer/cli_main.py +297 -297
  17. tree_sitter_analyzer/core/__init__.py +15 -15
  18. tree_sitter_analyzer/core/analysis_engine.py +555 -555
  19. tree_sitter_analyzer/core/cache_service.py +320 -320
  20. tree_sitter_analyzer/core/engine.py +566 -566
  21. tree_sitter_analyzer/core/parser.py +293 -293
  22. tree_sitter_analyzer/encoding_utils.py +459 -459
  23. tree_sitter_analyzer/exceptions.py +406 -337
  24. tree_sitter_analyzer/file_handler.py +210 -210
  25. tree_sitter_analyzer/formatters/__init__.py +1 -1
  26. tree_sitter_analyzer/formatters/base_formatter.py +167 -167
  27. tree_sitter_analyzer/formatters/formatter_factory.py +78 -78
  28. tree_sitter_analyzer/interfaces/__init__.py +9 -9
  29. tree_sitter_analyzer/interfaces/cli.py +528 -528
  30. tree_sitter_analyzer/interfaces/cli_adapter.py +343 -343
  31. tree_sitter_analyzer/interfaces/mcp_adapter.py +206 -206
  32. tree_sitter_analyzer/interfaces/mcp_server.py +425 -405
  33. tree_sitter_analyzer/languages/__init__.py +10 -10
  34. tree_sitter_analyzer/languages/javascript_plugin.py +446 -446
  35. tree_sitter_analyzer/languages/python_plugin.py +755 -755
  36. tree_sitter_analyzer/mcp/__init__.py +31 -31
  37. tree_sitter_analyzer/mcp/resources/__init__.py +44 -44
  38. tree_sitter_analyzer/mcp/resources/code_file_resource.py +209 -209
  39. tree_sitter_analyzer/mcp/server.py +346 -333
  40. tree_sitter_analyzer/mcp/tools/__init__.py +30 -30
  41. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +654 -654
  42. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +247 -247
  43. tree_sitter_analyzer/mcp/tools/base_tool.py +54 -54
  44. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +300 -300
  45. tree_sitter_analyzer/mcp/tools/table_format_tool.py +362 -362
  46. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +543 -543
  47. tree_sitter_analyzer/mcp/utils/__init__.py +107 -107
  48. tree_sitter_analyzer/mcp/utils/error_handler.py +549 -549
  49. tree_sitter_analyzer/output_manager.py +253 -253
  50. tree_sitter_analyzer/plugins/__init__.py +280 -280
  51. tree_sitter_analyzer/plugins/base.py +529 -529
  52. tree_sitter_analyzer/plugins/manager.py +379 -379
  53. tree_sitter_analyzer/queries/__init__.py +26 -26
  54. tree_sitter_analyzer/queries/java.py +391 -391
  55. tree_sitter_analyzer/queries/javascript.py +148 -148
  56. tree_sitter_analyzer/queries/python.py +285 -285
  57. tree_sitter_analyzer/queries/typescript.py +229 -229
  58. tree_sitter_analyzer/query_loader.py +257 -257
  59. tree_sitter_analyzer/security/__init__.py +22 -0
  60. tree_sitter_analyzer/security/boundary_manager.py +237 -0
  61. tree_sitter_analyzer/security/regex_checker.py +292 -0
  62. tree_sitter_analyzer/security/validator.py +224 -0
  63. tree_sitter_analyzer/table_formatter.py +652 -589
  64. tree_sitter_analyzer/utils.py +277 -277
  65. {tree_sitter_analyzer-0.7.0.dist-info → tree_sitter_analyzer-0.8.0.dist-info}/METADATA +4 -1
  66. tree_sitter_analyzer-0.8.0.dist-info/RECORD +76 -0
  67. tree_sitter_analyzer-0.7.0.dist-info/RECORD +0 -72
  68. {tree_sitter_analyzer-0.7.0.dist-info → tree_sitter_analyzer-0.8.0.dist-info}/WHEEL +0 -0
  69. {tree_sitter_analyzer-0.7.0.dist-info → tree_sitter_analyzer-0.8.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,237 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Project Boundary Manager for Tree-sitter Analyzer
4
+
5
+ Provides strict project boundary control to prevent access to files
6
+ outside the designated project directory.
7
+ """
8
+
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Optional, Set
12
+
13
+ from ..exceptions import SecurityError
14
+ from ..utils import log_debug, log_info, log_warning
15
+
16
+
17
class ProjectBoundaryManager:
    """
    Project boundary manager for access control.

    Confines file access to a project root directory plus any directories
    added through ``add_allowed_directory``. Every comparison is made on
    real (symlink-resolved) paths.

    Features:
    - Real path resolution for symlink protection
    - Configurable allowed directories
    - Comprehensive boundary checking
    - Audit logging for security events
    """

    def __init__(self, project_root: str) -> None:
        """
        Initialize project boundary manager.

        Args:
            project_root: Root directory of the project

        Raises:
            SecurityError: If project root is empty, missing, or not a directory
        """
        if not project_root:
            raise SecurityError("Project root cannot be empty")

        if not os.path.exists(project_root):
            raise SecurityError(f"Project root does not exist: {project_root}")

        if not os.path.isdir(project_root):
            raise SecurityError(f"Project root is not a directory: {project_root}")

        # Store real path to prevent symlink attacks
        self.project_root = os.path.realpath(project_root)
        self.allowed_directories: Set[str] = {self.project_root}

        log_info(f"ProjectBoundaryManager initialized with root: {self.project_root}")

    @staticmethod
    def _is_inside(real_path: str, allowed_dir: str) -> bool:
        """
        Return True if ``real_path`` equals ``allowed_dir`` or lies beneath it.

        Both arguments are expected to be resolved absolute paths. The test
        compares whole path components, so ``/proj-evil`` is not inside
        ``/proj``, and a filesystem root used as ``allowed_dir`` contains
        everything below it.
        """
        try:
            # normcase is a no-op on POSIX; on Windows it folds case and
            # separators so the equality test below is reliable.
            candidate = os.path.normcase(real_path)
            boundary = os.path.normcase(allowed_dir)
            return os.path.commonpath([candidate, boundary]) == boundary
        except ValueError:
            # commonpath raises for mixed absolute/relative paths, different
            # drives, or an empty path: none of those are inside the boundary.
            return False

    @staticmethod
    def _points_above_root(rel_path: str) -> bool:
        """
        Return True if ``rel_path`` begins with a parent-directory component.

        A plain prefix test is not used because file names such as
        ``..hidden`` are legitimate and must not be rejected.
        """
        return rel_path == os.pardir or rel_path.startswith(os.pardir + os.sep)

    def add_allowed_directory(self, directory: str) -> None:
        """
        Add an additional allowed directory.

        Args:
            directory: Directory path to allow access to

        Raises:
            SecurityError: If directory is empty, missing, or not a directory
        """
        if not directory:
            raise SecurityError("Directory cannot be empty")

        if not os.path.exists(directory):
            raise SecurityError(f"Directory does not exist: {directory}")

        if not os.path.isdir(directory):
            raise SecurityError(f"Path is not a directory: {directory}")

        real_dir = os.path.realpath(directory)
        self.allowed_directories.add(real_dir)

        log_info(f"Added allowed directory: {real_dir}")

    def is_within_project(self, file_path: str) -> bool:
        """
        Check if file path is within project boundaries.

        Args:
            file_path: File path to check

        Returns:
            True if the resolved path equals, or lies beneath, any allowed
            directory. Any error during the check yields False.
        """
        try:
            if not file_path:
                log_warning("Empty file path provided to boundary check")
                return False

            # Resolve real path to handle symlinks
            real_path = os.path.realpath(file_path)

            # Check against all allowed directories
            if any(
                self._is_inside(real_path, allowed_dir)
                for allowed_dir in self.allowed_directories
            ):
                log_debug(f"File path within boundaries: {file_path}")
                return True

            log_warning(f"File path outside boundaries: {file_path} -> {real_path}")
            return False

        except Exception as e:
            # Deliberately broad: a boundary check must fail closed.
            log_warning(f"Boundary check error for {file_path}: {e}")
            return False

    def get_relative_path(self, file_path: str) -> Optional[str]:
        """
        Get relative path from project root if within boundaries.

        Args:
            file_path: File path to convert

        Returns:
            Path relative to the project root, or None if the path is outside
            the allowed boundaries, is not located under the project root
            itself, or cannot be computed.
        """
        if not self.is_within_project(file_path):
            return None

        try:
            real_path = os.path.realpath(file_path)
            rel_path = os.path.relpath(real_path, self.project_root)

            # A path inside an extra allowed directory is within boundaries
            # but still lies outside the project root; it has no usable
            # root-relative form.
            if self._points_above_root(rel_path):
                log_warning(f"Relative path calculation failed: {rel_path}")
                return None

            return rel_path

        except Exception as e:
            log_warning(f"Relative path calculation error: {e}")
            return None

    def validate_and_resolve_path(self, file_path: str) -> Optional[str]:
        """
        Validate path and return resolved absolute path if within boundaries.

        Relative paths are interpreted relative to the project root.

        Args:
            file_path: File path to validate and resolve

        Returns:
            Resolved absolute path if valid, None otherwise
        """
        try:
            # Handle relative paths from project root
            if os.path.isabs(file_path):
                full_path = file_path
            else:
                full_path = os.path.join(self.project_root, file_path)

            # Check boundaries
            if not self.is_within_project(full_path):
                return None

            # Return real path
            return os.path.realpath(full_path)

        except Exception as e:
            log_warning(f"Path validation error: {e}")
            return None

    def list_allowed_directories(self) -> Set[str]:
        """
        Get all allowed directories.

        Returns:
            A copy of the set of allowed directory paths; mutating it does not
            affect the manager.
        """
        return self.allowed_directories.copy()

    def is_symlink_safe(self, file_path: str) -> bool:
        """
        Check if file path is safe from symlink attacks.

        Every leading component of ``file_path`` is inspected; each one that is
        a symlink must resolve to a location inside the allowed boundaries.
        The path itself does not have to exist, so dangling links and
        not-yet-created files behind a symlinked directory are checked too.

        Args:
            file_path: File path to check

        Returns:
            True if no component is a symlink leading outside the boundaries.
            Any error during the check yields False.
        """
        try:
            current_path = ""

            # Check if any component in the path is a symlink
            for part in Path(file_path).parts:
                current_path = os.path.join(current_path, part) if current_path else part

                if not os.path.islink(current_path):
                    continue

                # Check if symlink target is within boundaries
                target = os.path.realpath(current_path)
                if not self.is_within_project(target):
                    log_warning(f"Unsafe symlink detected: {current_path} -> {target}")
                    return False

            return True

        except Exception as e:
            log_warning(f"Symlink safety check error: {e}")
            return False

    def audit_access(self, file_path: str, operation: str) -> None:
        """
        Log file access for security auditing.

        This only records the decision; it does not block the operation.

        Args:
            file_path: File path being accessed
            operation: Type of operation (read, write, analyze, etc.)
        """
        is_within = self.is_within_project(file_path)
        status = "ALLOWED" if is_within else "DENIED"

        log_info(f"AUDIT: {status} {operation} access to {file_path}")

        if not is_within:
            log_warning(f"SECURITY: Unauthorized access attempt to {file_path}")

    def __str__(self) -> str:
        """String representation of boundary manager."""
        return f"ProjectBoundaryManager(root={self.project_root}, allowed_dirs={len(self.allowed_directories)})"

    def __repr__(self) -> str:
        """Detailed representation of boundary manager."""
        return (
            f"ProjectBoundaryManager("
            f"project_root='{self.project_root}', "
            f"allowed_directories={self.allowed_directories}"
            f")"
        )
@@ -0,0 +1,292 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Regex Safety Checker for Tree-sitter Analyzer
4
+
5
+ Provides ReDoS (Regular Expression Denial of Service) attack prevention
6
+ by analyzing regex patterns for potentially dangerous constructs.
7
+ """
8
+
9
+ import re
10
+ import time
11
+ from typing import List, Optional, Tuple
12
+
13
+ from ..exceptions import SecurityError
14
+ from ..utils import log_debug, log_warning
15
+
16
+
17
class RegexSafetyChecker:
    """
    Regex safety checker for ReDoS attack prevention.

    This class analyzes regular expressions for patterns that could
    lead to catastrophic backtracking and ReDoS attacks.

    Features:
    - Pattern complexity analysis
    - Dangerous construct detection
    - Execution time monitoring
    - Safe pattern compilation
    """

    # Maximum allowed pattern length
    MAX_PATTERN_LENGTH = 1000

    # Maximum execution time for pattern testing (seconds)
    MAX_EXECUTION_TIME = 1.0

    # Dangerous regex patterns that can cause ReDoS
    DANGEROUS_PATTERNS = [
        # Nested quantifiers
        r'\(.+\)\+',  # (a+)+
        r'\(.*\)\*',  # (a*)*
        r'\(.{0,}\)\+',  # (.{0,})+
        r'\(.+\)\{.*\}',  # (a+){n,m}

        # Alternation with overlap
        r'\(a\|a\)\*',  # (a|a)*
        r'\([^|]*\|[^|]*\)\+',  # (abc|abd)+

        # Exponential backtracking patterns
        r'\(.*\)\1',  # (.*)\1 - backreference
        r'\(\?\=.*\)\+',  # (?=.*)+
        r'\(\?\!.*\)\+',  # (?!.*)+
        r'\(\?\<\=.*\)\+',  # (?<=.*)+
        r'\(\?\<\!.*\)\+',  # (?<!.*)+

        # Catastrophic patterns
        r'\([^)]*\+[^)]*\)\+',  # Nested + quantifiers
        r'\([^)]*\*[^)]*\)\*',  # Nested * quantifiers
    ]

    def __init__(self) -> None:
        """Initialize regex safety checker."""
        log_debug("RegexSafetyChecker initialized")

    def validate_pattern(self, pattern: str) -> Tuple[bool, str]:
        """
        Validate regex pattern for safety.

        Checks run in this order: type/emptiness, length, dangerous-construct
        scan, compilation, then a timing probe. The first failure wins.

        Args:
            pattern: Regex pattern to validate

        Returns:
            Tuple of (is_safe, error_message); error_message is "" when safe

        Example:
            >>> checker = RegexSafetyChecker()
            >>> is_safe, error = checker.validate_pattern(r"hello.*world")
            >>> assert is_safe
        """
        try:
            # Basic validation
            if not pattern or not isinstance(pattern, str):
                return False, "Pattern must be a non-empty string"

            # Length check
            if len(pattern) > self.MAX_PATTERN_LENGTH:
                return False, f"Pattern too long: {len(pattern)} > {self.MAX_PATTERN_LENGTH}"

            # Check for dangerous patterns
            dangerous_found = self._check_dangerous_patterns(pattern)
            if dangerous_found:
                return False, f"Potentially dangerous regex pattern detected: {dangerous_found}"

            # Compilation check
            compilation_error = self._check_compilation(pattern)
            if compilation_error:
                return False, f"Invalid regex pattern: {compilation_error}"

            # Performance check
            performance_error = self._check_performance(pattern)
            if performance_error:
                return False, f"Pattern performance issue: {performance_error}"

            log_debug(f"Regex pattern validation passed: {pattern}")
            return True, ""

        except Exception as e:
            log_warning(f"Regex validation error: {e}")
            return False, f"Validation error: {str(e)}"

    def _check_dangerous_patterns(self, pattern: str) -> Optional[str]:
        """
        Check for known dangerous regex patterns.

        Args:
            pattern: Pattern to check

        Returns:
            The first entry of DANGEROUS_PATTERNS that matches, or None if
            none match
        """
        for dangerous_pattern in self.DANGEROUS_PATTERNS:
            try:
                if re.search(dangerous_pattern, pattern):
                    log_warning(f"Dangerous pattern detected: {dangerous_pattern} in {pattern}")
                    return dangerous_pattern
            except re.error:
                # If the dangerous pattern itself is invalid, skip it
                continue

        return None

    def _check_compilation(self, pattern: str) -> Optional[str]:
        """
        Check if pattern compiles successfully.

        Args:
            pattern: Pattern to compile

        Returns:
            Error message if compilation fails, None if successful
        """
        try:
            re.compile(pattern)
        except re.error as e:
            log_warning(f"Regex compilation failed: {e}")
            return str(e)
        return None

    def _check_performance(self, pattern: str) -> Optional[str]:
        """
        Check pattern performance with test strings.

        The pattern is run against a few short probe strings and the elapsed
        time of each probe is compared with MAX_EXECUTION_TIME. The measurement
        happens after the match returns, so this detects slow patterns but
        cannot interrupt one that never finishes.

        Args:
            pattern: Pattern to test

        Returns:
            Error message if performance is poor, None if acceptable
        """
        try:
            compiled_pattern = re.compile(pattern)

            # Test strings that might cause backtracking
            test_strings = [
                "a" * 100,  # Long string of same character
                "ab" * 50,  # Alternating pattern
                "x" * 50 + "y",  # Long string with different ending
                "a" * 30 + "b" * 30 + "c" * 30,  # Mixed long string
            ]

            for test_string in test_strings:
                # perf_counter is monotonic, so system clock adjustments
                # cannot distort the measured interval.
                start_time = time.perf_counter()

                try:
                    # Test both search and match operations
                    compiled_pattern.search(test_string)
                    compiled_pattern.match(test_string)

                    execution_time = time.perf_counter() - start_time

                    if execution_time > self.MAX_EXECUTION_TIME:
                        log_warning(
                            f"Regex performance issue: {execution_time:.3f}s > {self.MAX_EXECUTION_TIME}s"
                        )
                        return f"Pattern execution too slow: {execution_time:.3f}s"

                except Exception as e:
                    log_warning(f"Regex execution error: {e}")
                    return f"Pattern execution error: {str(e)}"

            return None

        except Exception as e:
            log_warning(f"Performance check error: {e}")
            return f"Performance check failed: {str(e)}"

    def analyze_complexity(self, pattern: str) -> dict:
        """
        Analyze regex pattern complexity.

        Counts are simple character tallies, not a parse of the pattern.
        Backslash-escaped characters are excluded from the tallies so that
        literals such as ``\\(`` or ``\\+`` are not counted as syntax.
        ``anchors`` is reported but does not contribute to the score.

        Args:
            pattern: Pattern to analyze

        Returns:
            Dictionary with complexity metrics, or ``{"error": message}`` if
            the analysis fails
        """
        try:
            # Drop every backslash-escaped character before counting.
            syntax_only = re.sub(r'\\.', '', pattern, flags=re.DOTALL)

            metrics = {
                "length": len(pattern),
                "quantifiers": len(re.findall(r'[+*?{]', syntax_only)),
                "groups": len(re.findall(r'\(', syntax_only)),
                "alternations": len(re.findall(r'\|', syntax_only)),
                "character_classes": len(re.findall(r'\[', syntax_only)),
                "anchors": len(re.findall(r'[\^$]', syntax_only)),
                "complexity_score": 0,
            }

            # Calculate complexity score
            metrics["complexity_score"] = (
                metrics["length"] * 0.1
                + metrics["quantifiers"] * 2
                + metrics["groups"] * 1.5
                + metrics["alternations"] * 3
                + metrics["character_classes"] * 1
            )

            return metrics

        except Exception as e:
            log_warning(f"Complexity analysis error: {e}")
            return {"error": str(e)}

    def suggest_safer_pattern(self, pattern: str) -> Optional[str]:
        """
        Suggest a safer alternative for dangerous patterns.

        Only the first applicable replacement is applied. The suggestion is a
        rough rewrite and is not guaranteed to match the same strings as the
        original, so callers should review it before use.

        Args:
            pattern: Original pattern

        Returns:
            Suggested safer pattern, or None if no suggestion available
        """
        # Only suggest for patterns that are actually dangerous
        is_dangerous = self._check_dangerous_patterns(pattern)
        if not is_dangerous:
            return None

        # Simple pattern replacements for common dangerous cases
        replacements = {
            r'\(.+\)\+': r'[^\\s]+',  # Replace (a+)+ with [^\s]+
            r'\(.*\)\*': r'[^\\s]*',  # Replace (.*)* with [^\s]*
        }

        for dangerous, safer in replacements.items():
            if re.search(dangerous, pattern):
                suggested = re.sub(dangerous, safer, pattern)
                log_debug(f"Suggested safer pattern: {pattern} -> {suggested}")
                return suggested

        return None

    def get_safe_flags(self) -> int:
        """
        Get the default flags used by ``create_safe_pattern``.

        These flags only change matching semantics: MULTILINE makes ``^``/``$``
        match at line boundaries and DOTALL lets ``.`` match newlines. They do
        not limit backtracking; ReDoS protection comes from
        ``validate_pattern``.

        Returns:
            ``re.MULTILINE | re.DOTALL``
        """
        return re.MULTILINE | re.DOTALL

    def create_safe_pattern(self, pattern: str, flags: Optional[int] = None) -> Optional[re.Pattern]:
        """
        Create a safely compiled regex pattern.

        The pattern is validated first. Validation compiles and probes the
        pattern without flags; the flags are applied only to the final
        compilation.

        Args:
            pattern: Pattern to compile
            flags: Optional regex flags; defaults to ``get_safe_flags()``

        Returns:
            Compiled pattern if safe, None if validation or compilation fails
        """
        is_safe, error = self.validate_pattern(pattern)
        if not is_safe:
            log_warning(f"Cannot create unsafe pattern: {error}")
            return None

        try:
            safe_flags = flags if flags is not None else self.get_safe_flags()
            return re.compile(pattern, safe_flags)
        except re.error as e:
            log_warning(f"Pattern compilation failed: {e}")
            return None