tree-sitter-analyzer 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (69) hide show
  1. tree_sitter_analyzer/__init__.py +132 -132
  2. tree_sitter_analyzer/__main__.py +11 -11
  3. tree_sitter_analyzer/api.py +533 -533
  4. tree_sitter_analyzer/cli/__init__.py +39 -39
  5. tree_sitter_analyzer/cli/__main__.py +12 -12
  6. tree_sitter_analyzer/cli/commands/__init__.py +26 -26
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
  8. tree_sitter_analyzer/cli/commands/base_command.py +160 -160
  9. tree_sitter_analyzer/cli/commands/default_command.py +18 -18
  10. tree_sitter_analyzer/cli/commands/partial_read_command.py +141 -141
  11. tree_sitter_analyzer/cli/commands/query_command.py +81 -81
  12. tree_sitter_analyzer/cli/commands/structure_command.py +138 -138
  13. tree_sitter_analyzer/cli/commands/summary_command.py +101 -101
  14. tree_sitter_analyzer/cli/commands/table_command.py +235 -235
  15. tree_sitter_analyzer/cli/info_commands.py +121 -121
  16. tree_sitter_analyzer/cli_main.py +297 -297
  17. tree_sitter_analyzer/core/__init__.py +15 -15
  18. tree_sitter_analyzer/core/analysis_engine.py +555 -555
  19. tree_sitter_analyzer/core/cache_service.py +320 -320
  20. tree_sitter_analyzer/core/engine.py +566 -566
  21. tree_sitter_analyzer/core/parser.py +293 -293
  22. tree_sitter_analyzer/encoding_utils.py +459 -459
  23. tree_sitter_analyzer/exceptions.py +406 -337
  24. tree_sitter_analyzer/file_handler.py +210 -210
  25. tree_sitter_analyzer/formatters/__init__.py +1 -1
  26. tree_sitter_analyzer/formatters/base_formatter.py +167 -167
  27. tree_sitter_analyzer/formatters/formatter_factory.py +78 -78
  28. tree_sitter_analyzer/interfaces/__init__.py +9 -9
  29. tree_sitter_analyzer/interfaces/cli.py +528 -528
  30. tree_sitter_analyzer/interfaces/cli_adapter.py +343 -343
  31. tree_sitter_analyzer/interfaces/mcp_adapter.py +206 -206
  32. tree_sitter_analyzer/interfaces/mcp_server.py +425 -405
  33. tree_sitter_analyzer/languages/__init__.py +10 -10
  34. tree_sitter_analyzer/languages/javascript_plugin.py +446 -446
  35. tree_sitter_analyzer/languages/python_plugin.py +755 -755
  36. tree_sitter_analyzer/mcp/__init__.py +31 -31
  37. tree_sitter_analyzer/mcp/resources/__init__.py +44 -44
  38. tree_sitter_analyzer/mcp/resources/code_file_resource.py +209 -209
  39. tree_sitter_analyzer/mcp/server.py +346 -333
  40. tree_sitter_analyzer/mcp/tools/__init__.py +30 -30
  41. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +654 -654
  42. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +247 -247
  43. tree_sitter_analyzer/mcp/tools/base_tool.py +54 -54
  44. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +300 -300
  45. tree_sitter_analyzer/mcp/tools/table_format_tool.py +362 -362
  46. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +543 -543
  47. tree_sitter_analyzer/mcp/utils/__init__.py +107 -107
  48. tree_sitter_analyzer/mcp/utils/error_handler.py +549 -549
  49. tree_sitter_analyzer/output_manager.py +253 -253
  50. tree_sitter_analyzer/plugins/__init__.py +280 -280
  51. tree_sitter_analyzer/plugins/base.py +529 -529
  52. tree_sitter_analyzer/plugins/manager.py +379 -379
  53. tree_sitter_analyzer/queries/__init__.py +26 -26
  54. tree_sitter_analyzer/queries/java.py +391 -391
  55. tree_sitter_analyzer/queries/javascript.py +148 -148
  56. tree_sitter_analyzer/queries/python.py +285 -285
  57. tree_sitter_analyzer/queries/typescript.py +229 -229
  58. tree_sitter_analyzer/query_loader.py +257 -257
  59. tree_sitter_analyzer/security/__init__.py +22 -0
  60. tree_sitter_analyzer/security/boundary_manager.py +237 -0
  61. tree_sitter_analyzer/security/regex_checker.py +292 -0
  62. tree_sitter_analyzer/security/validator.py +224 -0
  63. tree_sitter_analyzer/table_formatter.py +652 -473
  64. tree_sitter_analyzer/utils.py +277 -277
  65. {tree_sitter_analyzer-0.6.2.dist-info → tree_sitter_analyzer-0.8.0.dist-info}/METADATA +4 -1
  66. tree_sitter_analyzer-0.8.0.dist-info/RECORD +76 -0
  67. tree_sitter_analyzer-0.6.2.dist-info/RECORD +0 -72
  68. {tree_sitter_analyzer-0.6.2.dist-info → tree_sitter_analyzer-0.8.0.dist-info}/WHEEL +0 -0
  69. {tree_sitter_analyzer-0.6.2.dist-info → tree_sitter_analyzer-0.8.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,224 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Security Validator for Tree-sitter Analyzer
4
+
5
+ Provides unified security validation framework inspired by code-index-mcp's
6
+ ValidationHelper but enhanced for tree-sitter analyzer's requirements.
7
+ """
8
+
9
+ import os
10
+ import re
11
+ from pathlib import Path
12
+ from typing import Optional, Tuple
13
+
14
+ from ..exceptions import SecurityError
15
+ from ..utils import log_debug, log_warning
16
+ from .boundary_manager import ProjectBoundaryManager
17
+ from .regex_checker import RegexSafetyChecker
18
+
19
+
20
class SecurityValidator:
    """
    Unified security validation framework.

    This class provides comprehensive security validation for file paths,
    regex patterns, and other user inputs to prevent security vulnerabilities.

    Features:
        - Multi-layer path traversal protection
        - Project boundary enforcement
        - ReDoS attack prevention
        - Input sanitization
    """

    def __init__(self, project_root: Optional[str] = None) -> None:
        """
        Initialize security validator.

        Args:
            project_root: Optional project root directory for boundary checks.
                When omitted (or empty) no boundary manager is created and
                boundary enforcement is skipped.
        """
        self.boundary_manager = (
            ProjectBoundaryManager(project_root) if project_root else None
        )
        self.regex_checker = RegexSafetyChecker()

        log_debug(f"SecurityValidator initialized with project_root: {project_root}")

    def validate_file_path(
        self, file_path: str, base_path: Optional[str] = None
    ) -> Tuple[bool, str]:
        """
        Validate file path with comprehensive security checks.

        Implements multi-layer defense against path traversal attacks
        and ensures file access stays within project boundaries.

        Only relative paths are accepted. The project-boundary and
        symbolic-link layers run only when ``base_path`` is supplied, because
        they need a concrete location to join the relative path onto.

        Args:
            file_path: File path to validate
            base_path: Optional base path for relative path validation

        Returns:
            Tuple of (is_valid, error_message); error_message is "" on success

        Example:
            >>> validator = SecurityValidator("/project/root")
            >>> is_valid, error = validator.validate_file_path("src/main.py")
            >>> assert is_valid
        """
        try:
            # Layer 1: Basic input validation
            if not file_path or not isinstance(file_path, str):
                return False, "File path must be a non-empty string"

            # Layer 2: Null byte injection check
            if "\x00" in file_path:
                # !r keeps the raw NUL (and any other control characters)
                # out of the log output.
                log_warning(f"Null byte detected in file path: {file_path!r}")
                return False, "File path contains null bytes"

            # Layer 3: Windows drive letter check (before absolute path check).
            # Done by string inspection so it also applies on non-Windows hosts.
            if len(file_path) > 1 and file_path[1] == ":":
                return False, "Windows drive letters are not allowed"

            # Layer 4: Absolute path rejection.
            # os.path.isabs() is platform- and version-dependent (on Windows a
            # single leading slash/backslash is not reported as absolute by
            # newer Python versions), yet os.path.join() lets such a rooted
            # component discard base_path's directory. Reject any leading
            # separator explicitly; this also covers UNC-style "\\host\share".
            if file_path.startswith(("/", "\\")) or os.path.isabs(file_path):
                return False, "Absolute file paths are not allowed"

            # Layer 5: Path normalization and traversal check
            norm_path = os.path.normpath(file_path)
            if "..\\" in norm_path or "../" in norm_path or norm_path.startswith(".."):
                log_warning(f"Path traversal attempt detected: {file_path}")
                return False, "Directory traversal not allowed"

            if base_path:
                full_path = os.path.join(base_path, norm_path)

                # Layer 6: Project boundary validation
                if self.boundary_manager and not self.boundary_manager.is_within_project(
                    full_path
                ):
                    return False, "Access denied. File path must be within project directory"

                # Layer 7: Symbolic link check.
                # islink() alone is used on purpose: os.path.exists() follows
                # links and is False for a dangling symlink, which would let a
                # broken link slip through an "exists and islink" test.
                if os.path.islink(full_path):
                    log_warning(f"Symbolic link detected: {full_path}")
                    return False, "Symbolic links are not allowed"

            log_debug(f"File path validation passed: {file_path}")
            return True, ""

        except Exception as e:
            # Fail closed: any unexpected error is reported as a failed validation.
            log_warning(f"File path validation error: {e}")
            return False, f"Validation error: {e}"

    def validate_directory_path(
        self, dir_path: str, must_exist: bool = True
    ) -> Tuple[bool, str]:
        """
        Validate directory path for security and existence.

        The path first goes through ``validate_file_path`` (without a base
        path), so absolute paths and traversal sequences are rejected here
        too. The existence checks use ``dir_path`` exactly as given.

        Args:
            dir_path: Directory path to validate
            must_exist: Whether directory must exist

        Returns:
            Tuple of (is_valid, error_message); error_message is "" on success
        """
        try:
            # Basic validation using file path validator
            is_valid, error = self.validate_file_path(dir_path)
            if not is_valid:
                return False, error

            # Check if path exists and is directory
            if must_exist:
                if not os.path.exists(dir_path):
                    return False, f"Directory does not exist: {dir_path}"

                if not os.path.isdir(dir_path):
                    return False, f"Path is not a directory: {dir_path}"

            log_debug(f"Directory path validation passed: {dir_path}")
            return True, ""

        except Exception as e:
            log_warning(f"Directory path validation error: {e}")
            return False, f"Validation error: {e}"

    def validate_regex_pattern(self, pattern: str) -> Tuple[bool, str]:
        """
        Validate regex pattern for ReDoS attack prevention.

        Delegates to the ``RegexSafetyChecker`` created in ``__init__``.

        Args:
            pattern: Regex pattern to validate

        Returns:
            Tuple of (is_valid, error_message)
        """
        return self.regex_checker.validate_pattern(pattern)

    def sanitize_input(self, user_input: str, max_length: int = 1000) -> str:
        """
        Sanitize user input by removing dangerous characters.

        Null bytes, ASCII control characters and DEL are stripped; tab,
        line feed and carriage return are preserved.

        Args:
            user_input: Input string to sanitize
            max_length: Maximum allowed length (checked before stripping)

        Returns:
            Sanitized input string

        Raises:
            SecurityError: If input is not a string or is longer than
                ``max_length``
        """
        if not isinstance(user_input, str):
            raise SecurityError("Input must be a string")

        if len(user_input) > max_length:
            raise SecurityError(f"Input too long: {len(user_input)} > {max_length}")

        # Remove null bytes and control characters (keeps \t, \n, \r)
        sanitized = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', user_input)

        # Log if sanitization occurred
        if sanitized != user_input:
            log_warning("Input sanitization performed")

        return sanitized

    def validate_glob_pattern(self, pattern: str) -> Tuple[bool, str]:
        """
        Validate glob pattern for safe file matching.

        Rejects empty or non-string patterns, patterns containing a
        traversal or doubled-separator sequence, and patterns longer than
        500 characters.

        Args:
            pattern: Glob pattern to validate

        Returns:
            Tuple of (is_valid, error_message); error_message is "" on success
        """
        try:
            # Basic input validation
            if not pattern or not isinstance(pattern, str):
                return False, "Pattern must be a non-empty string"

            # Check for dangerous patterns
            dangerous_patterns = [
                "..",  # Path traversal
                "//",  # Double slashes
                "\\\\",  # Double backslashes
            ]

            for dangerous in dangerous_patterns:
                if dangerous in pattern:
                    return False, f"Dangerous pattern detected: {dangerous}"

            # Validate length
            if len(pattern) > 500:
                return False, "Pattern too long"

            log_debug(f"Glob pattern validation passed: {pattern}")
            return True, ""

        except Exception as e:
            log_warning(f"Glob pattern validation error: {e}")
            return False, f"Validation error: {e}"