tree-sitter-analyzer 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (29):
  1. tree_sitter_analyzer/__init__.py +132 -132
  2. tree_sitter_analyzer/api.py +542 -542
  3. tree_sitter_analyzer/cli/commands/base_command.py +181 -181
  4. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -139
  5. tree_sitter_analyzer/cli/info_commands.py +124 -124
  6. tree_sitter_analyzer/cli_main.py +327 -327
  7. tree_sitter_analyzer/core/analysis_engine.py +584 -584
  8. tree_sitter_analyzer/core/query_service.py +162 -162
  9. tree_sitter_analyzer/file_handler.py +212 -212
  10. tree_sitter_analyzer/formatters/base_formatter.py +169 -169
  11. tree_sitter_analyzer/interfaces/cli.py +535 -535
  12. tree_sitter_analyzer/mcp/__init__.py +1 -1
  13. tree_sitter_analyzer/mcp/resources/__init__.py +0 -1
  14. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +16 -5
  15. tree_sitter_analyzer/mcp/server.py +655 -655
  16. tree_sitter_analyzer/mcp/tools/__init__.py +28 -30
  17. tree_sitter_analyzer/mcp/utils/__init__.py +1 -2
  18. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -569
  19. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -414
  20. tree_sitter_analyzer/output_manager.py +257 -257
  21. tree_sitter_analyzer/project_detector.py +330 -330
  22. tree_sitter_analyzer/security/boundary_manager.py +260 -260
  23. tree_sitter_analyzer/security/validator.py +257 -257
  24. tree_sitter_analyzer/table_formatter.py +710 -710
  25. tree_sitter_analyzer/utils.py +335 -335
  26. {tree_sitter_analyzer-1.0.0.dist-info → tree_sitter_analyzer-1.1.1.dist-info}/METADATA +12 -12
  27. {tree_sitter_analyzer-1.0.0.dist-info → tree_sitter_analyzer-1.1.1.dist-info}/RECORD +29 -29
  28. {tree_sitter_analyzer-1.0.0.dist-info → tree_sitter_analyzer-1.1.1.dist-info}/WHEEL +0 -0
  29. {tree_sitter_analyzer-1.0.0.dist-info → tree_sitter_analyzer-1.1.1.dist-info}/entry_points.txt +0 -0
@@ -1,257 +1,257 @@
1
- #!/usr/bin/env python3
2
- """
3
- Security Validator for Tree-sitter Analyzer
4
-
5
- Provides unified security validation framework inspired by code-index-mcp's
6
- ValidationHelper but enhanced for tree-sitter analyzer's requirements.
7
- """
8
-
9
- import re
10
- from pathlib import Path
11
-
12
- from ..exceptions import SecurityError
13
- from ..utils import log_debug, log_warning
14
- from .boundary_manager import ProjectBoundaryManager
15
- from .regex_checker import RegexSafetyChecker
16
-
17
-
18
- class SecurityValidator:
19
- """
20
- Unified security validation framework.
21
-
22
- This class provides comprehensive security validation for file paths,
23
- regex patterns, and other user inputs to prevent security vulnerabilities.
24
-
25
- Features:
26
- - Multi-layer path traversal protection
27
- - Project boundary enforcement
28
- - ReDoS attack prevention
29
- - Input sanitization
30
- """
31
-
32
- def __init__(self, project_root: str | None = None) -> None:
33
- """
34
- Initialize security validator.
35
-
36
- Args:
37
- project_root: Optional project root directory for boundary checks
38
- """
39
- self.boundary_manager = (
40
- ProjectBoundaryManager(project_root) if project_root else None
41
- )
42
- self.regex_checker = RegexSafetyChecker()
43
-
44
- log_debug(f"SecurityValidator initialized with project_root: {project_root}")
45
-
46
- def validate_file_path(
47
- self, file_path: str, base_path: str | None = None
48
- ) -> tuple[bool, str]:
49
- """
50
- Validate file path with comprehensive security checks.
51
-
52
- Implements multi-layer defense against path traversal attacks
53
- and ensures file access stays within project boundaries.
54
-
55
- Args:
56
- file_path: File path to validate
57
- base_path: Optional base path for relative path validation
58
-
59
- Returns:
60
- Tuple of (is_valid, error_message)
61
-
62
- Example:
63
- >>> validator = SecurityValidator("/project/root")
64
- >>> is_valid, error = validator.validate_file_path("src/main.py")
65
- >>> assert is_valid
66
- """
67
- try:
68
- # Layer 1: Basic input validation
69
- if not file_path or not isinstance(file_path, str):
70
- return False, "File path must be a non-empty string"
71
-
72
- # Layer 2: Null byte injection check
73
- if "\x00" in file_path:
74
- log_warning(f"Null byte detected in file path: {file_path}")
75
- return False, "File path contains null bytes"
76
-
77
- # Layer 3: Windows drive letter check (only on non-Windows systems)
78
- # Check if we're on Windows by checking for drive letter support
79
- import platform
80
-
81
- if (
82
- len(file_path) > 1
83
- and file_path[1] == ":"
84
- and platform.system() != "Windows"
85
- ):
86
- return False, "Windows drive letters are not allowed on this system"
87
-
88
- # Layer 4: Absolute path check (cross-platform)
89
- if Path(file_path).is_absolute() or file_path.startswith(("/", "\\")):
90
- # If project boundaries are configured, enforce them strictly
91
- if self.boundary_manager and self.boundary_manager.project_root:
92
- if not self.boundary_manager.is_within_project(file_path):
93
- return False, "Absolute path must be within project directory"
94
- # Within project
95
- return True, ""
96
- else:
97
- # In test/dev contexts without project boundaries, allow absolute
98
- # paths under system temp folder only (safe sandbox)
99
- import tempfile
100
-
101
- temp_dir = Path(tempfile.gettempdir()).resolve()
102
- real_path = Path(file_path).resolve()
103
- try:
104
- real_path.relative_to(temp_dir)
105
- return True, ""
106
- except ValueError:
107
- pass
108
- return False, "Absolute file paths are not allowed"
109
-
110
- # Layer 5: Path normalization and traversal check
111
- norm_path = str(Path(file_path))
112
- if "..\\" in norm_path or "../" in norm_path or norm_path.startswith(".."):
113
- log_warning(f"Path traversal attempt detected: {file_path}")
114
- return False, "Directory traversal not allowed"
115
-
116
- # Layer 6: Project boundary validation
117
- if self.boundary_manager and base_path:
118
- if not self.boundary_manager.is_within_project(
119
- str(Path(base_path) / norm_path)
120
- ):
121
- return (
122
- False,
123
- "Access denied. File path must be within project directory",
124
- )
125
-
126
- # Layer 7: Symbolic link check (if file exists)
127
- if base_path:
128
- full_path = Path(base_path) / norm_path
129
- if full_path.exists() and full_path.is_symlink():
130
- log_warning(f"Symbolic link detected: {full_path}")
131
- return False, "Symbolic links are not allowed"
132
-
133
- log_debug(f"File path validation passed: {file_path}")
134
- return True, ""
135
-
136
- except Exception as e:
137
- log_warning(f"File path validation error: {e}")
138
- return False, f"Validation error: {str(e)}"
139
-
140
- def validate_directory_path(
141
- self, dir_path: str, must_exist: bool = True
142
- ) -> tuple[bool, str]:
143
- """
144
- Validate directory path for security and existence.
145
-
146
- Args:
147
- dir_path: Directory path to validate
148
- must_exist: Whether directory must exist
149
-
150
- Returns:
151
- Tuple of (is_valid, error_message)
152
- """
153
- try:
154
- # Basic validation using file path validator
155
- is_valid, error = self.validate_file_path(dir_path)
156
- if not is_valid:
157
- return False, error
158
-
159
- # Check if path exists and is directory
160
- if must_exist:
161
- dir_path_obj = Path(dir_path)
162
- if not dir_path_obj.exists():
163
- return False, f"Directory does not exist: {dir_path}"
164
-
165
- if not dir_path_obj.is_dir():
166
- return False, f"Path is not a directory: {dir_path}"
167
-
168
- log_debug(f"Directory path validation passed: {dir_path}")
169
- return True, ""
170
-
171
- except Exception as e:
172
- log_warning(f"Directory path validation error: {e}")
173
- return False, f"Validation error: {str(e)}"
174
-
175
- def validate_regex_pattern(self, pattern: str) -> tuple[bool, str]:
176
- """
177
- Validate regex pattern for ReDoS attack prevention.
178
-
179
- Args:
180
- pattern: Regex pattern to validate
181
-
182
- Returns:
183
- Tuple of (is_valid, error_message)
184
- """
185
- return self.regex_checker.validate_pattern(pattern)
186
-
187
- def sanitize_input(self, user_input: str, max_length: int = 1000) -> str:
188
- """
189
- Sanitize user input by removing dangerous characters.
190
-
191
- Args:
192
- user_input: Input string to sanitize
193
- max_length: Maximum allowed length
194
-
195
- Returns:
196
- Sanitized input string
197
-
198
- Raises:
199
- SecurityError: If input is too long or contains dangerous content
200
- """
201
- if not isinstance(user_input, str):
202
- raise SecurityError("Input must be a string")
203
-
204
- if len(user_input) > max_length:
205
- raise SecurityError(f"Input too long: {len(user_input)} > {max_length}")
206
-
207
- # Remove null bytes and control characters
208
- sanitized = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", user_input)
209
-
210
- # Remove HTML/XML tags for XSS prevention
211
- sanitized = re.sub(r"<[^>]*>", "", sanitized)
212
-
213
- # Remove potentially dangerous characters
214
- sanitized = re.sub(r'[<>"\']', "", sanitized)
215
-
216
- # Log if sanitization occurred
217
- if sanitized != user_input:
218
- log_warning("Input sanitization performed")
219
-
220
- return sanitized
221
-
222
- def validate_glob_pattern(self, pattern: str) -> tuple[bool, str]:
223
- """
224
- Validate glob pattern for safe file matching.
225
-
226
- Args:
227
- pattern: Glob pattern to validate
228
-
229
- Returns:
230
- Tuple of (is_valid, error_message)
231
- """
232
- try:
233
- # Basic input validation
234
- if not pattern or not isinstance(pattern, str):
235
- return False, "Pattern must be a non-empty string"
236
-
237
- # Check for dangerous patterns
238
- dangerous_patterns = [
239
- "..", # Path traversal
240
- "//", # Double slashes
241
- "\\\\", # Double backslashes
242
- ]
243
-
244
- for dangerous in dangerous_patterns:
245
- if dangerous in pattern:
246
- return False, f"Dangerous pattern detected: {dangerous}"
247
-
248
- # Validate length
249
- if len(pattern) > 500:
250
- return False, "Pattern too long"
251
-
252
- log_debug(f"Glob pattern validation passed: {pattern}")
253
- return True, ""
254
-
255
- except Exception as e:
256
- log_warning(f"Glob pattern validation error: {e}")
257
- return False, f"Validation error: {str(e)}"
1
+ #!/usr/bin/env python3
2
+ """
3
+ Security Validator for Tree-sitter Analyzer
4
+
5
+ Provides unified security validation framework inspired by code-index-mcp's
6
+ ValidationHelper but enhanced for tree-sitter analyzer's requirements.
7
+ """
8
+
9
+ import re
10
+ from pathlib import Path
11
+
12
+ from ..exceptions import SecurityError
13
+ from ..utils import log_debug, log_warning
14
+ from .boundary_manager import ProjectBoundaryManager
15
+ from .regex_checker import RegexSafetyChecker
16
+
17
+
18
+ class SecurityValidator:
19
+ """
20
+ Unified security validation framework.
21
+
22
+ This class provides comprehensive security validation for file paths,
23
+ regex patterns, and other user inputs to prevent security vulnerabilities.
24
+
25
+ Features:
26
+ - Multi-layer path traversal protection
27
+ - Project boundary enforcement
28
+ - ReDoS attack prevention
29
+ - Input sanitization
30
+ """
31
+
32
+ def __init__(self, project_root: str | None = None) -> None:
33
+ """
34
+ Initialize security validator.
35
+
36
+ Args:
37
+ project_root: Optional project root directory for boundary checks
38
+ """
39
+ self.boundary_manager = (
40
+ ProjectBoundaryManager(project_root) if project_root else None
41
+ )
42
+ self.regex_checker = RegexSafetyChecker()
43
+
44
+ log_debug(f"SecurityValidator initialized with project_root: {project_root}")
45
+
46
+ def validate_file_path(
47
+ self, file_path: str, base_path: str | None = None
48
+ ) -> tuple[bool, str]:
49
+ """
50
+ Validate file path with comprehensive security checks.
51
+
52
+ Implements multi-layer defense against path traversal attacks
53
+ and ensures file access stays within project boundaries.
54
+
55
+ Args:
56
+ file_path: File path to validate
57
+ base_path: Optional base path for relative path validation
58
+
59
+ Returns:
60
+ Tuple of (is_valid, error_message)
61
+
62
+ Example:
63
+ >>> validator = SecurityValidator("/project/root")
64
+ >>> is_valid, error = validator.validate_file_path("src/main.py")
65
+ >>> assert is_valid
66
+ """
67
+ try:
68
+ # Layer 1: Basic input validation
69
+ if not file_path or not isinstance(file_path, str):
70
+ return False, "File path must be a non-empty string"
71
+
72
+ # Layer 2: Null byte injection check
73
+ if "\x00" in file_path:
74
+ log_warning(f"Null byte detected in file path: {file_path}")
75
+ return False, "File path contains null bytes"
76
+
77
+ # Layer 3: Windows drive letter check (only on non-Windows systems)
78
+ # Check if we're on Windows by checking for drive letter support
79
+ import platform
80
+
81
+ if (
82
+ len(file_path) > 1
83
+ and file_path[1] == ":"
84
+ and platform.system() != "Windows"
85
+ ):
86
+ return False, "Windows drive letters are not allowed on this system"
87
+
88
+ # Layer 4: Absolute path check (cross-platform)
89
+ if Path(file_path).is_absolute() or file_path.startswith(("/", "\\")):
90
+ # If project boundaries are configured, enforce them strictly
91
+ if self.boundary_manager and self.boundary_manager.project_root:
92
+ if not self.boundary_manager.is_within_project(file_path):
93
+ return False, "Absolute path must be within project directory"
94
+ # Within project
95
+ return True, ""
96
+ else:
97
+ # In test/dev contexts without project boundaries, allow absolute
98
+ # paths under system temp folder only (safe sandbox)
99
+ import tempfile
100
+
101
+ temp_dir = Path(tempfile.gettempdir()).resolve()
102
+ real_path = Path(file_path).resolve()
103
+ try:
104
+ real_path.relative_to(temp_dir)
105
+ return True, ""
106
+ except ValueError:
107
+ pass
108
+ return False, "Absolute file paths are not allowed"
109
+
110
+ # Layer 5: Path normalization and traversal check
111
+ norm_path = str(Path(file_path))
112
+ if "..\\" in norm_path or "../" in norm_path or norm_path.startswith(".."):
113
+ log_warning(f"Path traversal attempt detected: {file_path}")
114
+ return False, "Directory traversal not allowed"
115
+
116
+ # Layer 6: Project boundary validation
117
+ if self.boundary_manager and base_path:
118
+ if not self.boundary_manager.is_within_project(
119
+ str(Path(base_path) / norm_path)
120
+ ):
121
+ return (
122
+ False,
123
+ "Access denied. File path must be within project directory",
124
+ )
125
+
126
+ # Layer 7: Symbolic link check (if file exists)
127
+ if base_path:
128
+ full_path = Path(base_path) / norm_path
129
+ if full_path.exists() and full_path.is_symlink():
130
+ log_warning(f"Symbolic link detected: {full_path}")
131
+ return False, "Symbolic links are not allowed"
132
+
133
+ log_debug(f"File path validation passed: {file_path}")
134
+ return True, ""
135
+
136
+ except Exception as e:
137
+ log_warning(f"File path validation error: {e}")
138
+ return False, f"Validation error: {str(e)}"
139
+
140
+ def validate_directory_path(
141
+ self, dir_path: str, must_exist: bool = True
142
+ ) -> tuple[bool, str]:
143
+ """
144
+ Validate directory path for security and existence.
145
+
146
+ Args:
147
+ dir_path: Directory path to validate
148
+ must_exist: Whether directory must exist
149
+
150
+ Returns:
151
+ Tuple of (is_valid, error_message)
152
+ """
153
+ try:
154
+ # Basic validation using file path validator
155
+ is_valid, error = self.validate_file_path(dir_path)
156
+ if not is_valid:
157
+ return False, error
158
+
159
+ # Check if path exists and is directory
160
+ if must_exist:
161
+ dir_path_obj = Path(dir_path)
162
+ if not dir_path_obj.exists():
163
+ return False, f"Directory does not exist: {dir_path}"
164
+
165
+ if not dir_path_obj.is_dir():
166
+ return False, f"Path is not a directory: {dir_path}"
167
+
168
+ log_debug(f"Directory path validation passed: {dir_path}")
169
+ return True, ""
170
+
171
+ except Exception as e:
172
+ log_warning(f"Directory path validation error: {e}")
173
+ return False, f"Validation error: {str(e)}"
174
+
175
+ def validate_regex_pattern(self, pattern: str) -> tuple[bool, str]:
176
+ """
177
+ Validate regex pattern for ReDoS attack prevention.
178
+
179
+ Args:
180
+ pattern: Regex pattern to validate
181
+
182
+ Returns:
183
+ Tuple of (is_valid, error_message)
184
+ """
185
+ return self.regex_checker.validate_pattern(pattern)
186
+
187
+ def sanitize_input(self, user_input: str, max_length: int = 1000) -> str:
188
+ """
189
+ Sanitize user input by removing dangerous characters.
190
+
191
+ Args:
192
+ user_input: Input string to sanitize
193
+ max_length: Maximum allowed length
194
+
195
+ Returns:
196
+ Sanitized input string
197
+
198
+ Raises:
199
+ SecurityError: If input is too long or contains dangerous content
200
+ """
201
+ if not isinstance(user_input, str):
202
+ raise SecurityError("Input must be a string")
203
+
204
+ if len(user_input) > max_length:
205
+ raise SecurityError(f"Input too long: {len(user_input)} > {max_length}")
206
+
207
+ # Remove null bytes and control characters
208
+ sanitized = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", user_input)
209
+
210
+ # Remove HTML/XML tags for XSS prevention
211
+ sanitized = re.sub(r"<[^>]*>", "", sanitized)
212
+
213
+ # Remove potentially dangerous characters
214
+ sanitized = re.sub(r'[<>"\']', "", sanitized)
215
+
216
+ # Log if sanitization occurred
217
+ if sanitized != user_input:
218
+ log_warning("Input sanitization performed")
219
+
220
+ return sanitized
221
+
222
+ def validate_glob_pattern(self, pattern: str) -> tuple[bool, str]:
223
+ """
224
+ Validate glob pattern for safe file matching.
225
+
226
+ Args:
227
+ pattern: Glob pattern to validate
228
+
229
+ Returns:
230
+ Tuple of (is_valid, error_message)
231
+ """
232
+ try:
233
+ # Basic input validation
234
+ if not pattern or not isinstance(pattern, str):
235
+ return False, "Pattern must be a non-empty string"
236
+
237
+ # Check for dangerous patterns
238
+ dangerous_patterns = [
239
+ "..", # Path traversal
240
+ "//", # Double slashes
241
+ "\\\\", # Double backslashes
242
+ ]
243
+
244
+ for dangerous in dangerous_patterns:
245
+ if dangerous in pattern:
246
+ return False, f"Dangerous pattern detected: {dangerous}"
247
+
248
+ # Validate length
249
+ if len(pattern) > 500:
250
+ return False, "Pattern too long"
251
+
252
+ log_debug(f"Glob pattern validation passed: {pattern}")
253
+ return True, ""
254
+
255
+ except Exception as e:
256
+ log_warning(f"Glob pattern validation error: {e}")
257
+ return False, f"Validation error: {str(e)}"