tree-sitter-analyzer 1.9.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
|
@@ -0,0 +1,599 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Security Validator for Tree-sitter Analyzer
|
|
4
|
+
|
|
5
|
+
Provides unified security validation framework inspired by code-index-mcp's
|
|
6
|
+
ValidationHelper but enhanced for tree-sitter analyzer's requirements.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from ..exceptions import SecurityError
|
|
13
|
+
from ..utils import log_debug, log_warning
|
|
14
|
+
from .boundary_manager import ProjectBoundaryManager
|
|
15
|
+
from .regex_checker import RegexSafetyChecker
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SecurityValidator:
|
|
19
|
+
"""
|
|
20
|
+
Unified security validation framework.
|
|
21
|
+
|
|
22
|
+
This class provides comprehensive security validation for file paths,
|
|
23
|
+
regex patterns, and other user inputs to prevent security vulnerabilities.
|
|
24
|
+
|
|
25
|
+
Features:
|
|
26
|
+
- Multi-layer path traversal protection
|
|
27
|
+
- Project boundary enforcement
|
|
28
|
+
- ReDoS attack prevention
|
|
29
|
+
- Input sanitization
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(self, project_root: str | None = None) -> None:
|
|
33
|
+
"""
|
|
34
|
+
Initialize security validator.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
project_root: Optional project root directory for boundary checks
|
|
38
|
+
"""
|
|
39
|
+
self.boundary_manager: ProjectBoundaryManager | None
|
|
40
|
+
|
|
41
|
+
# Ensure project_root is properly resolved if provided
|
|
42
|
+
if project_root:
|
|
43
|
+
try:
|
|
44
|
+
resolved_root = str(Path(project_root).resolve())
|
|
45
|
+
self.boundary_manager = ProjectBoundaryManager(resolved_root)
|
|
46
|
+
log_debug(
|
|
47
|
+
f"SecurityValidator initialized with resolved project_root: {resolved_root}"
|
|
48
|
+
)
|
|
49
|
+
except Exception as e:
|
|
50
|
+
log_warning(
|
|
51
|
+
f"Failed to initialize ProjectBoundaryManager with {project_root}: {e}"
|
|
52
|
+
)
|
|
53
|
+
self.boundary_manager = None
|
|
54
|
+
else:
|
|
55
|
+
self.boundary_manager = None
|
|
56
|
+
|
|
57
|
+
self.regex_checker = RegexSafetyChecker()
|
|
58
|
+
|
|
59
|
+
log_debug(f"SecurityValidator initialized with project_root: {project_root}")
|
|
60
|
+
|
|
61
|
+
def validate_file_path(
|
|
62
|
+
self, file_path: str, base_path: str | None = None
|
|
63
|
+
) -> tuple[bool, str]:
|
|
64
|
+
"""
|
|
65
|
+
Validate file path with comprehensive security checks.
|
|
66
|
+
|
|
67
|
+
Implements multi-layer defense against path traversal attacks
|
|
68
|
+
and ensures file access stays within project boundaries.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
file_path: File path to validate
|
|
72
|
+
base_path: Optional base path for relative path validation
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
Tuple of (is_valid, error_message)
|
|
76
|
+
|
|
77
|
+
Example:
|
|
78
|
+
>>> validator = SecurityValidator("/project/root")
|
|
79
|
+
>>> is_valid, error = validator.validate_file_path("src/main.py")
|
|
80
|
+
>>> assert is_valid
|
|
81
|
+
"""
|
|
82
|
+
try:
|
|
83
|
+
# Layer 1: Basic input validation
|
|
84
|
+
if not file_path or not isinstance(file_path, str):
|
|
85
|
+
return False, "File path must be a non-empty string"
|
|
86
|
+
|
|
87
|
+
# Layer 2: Null byte injection check
|
|
88
|
+
if "\x00" in file_path:
|
|
89
|
+
log_warning(f"Null byte detected in file path: {file_path}")
|
|
90
|
+
return False, "File path contains null bytes"
|
|
91
|
+
|
|
92
|
+
# Layer 3: Windows drive letter check (only on non-Windows systems)
|
|
93
|
+
is_valid, error = self._validate_windows_drive_letter(file_path)
|
|
94
|
+
if not is_valid:
|
|
95
|
+
return False, error
|
|
96
|
+
|
|
97
|
+
# Layer 4: Absolute path security validation
|
|
98
|
+
if Path(file_path).is_absolute() or file_path.startswith(("/", "\\")):
|
|
99
|
+
is_valid, error = self._validate_absolute_path(file_path)
|
|
100
|
+
if not is_valid:
|
|
101
|
+
return False, error
|
|
102
|
+
|
|
103
|
+
# Layer 5: Path normalization and traversal check
|
|
104
|
+
is_valid, error = self._validate_path_traversal(file_path)
|
|
105
|
+
if not is_valid:
|
|
106
|
+
return False, error
|
|
107
|
+
|
|
108
|
+
# Layer 6: Project boundary validation
|
|
109
|
+
is_valid, error = self._validate_project_boundary(file_path, base_path)
|
|
110
|
+
if not is_valid:
|
|
111
|
+
return False, error
|
|
112
|
+
|
|
113
|
+
# Layer 7: Symbolic link and junction check (check both original and resolved paths)
|
|
114
|
+
# First check the original file_path directly for symlinks and junctions
|
|
115
|
+
try:
|
|
116
|
+
original_path = Path(file_path)
|
|
117
|
+
log_debug(f"Checking symlink status for original path: {original_path}")
|
|
118
|
+
# Check for symlinks even if the file doesn't exist yet (broken symlinks)
|
|
119
|
+
is_symlink = original_path.is_symlink()
|
|
120
|
+
log_debug(f"original_path.is_symlink() = {is_symlink}")
|
|
121
|
+
if is_symlink:
|
|
122
|
+
log_warning(
|
|
123
|
+
f"Symbolic link detected in original path: {original_path}"
|
|
124
|
+
)
|
|
125
|
+
return False, "Symbolic links are not allowed"
|
|
126
|
+
|
|
127
|
+
# Additional check for Windows junctions and reparse points (only if exists)
|
|
128
|
+
if original_path.exists() and self._is_junction_or_reparse_point(
|
|
129
|
+
original_path
|
|
130
|
+
):
|
|
131
|
+
log_warning(
|
|
132
|
+
f"Junction or reparse point detected in original path: {original_path}"
|
|
133
|
+
)
|
|
134
|
+
return False, "Junctions and reparse points are not allowed"
|
|
135
|
+
|
|
136
|
+
except (OSError, PermissionError) as e:
|
|
137
|
+
# If we can't check symlink status, continue with other checks
|
|
138
|
+
log_debug(f"Exception checking symlink status: {e}")
|
|
139
|
+
pass
|
|
140
|
+
|
|
141
|
+
# Then check the full path (base_path + file_path) if base_path is provided
|
|
142
|
+
if base_path:
|
|
143
|
+
norm_path = str(Path(file_path))
|
|
144
|
+
full_path = Path(base_path) / norm_path
|
|
145
|
+
|
|
146
|
+
# Check if the full path is a symlink or junction
|
|
147
|
+
try:
|
|
148
|
+
# Check for symlinks even if the file doesn't exist yet (broken symlinks)
|
|
149
|
+
if full_path.is_symlink():
|
|
150
|
+
log_warning(f"Symbolic link detected: {full_path}")
|
|
151
|
+
return False, "Symbolic links are not allowed"
|
|
152
|
+
|
|
153
|
+
# Additional check for Windows junctions and reparse points (only if exists)
|
|
154
|
+
if full_path.exists() and self._is_junction_or_reparse_point(
|
|
155
|
+
full_path
|
|
156
|
+
):
|
|
157
|
+
log_warning(f"Junction or reparse point detected: {full_path}")
|
|
158
|
+
return False, "Junctions and reparse points are not allowed"
|
|
159
|
+
|
|
160
|
+
except (OSError, PermissionError):
|
|
161
|
+
# If we can't check symlink status due to permissions, be cautious
|
|
162
|
+
log_warning(f"Cannot verify symlink status for: {full_path}")
|
|
163
|
+
pass
|
|
164
|
+
|
|
165
|
+
# Check parent directories for junctions (Windows-specific security measure)
|
|
166
|
+
try:
|
|
167
|
+
if self._has_junction_in_path(full_path):
|
|
168
|
+
log_warning(f"Junction detected in path hierarchy: {full_path}")
|
|
169
|
+
return False, "Paths containing junctions are not allowed"
|
|
170
|
+
except (OSError, PermissionError):
|
|
171
|
+
# If we can't check parent directories, continue
|
|
172
|
+
pass
|
|
173
|
+
else:
|
|
174
|
+
# For absolute paths or when no base_path is provided, use original_path
|
|
175
|
+
full_path = original_path
|
|
176
|
+
|
|
177
|
+
# Check parent directories for junctions
|
|
178
|
+
try:
|
|
179
|
+
if self._has_junction_in_path(full_path):
|
|
180
|
+
log_warning(f"Junction detected in path hierarchy: {full_path}")
|
|
181
|
+
return False, "Paths containing junctions are not allowed"
|
|
182
|
+
except (OSError, PermissionError):
|
|
183
|
+
# If we can't check parent directories, continue
|
|
184
|
+
pass
|
|
185
|
+
|
|
186
|
+
log_debug(f"File path validation passed: {file_path}")
|
|
187
|
+
return True, ""
|
|
188
|
+
|
|
189
|
+
except Exception as e:
|
|
190
|
+
log_warning(f"File path validation error: {e}")
|
|
191
|
+
return False, f"Validation error: {str(e)}"
|
|
192
|
+
|
|
193
|
+
def validate_directory_path(
|
|
194
|
+
self, dir_path: str, must_exist: bool = True
|
|
195
|
+
) -> tuple[bool, str]:
|
|
196
|
+
"""
|
|
197
|
+
Validate directory path for security and existence.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
dir_path: Directory path to validate
|
|
201
|
+
must_exist: Whether directory must exist
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
Tuple of (is_valid, error_message)
|
|
205
|
+
"""
|
|
206
|
+
try:
|
|
207
|
+
# Basic validation using file path validator
|
|
208
|
+
is_valid, error = self.validate_file_path(dir_path)
|
|
209
|
+
if not is_valid:
|
|
210
|
+
return False, error
|
|
211
|
+
|
|
212
|
+
# Check if path exists and is directory
|
|
213
|
+
if must_exist:
|
|
214
|
+
dir_path_obj = Path(dir_path)
|
|
215
|
+
if not dir_path_obj.exists():
|
|
216
|
+
return False, f"Directory does not exist: {dir_path}"
|
|
217
|
+
|
|
218
|
+
if not dir_path_obj.is_dir():
|
|
219
|
+
return False, f"Path is not a directory: {dir_path}"
|
|
220
|
+
|
|
221
|
+
log_debug(f"Directory path validation passed: {dir_path}")
|
|
222
|
+
return True, ""
|
|
223
|
+
|
|
224
|
+
except Exception as e:
|
|
225
|
+
log_warning(f"Directory path validation error: {e}")
|
|
226
|
+
return False, f"Validation error: {str(e)}"
|
|
227
|
+
|
|
228
|
+
def validate_regex_pattern(self, pattern: str) -> tuple[bool, str]:
|
|
229
|
+
"""
|
|
230
|
+
Validate regex pattern for ReDoS attack prevention.
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
pattern: Regex pattern to validate
|
|
234
|
+
|
|
235
|
+
Returns:
|
|
236
|
+
Tuple of (is_valid, error_message)
|
|
237
|
+
"""
|
|
238
|
+
return self.regex_checker.validate_pattern(pattern)
|
|
239
|
+
|
|
240
|
+
def sanitize_input(self, user_input: str, max_length: int = 1000) -> str:
|
|
241
|
+
"""
|
|
242
|
+
Sanitize user input by removing dangerous characters.
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
user_input: Input string to sanitize
|
|
246
|
+
max_length: Maximum allowed length
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
Sanitized input string
|
|
250
|
+
|
|
251
|
+
Raises:
|
|
252
|
+
SecurityError: If input is too long or contains dangerous content
|
|
253
|
+
"""
|
|
254
|
+
if not isinstance(user_input, str):
|
|
255
|
+
raise SecurityError("Input must be a string")
|
|
256
|
+
|
|
257
|
+
if len(user_input) > max_length:
|
|
258
|
+
raise SecurityError(f"Input too long: {len(user_input)} > {max_length}")
|
|
259
|
+
|
|
260
|
+
# Remove null bytes and control characters
|
|
261
|
+
sanitized = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", user_input)
|
|
262
|
+
|
|
263
|
+
# Remove HTML/XML tags for XSS prevention
|
|
264
|
+
sanitized = re.sub(r"<[^>]*>", "", sanitized)
|
|
265
|
+
|
|
266
|
+
# Remove potentially dangerous characters
|
|
267
|
+
sanitized = re.sub(r'[<>"\']', "", sanitized)
|
|
268
|
+
|
|
269
|
+
# Log if sanitization occurred
|
|
270
|
+
if sanitized != user_input:
|
|
271
|
+
log_warning("Input sanitization performed")
|
|
272
|
+
|
|
273
|
+
return sanitized
|
|
274
|
+
|
|
275
|
+
def validate_glob_pattern(self, pattern: str) -> tuple[bool, str]:
|
|
276
|
+
"""
|
|
277
|
+
Validate glob pattern for safe file matching.
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
pattern: Glob pattern to validate
|
|
281
|
+
|
|
282
|
+
Returns:
|
|
283
|
+
Tuple of (is_valid, error_message)
|
|
284
|
+
"""
|
|
285
|
+
try:
|
|
286
|
+
# Basic input validation
|
|
287
|
+
if not pattern or not isinstance(pattern, str):
|
|
288
|
+
return False, "Pattern must be a non-empty string"
|
|
289
|
+
|
|
290
|
+
# Check for dangerous patterns
|
|
291
|
+
dangerous_patterns = [
|
|
292
|
+
"..", # Path traversal
|
|
293
|
+
"//", # Double slashes
|
|
294
|
+
"\\\\", # Double backslashes
|
|
295
|
+
]
|
|
296
|
+
|
|
297
|
+
for dangerous in dangerous_patterns:
|
|
298
|
+
if dangerous in pattern:
|
|
299
|
+
return False, f"Dangerous pattern detected: {dangerous}"
|
|
300
|
+
|
|
301
|
+
# Validate length
|
|
302
|
+
if len(pattern) > 500:
|
|
303
|
+
return False, "Pattern too long"
|
|
304
|
+
|
|
305
|
+
log_debug(f"Glob pattern validation passed: {pattern}")
|
|
306
|
+
return True, ""
|
|
307
|
+
|
|
308
|
+
except Exception as e:
|
|
309
|
+
log_warning(f"Glob pattern validation error: {e}")
|
|
310
|
+
return False, f"Validation error: {str(e)}"
|
|
311
|
+
|
|
312
|
+
def validate_path(
|
|
313
|
+
self, path: str, base_path: str | None = None
|
|
314
|
+
) -> tuple[bool, str]:
|
|
315
|
+
"""
|
|
316
|
+
Alias for validate_file_path for backward compatibility.
|
|
317
|
+
|
|
318
|
+
Args:
|
|
319
|
+
path: Path to validate
|
|
320
|
+
base_path: Optional base path for relative path validation
|
|
321
|
+
|
|
322
|
+
Returns:
|
|
323
|
+
Tuple of (is_valid, error_message)
|
|
324
|
+
"""
|
|
325
|
+
return self.validate_file_path(path, base_path)
|
|
326
|
+
|
|
327
|
+
def is_safe_path(self, path: str, base_path: str | None = None) -> bool:
|
|
328
|
+
"""
|
|
329
|
+
Check if a path is safe (backward compatibility method).
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
path: Path to check
|
|
333
|
+
base_path: Optional base path for relative path validation
|
|
334
|
+
|
|
335
|
+
Returns:
|
|
336
|
+
True if path is safe, False otherwise
|
|
337
|
+
"""
|
|
338
|
+
is_valid, _ = self.validate_file_path(path, base_path)
|
|
339
|
+
return is_valid
|
|
340
|
+
|
|
341
|
+
def _is_junction_or_reparse_point(self, path: Path) -> bool:
|
|
342
|
+
"""
|
|
343
|
+
Check if a path is a Windows junction or reparse point.
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
path: Path to check
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
True if the path is a junction or reparse point
|
|
350
|
+
"""
|
|
351
|
+
try:
|
|
352
|
+
import platform
|
|
353
|
+
|
|
354
|
+
if platform.system() != "Windows":
|
|
355
|
+
return False
|
|
356
|
+
|
|
357
|
+
# On Windows, check for reparse points using stat
|
|
358
|
+
import stat
|
|
359
|
+
|
|
360
|
+
if path.exists():
|
|
361
|
+
path_stat = path.stat()
|
|
362
|
+
# Check if it has the reparse point attribute
|
|
363
|
+
if hasattr(stat, "FILE_ATTRIBUTE_REPARSE_POINT"):
|
|
364
|
+
return bool(
|
|
365
|
+
path_stat.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
# Alternative method using Windows API
|
|
369
|
+
try:
|
|
370
|
+
import ctypes
|
|
371
|
+
from ctypes import wintypes
|
|
372
|
+
|
|
373
|
+
# GetFileAttributesW function
|
|
374
|
+
_GetFileAttributesW = ctypes.windll.kernel32.GetFileAttributesW
|
|
375
|
+
_GetFileAttributesW.argtypes = [wintypes.LPCWSTR]
|
|
376
|
+
_GetFileAttributesW.restype = wintypes.DWORD
|
|
377
|
+
|
|
378
|
+
FILE_ATTRIBUTE_REPARSE_POINT = 0x400
|
|
379
|
+
INVALID_FILE_ATTRIBUTES = 0xFFFFFFFF
|
|
380
|
+
|
|
381
|
+
attributes = _GetFileAttributesW(str(path))
|
|
382
|
+
if attributes != INVALID_FILE_ATTRIBUTES:
|
|
383
|
+
return bool(attributes & FILE_ATTRIBUTE_REPARSE_POINT)
|
|
384
|
+
|
|
385
|
+
except (ImportError, AttributeError, OSError):
|
|
386
|
+
pass # nosec
|
|
387
|
+
|
|
388
|
+
except Exception:
|
|
389
|
+
# If any error occurs, assume it's not a junction for safety
|
|
390
|
+
pass # nosec
|
|
391
|
+
|
|
392
|
+
return False
|
|
393
|
+
|
|
394
|
+
def _has_junction_in_path(self, path: Path) -> bool:
|
|
395
|
+
"""
|
|
396
|
+
Check if any parent directory in the path is a junction.
|
|
397
|
+
|
|
398
|
+
Args:
|
|
399
|
+
path: Path to check
|
|
400
|
+
|
|
401
|
+
Returns:
|
|
402
|
+
True if any parent directory is a junction
|
|
403
|
+
"""
|
|
404
|
+
try:
|
|
405
|
+
current_path = path.resolve() if path.exists() else path
|
|
406
|
+
|
|
407
|
+
# Check each parent directory
|
|
408
|
+
for parent in current_path.parents:
|
|
409
|
+
if self._is_junction_or_reparse_point(parent):
|
|
410
|
+
return True
|
|
411
|
+
|
|
412
|
+
except Exception:
|
|
413
|
+
# If any error occurs, assume no junctions for safety
|
|
414
|
+
pass # nosec
|
|
415
|
+
|
|
416
|
+
return False
|
|
417
|
+
|
|
418
|
+
def _validate_windows_drive_letter(self, file_path: str) -> tuple[bool, str]:
|
|
419
|
+
"""
|
|
420
|
+
Validate Windows drive letter on non-Windows systems.
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
file_path: File path to validate
|
|
424
|
+
|
|
425
|
+
Returns:
|
|
426
|
+
Tuple of (is_valid, error_message)
|
|
427
|
+
"""
|
|
428
|
+
import platform
|
|
429
|
+
|
|
430
|
+
if (
|
|
431
|
+
len(file_path) > 1
|
|
432
|
+
and file_path[1] == ":"
|
|
433
|
+
and platform.system() != "Windows"
|
|
434
|
+
):
|
|
435
|
+
return (
|
|
436
|
+
False,
|
|
437
|
+
f"Windows drive letters are not allowed on {platform.system()} system",
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
return True, ""
|
|
441
|
+
|
|
442
|
+
def _validate_absolute_path(self, file_path: str) -> tuple[bool, str]:
|
|
443
|
+
"""
|
|
444
|
+
Validate absolute path with project boundary and test environment checks.
|
|
445
|
+
|
|
446
|
+
Args:
|
|
447
|
+
file_path: Absolute file path to validate
|
|
448
|
+
|
|
449
|
+
Returns:
|
|
450
|
+
Tuple of (is_valid, error_message)
|
|
451
|
+
"""
|
|
452
|
+
log_debug(f"Processing absolute path: {file_path}")
|
|
453
|
+
|
|
454
|
+
# Check project boundaries first (highest priority)
|
|
455
|
+
if self.boundary_manager and self.boundary_manager.project_root:
|
|
456
|
+
if not self.boundary_manager.is_within_project(file_path):
|
|
457
|
+
return False, "Absolute path must be within project directory"
|
|
458
|
+
log_debug("Absolute path is within project boundaries")
|
|
459
|
+
return True, ""
|
|
460
|
+
|
|
461
|
+
# If no project boundaries, check test environment allowances
|
|
462
|
+
is_test_allowed, error = self._check_test_environment_access(file_path)
|
|
463
|
+
if not is_test_allowed:
|
|
464
|
+
return False, error
|
|
465
|
+
|
|
466
|
+
log_debug("Absolute path allowed in test environment")
|
|
467
|
+
return True, ""
|
|
468
|
+
|
|
469
|
+
def _check_test_environment_access(self, file_path: str) -> tuple[bool, str]:
|
|
470
|
+
"""
|
|
471
|
+
Check if absolute path access is allowed in test/development environment.
|
|
472
|
+
|
|
473
|
+
This method allows access to system temporary directories when no project
|
|
474
|
+
boundaries are configured, which is common in test environments.
|
|
475
|
+
|
|
476
|
+
Args:
|
|
477
|
+
file_path: File path to check
|
|
478
|
+
|
|
479
|
+
Returns:
|
|
480
|
+
Tuple of (is_allowed, error_message)
|
|
481
|
+
"""
|
|
482
|
+
import os
|
|
483
|
+
import tempfile
|
|
484
|
+
|
|
485
|
+
try:
|
|
486
|
+
# Check if we're in a test environment
|
|
487
|
+
is_test_env = (
|
|
488
|
+
"pytest" in os.environ.get("_", "")
|
|
489
|
+
or "PYTEST_CURRENT_TEST" in os.environ
|
|
490
|
+
or "CI" in os.environ
|
|
491
|
+
or "GITHUB_ACTIONS" in os.environ
|
|
492
|
+
or any(
|
|
493
|
+
"test" in arg.lower()
|
|
494
|
+
for arg in getattr(getattr(os, "sys", None), "argv", [])
|
|
495
|
+
if hasattr(os, "sys")
|
|
496
|
+
)
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
if is_test_env:
|
|
500
|
+
log_debug("Test environment detected - allowing temporary file access")
|
|
501
|
+
|
|
502
|
+
# Allow access to common temporary directories
|
|
503
|
+
temp_dirs = [
|
|
504
|
+
Path(tempfile.gettempdir()).resolve(),
|
|
505
|
+
Path("/tmp").resolve() if Path("/tmp").exists() else None, # nosec
|
|
506
|
+
(Path("/var/tmp").resolve() if Path("/var/tmp").exists() else None), # nosec
|
|
507
|
+
]
|
|
508
|
+
|
|
509
|
+
real_path = Path(file_path).resolve()
|
|
510
|
+
log_debug(f"Checking test environment access: {real_path}")
|
|
511
|
+
|
|
512
|
+
for temp_dir in temp_dirs:
|
|
513
|
+
if temp_dir and temp_dir.exists():
|
|
514
|
+
try:
|
|
515
|
+
real_path.relative_to(temp_dir)
|
|
516
|
+
log_debug(
|
|
517
|
+
f"Path is under temp directory {temp_dir} - allowed in test environment"
|
|
518
|
+
)
|
|
519
|
+
return True, ""
|
|
520
|
+
except ValueError:
|
|
521
|
+
continue
|
|
522
|
+
|
|
523
|
+
# In test environment, also allow access to files that start with temp file patterns
|
|
524
|
+
file_name = Path(file_path).name
|
|
525
|
+
if (
|
|
526
|
+
file_name.startswith(("tmp", "temp"))
|
|
527
|
+
or "_test_" in file_name
|
|
528
|
+
or file_name.endswith(("_test.py", "_test.js", ".tmp"))
|
|
529
|
+
):
|
|
530
|
+
log_debug(
|
|
531
|
+
"Temporary test file pattern detected - allowed in test environment"
|
|
532
|
+
)
|
|
533
|
+
return True, ""
|
|
534
|
+
|
|
535
|
+
# Fallback to original temp directory check
|
|
536
|
+
temp_dir = Path(tempfile.gettempdir()).resolve()
|
|
537
|
+
real_path = Path(file_path).resolve()
|
|
538
|
+
|
|
539
|
+
log_debug(f"Checking test environment access: {real_path} under {temp_dir}")
|
|
540
|
+
|
|
541
|
+
# Allow access under system temp directory (safe sandbox)
|
|
542
|
+
real_path.relative_to(temp_dir)
|
|
543
|
+
log_debug(
|
|
544
|
+
"Path is under system temp directory - allowed in test environment"
|
|
545
|
+
)
|
|
546
|
+
return True, ""
|
|
547
|
+
|
|
548
|
+
except ValueError:
|
|
549
|
+
return False, "Absolute file paths are not allowed"
|
|
550
|
+
except Exception as e:
|
|
551
|
+
log_debug(f"Error in test environment check: {e}")
|
|
552
|
+
return False, "Absolute file paths are not allowed"
|
|
553
|
+
|
|
554
|
+
def _validate_path_traversal(self, file_path: str) -> tuple[bool, str]:
|
|
555
|
+
"""
|
|
556
|
+
Validate file path for directory traversal attempts.
|
|
557
|
+
|
|
558
|
+
Args:
|
|
559
|
+
file_path: File path to validate
|
|
560
|
+
|
|
561
|
+
Returns:
|
|
562
|
+
Tuple of (is_valid, error_message)
|
|
563
|
+
"""
|
|
564
|
+
norm_path = str(Path(file_path))
|
|
565
|
+
|
|
566
|
+
# Check for various path traversal patterns
|
|
567
|
+
traversal_patterns = ["..\\", "../", ".."]
|
|
568
|
+
|
|
569
|
+
if any(
|
|
570
|
+
pattern in norm_path for pattern in traversal_patterns[:2]
|
|
571
|
+
) or norm_path.startswith(traversal_patterns[2]):
|
|
572
|
+
log_warning(f"Path traversal attempt detected: {file_path} -> {norm_path}")
|
|
573
|
+
return False, "Directory traversal not allowed"
|
|
574
|
+
|
|
575
|
+
return True, ""
|
|
576
|
+
|
|
577
|
+
def _validate_project_boundary(
|
|
578
|
+
self, file_path: str, base_path: str | None
|
|
579
|
+
) -> tuple[bool, str]:
|
|
580
|
+
"""
|
|
581
|
+
Validate file path against project boundaries when base_path is provided.
|
|
582
|
+
|
|
583
|
+
Args:
|
|
584
|
+
file_path: File path to validate
|
|
585
|
+
base_path: Base path for relative path validation
|
|
586
|
+
|
|
587
|
+
Returns:
|
|
588
|
+
Tuple of (is_valid, error_message)
|
|
589
|
+
"""
|
|
590
|
+
if not (self.boundary_manager and base_path):
|
|
591
|
+
return True, ""
|
|
592
|
+
|
|
593
|
+
norm_path = str(Path(file_path))
|
|
594
|
+
full_path = str(Path(base_path) / norm_path)
|
|
595
|
+
|
|
596
|
+
if not self.boundary_manager.is_within_project(full_path):
|
|
597
|
+
return (False, "Access denied. File path must be within project directory")
|
|
598
|
+
|
|
599
|
+
return True, ""
|