tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,599 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Security Validator for Tree-sitter Analyzer
4
+
5
+ Provides unified security validation framework inspired by code-index-mcp's
6
+ ValidationHelper but enhanced for tree-sitter analyzer's requirements.
7
+ """
8
+
9
+ import re
10
+ from pathlib import Path
11
+
12
+ from ..exceptions import SecurityError
13
+ from ..utils import log_debug, log_warning
14
+ from .boundary_manager import ProjectBoundaryManager
15
+ from .regex_checker import RegexSafetyChecker
16
+
17
+
18
+ class SecurityValidator:
19
+ """
20
+ Unified security validation framework.
21
+
22
+ This class provides comprehensive security validation for file paths,
23
+ regex patterns, and other user inputs to prevent security vulnerabilities.
24
+
25
+ Features:
26
+ - Multi-layer path traversal protection
27
+ - Project boundary enforcement
28
+ - ReDoS attack prevention
29
+ - Input sanitization
30
+ """
31
+
32
+ def __init__(self, project_root: str | None = None) -> None:
33
+ """
34
+ Initialize security validator.
35
+
36
+ Args:
37
+ project_root: Optional project root directory for boundary checks
38
+ """
39
+ self.boundary_manager: ProjectBoundaryManager | None
40
+
41
+ # Ensure project_root is properly resolved if provided
42
+ if project_root:
43
+ try:
44
+ resolved_root = str(Path(project_root).resolve())
45
+ self.boundary_manager = ProjectBoundaryManager(resolved_root)
46
+ log_debug(
47
+ f"SecurityValidator initialized with resolved project_root: {resolved_root}"
48
+ )
49
+ except Exception as e:
50
+ log_warning(
51
+ f"Failed to initialize ProjectBoundaryManager with {project_root}: {e}"
52
+ )
53
+ self.boundary_manager = None
54
+ else:
55
+ self.boundary_manager = None
56
+
57
+ self.regex_checker = RegexSafetyChecker()
58
+
59
+ log_debug(f"SecurityValidator initialized with project_root: {project_root}")
60
+
61
+ def validate_file_path(
62
+ self, file_path: str, base_path: str | None = None
63
+ ) -> tuple[bool, str]:
64
+ """
65
+ Validate file path with comprehensive security checks.
66
+
67
+ Implements multi-layer defense against path traversal attacks
68
+ and ensures file access stays within project boundaries.
69
+
70
+ Args:
71
+ file_path: File path to validate
72
+ base_path: Optional base path for relative path validation
73
+
74
+ Returns:
75
+ Tuple of (is_valid, error_message)
76
+
77
+ Example:
78
+ >>> validator = SecurityValidator("/project/root")
79
+ >>> is_valid, error = validator.validate_file_path("src/main.py")
80
+ >>> assert is_valid
81
+ """
82
+ try:
83
+ # Layer 1: Basic input validation
84
+ if not file_path or not isinstance(file_path, str):
85
+ return False, "File path must be a non-empty string"
86
+
87
+ # Layer 2: Null byte injection check
88
+ if "\x00" in file_path:
89
+ log_warning(f"Null byte detected in file path: {file_path}")
90
+ return False, "File path contains null bytes"
91
+
92
+ # Layer 3: Windows drive letter check (only on non-Windows systems)
93
+ is_valid, error = self._validate_windows_drive_letter(file_path)
94
+ if not is_valid:
95
+ return False, error
96
+
97
+ # Layer 4: Absolute path security validation
98
+ if Path(file_path).is_absolute() or file_path.startswith(("/", "\\")):
99
+ is_valid, error = self._validate_absolute_path(file_path)
100
+ if not is_valid:
101
+ return False, error
102
+
103
+ # Layer 5: Path normalization and traversal check
104
+ is_valid, error = self._validate_path_traversal(file_path)
105
+ if not is_valid:
106
+ return False, error
107
+
108
+ # Layer 6: Project boundary validation
109
+ is_valid, error = self._validate_project_boundary(file_path, base_path)
110
+ if not is_valid:
111
+ return False, error
112
+
113
+ # Layer 7: Symbolic link and junction check (check both original and resolved paths)
114
+ # First check the original file_path directly for symlinks and junctions
115
+ try:
116
+ original_path = Path(file_path)
117
+ log_debug(f"Checking symlink status for original path: {original_path}")
118
+ # Check for symlinks even if the file doesn't exist yet (broken symlinks)
119
+ is_symlink = original_path.is_symlink()
120
+ log_debug(f"original_path.is_symlink() = {is_symlink}")
121
+ if is_symlink:
122
+ log_warning(
123
+ f"Symbolic link detected in original path: {original_path}"
124
+ )
125
+ return False, "Symbolic links are not allowed"
126
+
127
+ # Additional check for Windows junctions and reparse points (only if exists)
128
+ if original_path.exists() and self._is_junction_or_reparse_point(
129
+ original_path
130
+ ):
131
+ log_warning(
132
+ f"Junction or reparse point detected in original path: {original_path}"
133
+ )
134
+ return False, "Junctions and reparse points are not allowed"
135
+
136
+ except (OSError, PermissionError) as e:
137
+ # If we can't check symlink status, continue with other checks
138
+ log_debug(f"Exception checking symlink status: {e}")
139
+ pass
140
+
141
+ # Then check the full path (base_path + file_path) if base_path is provided
142
+ if base_path:
143
+ norm_path = str(Path(file_path))
144
+ full_path = Path(base_path) / norm_path
145
+
146
+ # Check if the full path is a symlink or junction
147
+ try:
148
+ # Check for symlinks even if the file doesn't exist yet (broken symlinks)
149
+ if full_path.is_symlink():
150
+ log_warning(f"Symbolic link detected: {full_path}")
151
+ return False, "Symbolic links are not allowed"
152
+
153
+ # Additional check for Windows junctions and reparse points (only if exists)
154
+ if full_path.exists() and self._is_junction_or_reparse_point(
155
+ full_path
156
+ ):
157
+ log_warning(f"Junction or reparse point detected: {full_path}")
158
+ return False, "Junctions and reparse points are not allowed"
159
+
160
+ except (OSError, PermissionError):
161
+ # If we can't check symlink status due to permissions, be cautious
162
+ log_warning(f"Cannot verify symlink status for: {full_path}")
163
+ pass
164
+
165
+ # Check parent directories for junctions (Windows-specific security measure)
166
+ try:
167
+ if self._has_junction_in_path(full_path):
168
+ log_warning(f"Junction detected in path hierarchy: {full_path}")
169
+ return False, "Paths containing junctions are not allowed"
170
+ except (OSError, PermissionError):
171
+ # If we can't check parent directories, continue
172
+ pass
173
+ else:
174
+ # For absolute paths or when no base_path is provided, use original_path
175
+ full_path = original_path
176
+
177
+ # Check parent directories for junctions
178
+ try:
179
+ if self._has_junction_in_path(full_path):
180
+ log_warning(f"Junction detected in path hierarchy: {full_path}")
181
+ return False, "Paths containing junctions are not allowed"
182
+ except (OSError, PermissionError):
183
+ # If we can't check parent directories, continue
184
+ pass
185
+
186
+ log_debug(f"File path validation passed: {file_path}")
187
+ return True, ""
188
+
189
+ except Exception as e:
190
+ log_warning(f"File path validation error: {e}")
191
+ return False, f"Validation error: {str(e)}"
192
+
193
+ def validate_directory_path(
194
+ self, dir_path: str, must_exist: bool = True
195
+ ) -> tuple[bool, str]:
196
+ """
197
+ Validate directory path for security and existence.
198
+
199
+ Args:
200
+ dir_path: Directory path to validate
201
+ must_exist: Whether directory must exist
202
+
203
+ Returns:
204
+ Tuple of (is_valid, error_message)
205
+ """
206
+ try:
207
+ # Basic validation using file path validator
208
+ is_valid, error = self.validate_file_path(dir_path)
209
+ if not is_valid:
210
+ return False, error
211
+
212
+ # Check if path exists and is directory
213
+ if must_exist:
214
+ dir_path_obj = Path(dir_path)
215
+ if not dir_path_obj.exists():
216
+ return False, f"Directory does not exist: {dir_path}"
217
+
218
+ if not dir_path_obj.is_dir():
219
+ return False, f"Path is not a directory: {dir_path}"
220
+
221
+ log_debug(f"Directory path validation passed: {dir_path}")
222
+ return True, ""
223
+
224
+ except Exception as e:
225
+ log_warning(f"Directory path validation error: {e}")
226
+ return False, f"Validation error: {str(e)}"
227
+
228
+ def validate_regex_pattern(self, pattern: str) -> tuple[bool, str]:
229
+ """
230
+ Validate regex pattern for ReDoS attack prevention.
231
+
232
+ Args:
233
+ pattern: Regex pattern to validate
234
+
235
+ Returns:
236
+ Tuple of (is_valid, error_message)
237
+ """
238
+ return self.regex_checker.validate_pattern(pattern)
239
+
240
+ def sanitize_input(self, user_input: str, max_length: int = 1000) -> str:
241
+ """
242
+ Sanitize user input by removing dangerous characters.
243
+
244
+ Args:
245
+ user_input: Input string to sanitize
246
+ max_length: Maximum allowed length
247
+
248
+ Returns:
249
+ Sanitized input string
250
+
251
+ Raises:
252
+ SecurityError: If input is too long or contains dangerous content
253
+ """
254
+ if not isinstance(user_input, str):
255
+ raise SecurityError("Input must be a string")
256
+
257
+ if len(user_input) > max_length:
258
+ raise SecurityError(f"Input too long: {len(user_input)} > {max_length}")
259
+
260
+ # Remove null bytes and control characters
261
+ sanitized = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", user_input)
262
+
263
+ # Remove HTML/XML tags for XSS prevention
264
+ sanitized = re.sub(r"<[^>]*>", "", sanitized)
265
+
266
+ # Remove potentially dangerous characters
267
+ sanitized = re.sub(r'[<>"\']', "", sanitized)
268
+
269
+ # Log if sanitization occurred
270
+ if sanitized != user_input:
271
+ log_warning("Input sanitization performed")
272
+
273
+ return sanitized
274
+
275
+ def validate_glob_pattern(self, pattern: str) -> tuple[bool, str]:
276
+ """
277
+ Validate glob pattern for safe file matching.
278
+
279
+ Args:
280
+ pattern: Glob pattern to validate
281
+
282
+ Returns:
283
+ Tuple of (is_valid, error_message)
284
+ """
285
+ try:
286
+ # Basic input validation
287
+ if not pattern or not isinstance(pattern, str):
288
+ return False, "Pattern must be a non-empty string"
289
+
290
+ # Check for dangerous patterns
291
+ dangerous_patterns = [
292
+ "..", # Path traversal
293
+ "//", # Double slashes
294
+ "\\\\", # Double backslashes
295
+ ]
296
+
297
+ for dangerous in dangerous_patterns:
298
+ if dangerous in pattern:
299
+ return False, f"Dangerous pattern detected: {dangerous}"
300
+
301
+ # Validate length
302
+ if len(pattern) > 500:
303
+ return False, "Pattern too long"
304
+
305
+ log_debug(f"Glob pattern validation passed: {pattern}")
306
+ return True, ""
307
+
308
+ except Exception as e:
309
+ log_warning(f"Glob pattern validation error: {e}")
310
+ return False, f"Validation error: {str(e)}"
311
+
312
+ def validate_path(
313
+ self, path: str, base_path: str | None = None
314
+ ) -> tuple[bool, str]:
315
+ """
316
+ Alias for validate_file_path for backward compatibility.
317
+
318
+ Args:
319
+ path: Path to validate
320
+ base_path: Optional base path for relative path validation
321
+
322
+ Returns:
323
+ Tuple of (is_valid, error_message)
324
+ """
325
+ return self.validate_file_path(path, base_path)
326
+
327
+ def is_safe_path(self, path: str, base_path: str | None = None) -> bool:
328
+ """
329
+ Check if a path is safe (backward compatibility method).
330
+
331
+ Args:
332
+ path: Path to check
333
+ base_path: Optional base path for relative path validation
334
+
335
+ Returns:
336
+ True if path is safe, False otherwise
337
+ """
338
+ is_valid, _ = self.validate_file_path(path, base_path)
339
+ return is_valid
340
+
341
+ def _is_junction_or_reparse_point(self, path: Path) -> bool:
342
+ """
343
+ Check if a path is a Windows junction or reparse point.
344
+
345
+ Args:
346
+ path: Path to check
347
+
348
+ Returns:
349
+ True if the path is a junction or reparse point
350
+ """
351
+ try:
352
+ import platform
353
+
354
+ if platform.system() != "Windows":
355
+ return False
356
+
357
+ # On Windows, check for reparse points using stat
358
+ import stat
359
+
360
+ if path.exists():
361
+ path_stat = path.stat()
362
+ # Check if it has the reparse point attribute
363
+ if hasattr(stat, "FILE_ATTRIBUTE_REPARSE_POINT"):
364
+ return bool(
365
+ path_stat.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT
366
+ )
367
+
368
+ # Alternative method using Windows API
369
+ try:
370
+ import ctypes
371
+ from ctypes import wintypes
372
+
373
+ # GetFileAttributesW function
374
+ _GetFileAttributesW = ctypes.windll.kernel32.GetFileAttributesW
375
+ _GetFileAttributesW.argtypes = [wintypes.LPCWSTR]
376
+ _GetFileAttributesW.restype = wintypes.DWORD
377
+
378
+ FILE_ATTRIBUTE_REPARSE_POINT = 0x400
379
+ INVALID_FILE_ATTRIBUTES = 0xFFFFFFFF
380
+
381
+ attributes = _GetFileAttributesW(str(path))
382
+ if attributes != INVALID_FILE_ATTRIBUTES:
383
+ return bool(attributes & FILE_ATTRIBUTE_REPARSE_POINT)
384
+
385
+ except (ImportError, AttributeError, OSError):
386
+ pass # nosec
387
+
388
+ except Exception:
389
+ # If any error occurs, assume it's not a junction for safety
390
+ pass # nosec
391
+
392
+ return False
393
+
394
+ def _has_junction_in_path(self, path: Path) -> bool:
395
+ """
396
+ Check if any parent directory in the path is a junction.
397
+
398
+ Args:
399
+ path: Path to check
400
+
401
+ Returns:
402
+ True if any parent directory is a junction
403
+ """
404
+ try:
405
+ current_path = path.resolve() if path.exists() else path
406
+
407
+ # Check each parent directory
408
+ for parent in current_path.parents:
409
+ if self._is_junction_or_reparse_point(parent):
410
+ return True
411
+
412
+ except Exception:
413
+ # If any error occurs, assume no junctions for safety
414
+ pass # nosec
415
+
416
+ return False
417
+
418
+ def _validate_windows_drive_letter(self, file_path: str) -> tuple[bool, str]:
419
+ """
420
+ Validate Windows drive letter on non-Windows systems.
421
+
422
+ Args:
423
+ file_path: File path to validate
424
+
425
+ Returns:
426
+ Tuple of (is_valid, error_message)
427
+ """
428
+ import platform
429
+
430
+ if (
431
+ len(file_path) > 1
432
+ and file_path[1] == ":"
433
+ and platform.system() != "Windows"
434
+ ):
435
+ return (
436
+ False,
437
+ f"Windows drive letters are not allowed on {platform.system()} system",
438
+ )
439
+
440
+ return True, ""
441
+
442
+ def _validate_absolute_path(self, file_path: str) -> tuple[bool, str]:
443
+ """
444
+ Validate absolute path with project boundary and test environment checks.
445
+
446
+ Args:
447
+ file_path: Absolute file path to validate
448
+
449
+ Returns:
450
+ Tuple of (is_valid, error_message)
451
+ """
452
+ log_debug(f"Processing absolute path: {file_path}")
453
+
454
+ # Check project boundaries first (highest priority)
455
+ if self.boundary_manager and self.boundary_manager.project_root:
456
+ if not self.boundary_manager.is_within_project(file_path):
457
+ return False, "Absolute path must be within project directory"
458
+ log_debug("Absolute path is within project boundaries")
459
+ return True, ""
460
+
461
+ # If no project boundaries, check test environment allowances
462
+ is_test_allowed, error = self._check_test_environment_access(file_path)
463
+ if not is_test_allowed:
464
+ return False, error
465
+
466
+ log_debug("Absolute path allowed in test environment")
467
+ return True, ""
468
+
469
+ def _check_test_environment_access(self, file_path: str) -> tuple[bool, str]:
470
+ """
471
+ Check if absolute path access is allowed in test/development environment.
472
+
473
+ This method allows access to system temporary directories when no project
474
+ boundaries are configured, which is common in test environments.
475
+
476
+ Args:
477
+ file_path: File path to check
478
+
479
+ Returns:
480
+ Tuple of (is_allowed, error_message)
481
+ """
482
+ import os
483
+ import tempfile
484
+
485
+ try:
486
+ # Check if we're in a test environment
487
+ is_test_env = (
488
+ "pytest" in os.environ.get("_", "")
489
+ or "PYTEST_CURRENT_TEST" in os.environ
490
+ or "CI" in os.environ
491
+ or "GITHUB_ACTIONS" in os.environ
492
+ or any(
493
+ "test" in arg.lower()
494
+ for arg in getattr(getattr(os, "sys", None), "argv", [])
495
+ if hasattr(os, "sys")
496
+ )
497
+ )
498
+
499
+ if is_test_env:
500
+ log_debug("Test environment detected - allowing temporary file access")
501
+
502
+ # Allow access to common temporary directories
503
+ temp_dirs = [
504
+ Path(tempfile.gettempdir()).resolve(),
505
+ Path("/tmp").resolve() if Path("/tmp").exists() else None, # nosec
506
+ (Path("/var/tmp").resolve() if Path("/var/tmp").exists() else None), # nosec
507
+ ]
508
+
509
+ real_path = Path(file_path).resolve()
510
+ log_debug(f"Checking test environment access: {real_path}")
511
+
512
+ for temp_dir in temp_dirs:
513
+ if temp_dir and temp_dir.exists():
514
+ try:
515
+ real_path.relative_to(temp_dir)
516
+ log_debug(
517
+ f"Path is under temp directory {temp_dir} - allowed in test environment"
518
+ )
519
+ return True, ""
520
+ except ValueError:
521
+ continue
522
+
523
+ # In test environment, also allow access to files that start with temp file patterns
524
+ file_name = Path(file_path).name
525
+ if (
526
+ file_name.startswith(("tmp", "temp"))
527
+ or "_test_" in file_name
528
+ or file_name.endswith(("_test.py", "_test.js", ".tmp"))
529
+ ):
530
+ log_debug(
531
+ "Temporary test file pattern detected - allowed in test environment"
532
+ )
533
+ return True, ""
534
+
535
+ # Fallback to original temp directory check
536
+ temp_dir = Path(tempfile.gettempdir()).resolve()
537
+ real_path = Path(file_path).resolve()
538
+
539
+ log_debug(f"Checking test environment access: {real_path} under {temp_dir}")
540
+
541
+ # Allow access under system temp directory (safe sandbox)
542
+ real_path.relative_to(temp_dir)
543
+ log_debug(
544
+ "Path is under system temp directory - allowed in test environment"
545
+ )
546
+ return True, ""
547
+
548
+ except ValueError:
549
+ return False, "Absolute file paths are not allowed"
550
+ except Exception as e:
551
+ log_debug(f"Error in test environment check: {e}")
552
+ return False, "Absolute file paths are not allowed"
553
+
554
+ def _validate_path_traversal(self, file_path: str) -> tuple[bool, str]:
555
+ """
556
+ Validate file path for directory traversal attempts.
557
+
558
+ Args:
559
+ file_path: File path to validate
560
+
561
+ Returns:
562
+ Tuple of (is_valid, error_message)
563
+ """
564
+ norm_path = str(Path(file_path))
565
+
566
+ # Check for various path traversal patterns
567
+ traversal_patterns = ["..\\", "../", ".."]
568
+
569
+ if any(
570
+ pattern in norm_path for pattern in traversal_patterns[:2]
571
+ ) or norm_path.startswith(traversal_patterns[2]):
572
+ log_warning(f"Path traversal attempt detected: {file_path} -> {norm_path}")
573
+ return False, "Directory traversal not allowed"
574
+
575
+ return True, ""
576
+
577
+ def _validate_project_boundary(
578
+ self, file_path: str, base_path: str | None
579
+ ) -> tuple[bool, str]:
580
+ """
581
+ Validate file path against project boundaries when base_path is provided.
582
+
583
+ Args:
584
+ file_path: File path to validate
585
+ base_path: Base path for relative path validation
586
+
587
+ Returns:
588
+ Tuple of (is_valid, error_message)
589
+ """
590
+ if not (self.boundary_manager and base_path):
591
+ return True, ""
592
+
593
+ norm_path = str(Path(file_path))
594
+ full_path = str(Path(base_path) / norm_path)
595
+
596
+ if not self.boundary_manager.is_within_project(full_path):
597
+ return (False, "Access denied. File path must be within project directory")
598
+
599
+ return True, ""