tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,343 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Search Cache Module for MCP Tools
4
+
5
+ Provides basic caching functionality for search results to improve performance
6
+ by avoiding repeated expensive search operations.
7
+
8
+ This is a simplified version focusing on core caching features for Phase 2.
9
+ """
10
+
11
+ import logging
12
+ import threading
13
+ import time
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ class SearchCache:
21
+ """Thread-safe in-memory search result cache with TTL and LRU eviction"""
22
+
23
+ def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600):
24
+ """
25
+ Initialize the search cache.
26
+
27
+ Args:
28
+ max_size: Maximum number of cached entries
29
+ ttl_seconds: Time-to-live for cached entries in seconds (default: 1 hour)
30
+ """
31
+ self.cache: dict[str, dict[str, Any]] = {}
32
+ self.max_size = max_size
33
+ self.ttl_seconds = ttl_seconds
34
+ self._lock = threading.RLock() # Reentrant lock for thread safety
35
+ self._access_times: dict[str, float] = {} # Track access times for LRU
36
+
37
+ # Statistics
38
+ self._hits = 0
39
+ self._misses = 0
40
+ self._evictions = 0
41
+
42
+ def _is_expired(self, timestamp: float) -> bool:
43
+ """Check if a cache entry is expired"""
44
+ return time.time() - timestamp > self.ttl_seconds
45
+
46
+ def _cleanup_expired(self) -> None:
47
+ """Remove expired entries (should be called with lock held)"""
48
+ current_time = time.time()
49
+ expired_keys = [
50
+ key
51
+ for key, entry in self.cache.items()
52
+ if current_time - entry["timestamp"] > self.ttl_seconds
53
+ ]
54
+ for key in expired_keys:
55
+ del self.cache[key]
56
+ if key in self._access_times:
57
+ del self._access_times[key]
58
+
59
+ if expired_keys:
60
+ logger.debug(f"Cleaned up {len(expired_keys)} expired cache entries")
61
+
62
+ def get(self, cache_key: str) -> Any:
63
+ """
64
+ Get cached result if valid.
65
+
66
+ Args:
67
+ cache_key: The cache key to look up
68
+
69
+ Returns:
70
+ Cached data if found and valid, None otherwise
71
+ """
72
+ with self._lock:
73
+ if cache_key in self.cache:
74
+ entry = self.cache[cache_key]
75
+ if not self._is_expired(entry["timestamp"]):
76
+ # Update access time for LRU
77
+ self._access_times[cache_key] = time.time()
78
+ self._hits += 1
79
+ logger.debug(f"Cache hit for key: {cache_key[:50]}...")
80
+ return entry["data"]
81
+ else:
82
+ # Remove expired entry
83
+ del self.cache[cache_key]
84
+ if cache_key in self._access_times:
85
+ del self._access_times[cache_key]
86
+
87
+ self._misses += 1
88
+ return None
89
+
90
+ def get_compatible_result(self, cache_key: str, requested_format: str) -> Any:
91
+ """
92
+ Get cached result and try to derive compatible formats.
93
+
94
+ This enables smart cross-format caching where count results can be used
95
+ to derive file lists without additional searches.
96
+
97
+ Args:
98
+ cache_key: The cache key
99
+ requested_format: The format being requested ('file_list', 'summary', etc.)
100
+
101
+ Returns:
102
+ Compatible cached data if derivable, None otherwise
103
+ """
104
+ # First try direct cache hit - but only if the format matches
105
+ direct_result = self.get(cache_key)
106
+ if direct_result is not None:
107
+ # Check if the cached result matches the requested format
108
+ if self._is_format_compatible(direct_result, requested_format):
109
+ return direct_result
110
+
111
+ # Try to find compatible cached results for derivation
112
+ # Look for count_only results that can derive file lists
113
+ if requested_format in ["file_list", "summary", "files_only"]:
114
+ # Look for a count_only version of the same search
115
+ count_key = self._derive_count_key_from_cache_key(cache_key)
116
+ if count_key and count_key != cache_key:
117
+ count_result = self.get(count_key)
118
+ if count_result and self._can_derive_file_list(count_result):
119
+ logger.debug(f"Deriving {requested_format} from cached count data")
120
+ return self._derive_file_list_result(count_result, requested_format)
121
+
122
+ return None
123
+
124
+ def _is_format_compatible(self, cached_result: Any, requested_format: str) -> bool:
125
+ """
126
+ Check if a cached result is compatible with the requested format.
127
+
128
+ This prevents returning wrong format data (e.g., returning integer total
129
+ when detailed results are requested).
130
+ """
131
+ if requested_format == "total_only":
132
+ # total_only expects a simple integer
133
+ return isinstance(cached_result, int)
134
+ elif requested_format == "count_only":
135
+ # count_only expects a dict with file_counts
136
+ return isinstance(cached_result, dict) and (
137
+ "file_counts" in cached_result or "count_only" in cached_result
138
+ )
139
+ elif requested_format in ["summary", "file_list", "files_only"]:
140
+ # These formats expect dict results with specific structures
141
+ return isinstance(cached_result, dict) and cached_result.get(
142
+ "success", False
143
+ )
144
+ elif requested_format in ["normal", "group_by_file"]:
145
+ # Normal format expects dict with matches, files, or results data
146
+ return isinstance(cached_result, dict) and (
147
+ "matches" in cached_result
148
+ or "files" in cached_result
149
+ or "results" in cached_result
150
+ )
151
+ else:
152
+ # For unknown formats or test scenarios, allow dict results but not primitives
153
+ # This maintains backward compatibility while preventing the integer bug
154
+ return isinstance(cached_result, dict)
155
+
156
+ def _derive_count_key_from_cache_key(self, cache_key: str) -> str | None:
157
+ """Try to derive what the count_only cache key would be for this search."""
158
+ # Simple heuristic: replace summary_only with count_only_matches
159
+ if "summary_only" in cache_key:
160
+ return cache_key.replace(
161
+ "'summary_only': True", "'count_only_matches': True"
162
+ )
163
+ elif "count_only_matches" not in cache_key:
164
+ # Add count_only_matches parameter
165
+ return cache_key.replace("}", ", 'count_only_matches': True}")
166
+ return None
167
+
168
+ def _can_derive_file_list(self, count_result: dict[str, Any]) -> bool:
169
+ """Check if a count result contains file count data that can derive file lists."""
170
+ return (
171
+ isinstance(count_result, dict)
172
+ and "file_counts" in count_result
173
+ and isinstance(count_result["file_counts"], dict)
174
+ )
175
+
176
+ def _derive_file_list_result(
177
+ self, count_result: dict[str, Any], requested_format: str
178
+ ) -> dict[str, Any]:
179
+ """Derive file list result from count data."""
180
+ try:
181
+ from ..tools import fd_rg_utils # Import here to avoid circular imports
182
+
183
+ file_counts = count_result.get("file_counts", {})
184
+ if requested_format == "summary":
185
+ derived_result = fd_rg_utils.create_file_summary_from_count_data(
186
+ file_counts
187
+ )
188
+ derived_result["cache_derived"] = True # Mark as derived from cache
189
+ return derived_result
190
+ elif requested_format in ["file_list", "files_only"]:
191
+ file_list = fd_rg_utils.extract_file_list_from_count_data(file_counts)
192
+ return {
193
+ "success": True,
194
+ "files": file_list,
195
+ "file_count": len(file_list),
196
+ "total_matches": file_counts.get("__total__", 0),
197
+ "cache_derived": True, # Mark as derived from cache
198
+ }
199
+ except ImportError:
200
+ logger.warning("Could not import fd_rg_utils for cache derivation")
201
+
202
+ return count_result
203
+
204
+ def set(self, cache_key: str, data: dict[str, Any] | Any) -> None:
205
+ """
206
+ Set cached result.
207
+
208
+ Args:
209
+ cache_key: The cache key
210
+ data: The data to cache
211
+ """
212
+ with self._lock:
213
+ self._cleanup_expired()
214
+
215
+ # If cache is full and this is a new key, remove LRU entry
216
+ if len(self.cache) >= self.max_size and cache_key not in self.cache:
217
+ # Remove least recently used entry
218
+ if self._access_times:
219
+ lru_key = min(
220
+ self._access_times.keys(),
221
+ key=lambda k: self._access_times.get(k, 0),
222
+ )
223
+ del self.cache[lru_key]
224
+ del self._access_times[lru_key]
225
+ self._evictions += 1
226
+ logger.debug(f"Cache full, removed LRU entry: {lru_key[:50]}...")
227
+
228
+ current_time = time.time()
229
+ self.cache[cache_key] = {"data": data, "timestamp": current_time}
230
+ self._access_times[cache_key] = current_time
231
+ logger.debug(f"Cached result for key: {cache_key[:50]}...")
232
+
233
+ def clear(self) -> None:
234
+ """Clear all cached results"""
235
+ with self._lock:
236
+ self.cache.clear()
237
+ self._access_times.clear()
238
+ self._hits = 0
239
+ self._misses = 0
240
+ self._evictions = 0
241
+ logger.info("Search cache cleared")
242
+
243
+ def get_stats(self) -> dict[str, Any]:
244
+ """Get cache statistics"""
245
+ with self._lock:
246
+ total_requests = self._hits + self._misses
247
+ hit_rate = (self._hits / total_requests * 100) if total_requests > 0 else 0
248
+
249
+ return {
250
+ "size": len(self.cache),
251
+ "max_size": self.max_size,
252
+ "ttl_seconds": self.ttl_seconds,
253
+ "hits": self._hits,
254
+ "misses": self._misses,
255
+ "hit_rate_percent": round(hit_rate, 2),
256
+ "evictions": self._evictions,
257
+ "expired_entries": len(
258
+ [
259
+ key
260
+ for key, entry in self.cache.items()
261
+ if self._is_expired(entry["timestamp"])
262
+ ]
263
+ ),
264
+ }
265
+
266
+ def create_cache_key(self, query: str, roots: list[str], **params: Any) -> str:
267
+ """
268
+ Create a deterministic cache key for search parameters.
269
+
270
+ Args:
271
+ query: Search query
272
+ roots: List of root directories
273
+ **params: Additional search parameters
274
+
275
+ Returns:
276
+ Cache key string
277
+ """
278
+ # Normalize query
279
+ normalized_query = query.strip().lower()
280
+
281
+ # Normalize roots - resolve paths and sort for consistency
282
+ normalized_roots = []
283
+ for r in roots:
284
+ try:
285
+ resolved = str(Path(r).resolve())
286
+ normalized_roots.append(resolved)
287
+ except Exception:
288
+ # If path resolution fails, use original
289
+ normalized_roots.append(r)
290
+ normalized_roots.sort()
291
+
292
+ # Only include parameters that affect search results
293
+ relevant_params = {
294
+ "case": params.get("case", "smart"),
295
+ "include_globs": (
296
+ sorted(params.get("include_globs", []))
297
+ if params.get("include_globs")
298
+ else []
299
+ ),
300
+ "exclude_globs": (
301
+ sorted(params.get("exclude_globs", []))
302
+ if params.get("exclude_globs")
303
+ else []
304
+ ),
305
+ "no_ignore": params.get("no_ignore", False),
306
+ "hidden": params.get("hidden", False),
307
+ "fixed_strings": params.get("fixed_strings", False),
308
+ "word": params.get("word", False),
309
+ "multiline": params.get("multiline", False),
310
+ "max_filesize": params.get("max_filesize", ""),
311
+ }
312
+
313
+ # Create deterministic key
314
+ key_parts = [
315
+ normalized_query,
316
+ str(normalized_roots),
317
+ str(sorted(relevant_params.items())),
318
+ ]
319
+ return "|".join(key_parts)
320
+
321
+
322
# Module-wide cache singleton; created on first use by get_default_cache()
_default_cache = None


def get_default_cache() -> SearchCache:
    """Return the shared SearchCache, building it with defaults on first call."""
    global _default_cache
    if _default_cache is not None:
        return _default_cache
    _default_cache = SearchCache()
    return _default_cache
332
+
333
+
334
def configure_cache(max_size: int = 1000, ttl_seconds: int = 3600) -> None:
    """Replace the shared cache with a new SearchCache using the given limits.

    Whatever the previous shared instance held is not carried over, because
    the module-level reference is rebound to a brand-new object.
    """
    global _default_cache
    replacement = SearchCache(max_size, ttl_seconds)
    _default_cache = replacement
338
+
339
+
340
def clear_cache() -> None:
    """Empty the shared search cache (creating it first if it does not exist)."""
    get_default_cache().clear()