tree-sitter-analyzer 1.2.5__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tree-sitter-analyzer might be problematic.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +30 -7
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +24 -1
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +165 -7
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +323 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- {tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/METADATA +9 -9
- {tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/RECORD +10 -8
- {tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/__init__.py
CHANGED

tree_sitter_analyzer/mcp/tools/fd_rg_utils.py
CHANGED
@@ -147,15 +147,16 @@ def build_fd_command(
     if limit is not None:
         cmd += ["--max-results", str(limit)]

-    #
-    #
-    if roots:
-        for root in roots:
-            cmd += ["--search-path", root]
-
-    # Pattern goes last if specified
+    # Pattern goes before roots if present
+    # If no pattern is specified, use '.' to match all files
     if pattern:
         cmd.append(pattern)
+    else:
+        cmd.append(".")
+
+    # Append roots - these are search directories, not patterns
+    if roots:
+        cmd += roots

     return cmd

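For context, fd's command line takes an optional pattern followed by positional search paths, which is what the reordered construction above produces. A minimal standalone sketch of the new ordering (illustrative only; the real build_fd_command also assembles flags such as --max-results):

def build_fd_command_sketch(pattern: str | None, roots: list[str]) -> list[str]:
    """Illustrative sketch: pattern (or '.') first, then roots as positional paths."""
    cmd = ["fd"]
    # If no pattern is specified, '.' matches all files
    cmd.append(pattern if pattern else ".")
    # Roots are search directories, not patterns; they come after the pattern
    cmd += roots
    return cmd

# Old shape: ["fd", "--search-path", "src", "*.py"]   (pattern appended last)
# New shape: build_fd_command_sketch("*.py", ["src"]) -> ["fd", "*.py", "src"]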

@@ -517,6 +518,28 @@ def parse_rg_count_output(stdout_bytes: bytes) -> dict[str, int]:
     return results


+def extract_file_list_from_count_data(count_data: dict[str, int]) -> list[str]:
+    """Extract file list from count data, excluding the special __total__ key."""
+    return [file_path for file_path in count_data.keys() if file_path != "__total__"]
+
+
+def create_file_summary_from_count_data(count_data: dict[str, int]) -> dict[str, Any]:
+    """Create a file summary structure from count data."""
+    file_list = extract_file_list_from_count_data(count_data)
+    total_matches = count_data.get("__total__", 0)
+
+    return {
+        "success": True,
+        "total_matches": total_matches,
+        "file_count": len(file_list),
+        "files": [
+            {"file": file_path, "match_count": count_data[file_path]}
+            for file_path in file_list
+        ],
+        "derived_from_count": True,  # Marks this as derived from count data
+    }
+
+
 @dataclass
 class TempFileList:
     path: str
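A usage sketch of the two new helpers, with hypothetical count data in the shape produced by parse_rg_count_output (per-file counts plus the special __total__ key):

from tree_sitter_analyzer.mcp.tools.fd_rg_utils import (
    create_file_summary_from_count_data,
    extract_file_list_from_count_data,
)

count_data = {"src/a.py": 3, "src/b.py": 1, "__total__": 4}

extract_file_list_from_count_data(count_data)
# -> ["src/a.py", "src/b.py"]   (__total__ is excluded)

create_file_summary_from_count_data(count_data)
# -> {"success": True, "total_matches": 4, "file_count": 2,
#     "files": [{"file": "src/a.py", "match_count": 3},
#               {"file": "src/b.py", "match_count": 1}],
#     "derived_from_count": True}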

tree_sitter_analyzer/mcp/tools/list_files_tool.py
CHANGED
@@ -7,14 +7,18 @@ Safely list files/directories based on name patterns and constraints, using fd.

 from __future__ import annotations

+import logging
 import time
 from pathlib import Path
 from typing import Any

 from ..utils.error_handler import handle_mcp_errors
+from ..utils.gitignore_detector import get_default_detector
 from . import fd_rg_utils
 from .base_tool import BaseMCPTool

+logger = logging.getLogger(__name__)
+

 class ListFilesTool(BaseMCPTool):
     """MCP tool that wraps fd to list files with safety limits."""

@@ -176,6 +180,25 @@ class ListFilesTool(BaseMCPTool):
             fd_rg_utils.MAX_RESULTS_HARD_CAP,
         )

+        # Smart .gitignore detection
+        no_ignore = bool(arguments.get("no_ignore", False))
+        if not no_ignore:
+            # Auto-detect if we should use --no-ignore
+            detector = get_default_detector()
+            original_roots = arguments.get("roots", [])
+            should_ignore = detector.should_use_no_ignore(
+                original_roots, self.project_root
+            )
+            if should_ignore:
+                no_ignore = True
+                # Log the auto-detection for debugging
+                detection_info = detector.get_detection_info(
+                    original_roots, self.project_root
+                )
+                logger.info(
+                    f"Auto-enabled --no-ignore due to .gitignore interference: {detection_info['reason']}"
+                )
+
         cmd = fd_rg_utils.build_fd_command(
             pattern=arguments.get("pattern"),
             glob=bool(arguments.get("glob", False)),

@@ -185,7 +208,7 @@ class ListFilesTool(BaseMCPTool):
             depth=arguments.get("depth"),
             follow_symlinks=bool(arguments.get("follow_symlinks", False)),
             hidden=bool(arguments.get("hidden", False)),
-            no_ignore=bool(arguments.get("no_ignore", False)),
+            no_ignore=no_ignore,  # Use the potentially auto-detected value
             size=arguments.get("size"),
             changed_within=arguments.get("changed_within"),
             changed_before=arguments.get("changed_before"),
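Condensed, the wiring above is: an explicit no_ignore=True from the caller always wins; otherwise the detector may switch it on. A minimal sketch of that decision (resolve_no_ignore is an illustrative helper, not part of the package):

from tree_sitter_analyzer.mcp.utils.gitignore_detector import get_default_detector

def resolve_no_ignore(arguments: dict, project_root: str | None) -> bool:
    """Sketch of the auto-detection wiring in ListFilesTool.execute."""
    no_ignore = bool(arguments.get("no_ignore", False))
    if not no_ignore:
        detector = get_default_detector()
        if detector.should_use_no_ignore(arguments.get("roots", []), project_root):
            no_ignore = True  # .gitignore rules would hide files the caller asked about
    return no_ignore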

tree_sitter_analyzer/mcp/tools/search_content_tool.py
CHANGED
@@ -7,18 +7,36 @@ Search content in files under roots or an explicit file list using ripgrep --json.

 from __future__ import annotations

+import logging
 import time
 from pathlib import Path
 from typing import Any

 from ..utils.error_handler import handle_mcp_errors
+from ..utils.gitignore_detector import get_default_detector
+from ..utils.search_cache import get_default_cache
 from . import fd_rg_utils
 from .base_tool import BaseMCPTool

+logger = logging.getLogger(__name__)
+

 class SearchContentTool(BaseMCPTool):
     """MCP tool that wraps ripgrep to search content with safety limits."""

+    def __init__(
+        self, project_root: str | None = None, enable_cache: bool = True
+    ) -> None:
+        """
+        Initialize the search content tool.
+
+        Args:
+            project_root: Optional project root directory
+            enable_cache: Whether to enable search result caching (default: True)
+        """
+        super().__init__(project_root)
+        self.cache = get_default_cache() if enable_cache else None
+
     def get_tool_definition(self) -> dict[str, Any]:
         return {
             "name": "search_content",
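Caching is now opt-out at construction time: with enable_cache=False, self.cache stays None, and the `if self.cache` guards later in execute() skip all cache reads and writes. A usage sketch (the project root is a hypothetical path):

tool = SearchContentTool(project_root="/path/to/project")  # shares the default cache
bare = SearchContentTool(project_root="/path/to/project", enable_cache=False)
assert bare.cache is None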

@@ -209,8 +227,50 @@ class SearchContentTool(BaseMCPTool):
                 raise ValueError(f"{key} must be an array of strings")
         return True

+    def _determine_requested_format(self, arguments: dict[str, Any]) -> str:
+        """Determine the requested output format based on arguments."""
+        if arguments.get("total_only", False):
+            return "total_only"
+        elif arguments.get("count_only_matches", False):
+            return "count_only"
+        elif arguments.get("summary_only", False):
+            return "summary"
+        elif arguments.get("group_by_file", False):
+            return "group_by_file"
+        else:
+            return "normal"
+
+    def _create_count_only_cache_key(
+        self, total_only_cache_key: str, arguments: dict[str, Any]
+    ) -> str | None:
+        """
+        Create a count_only_matches cache key from a total_only cache key.
+
+        This enables cross-format caching where total_only results can serve
+        future count_only_matches queries.
+        """
+        if not self.cache:
+            return None
+
+        # Create modified arguments with count_only_matches instead of total_only
+        count_only_args = arguments.copy()
+        count_only_args.pop("total_only", None)
+        count_only_args["count_only_matches"] = True
+
+        # Generate cache key for count_only_matches version
+        cache_params = {
+            k: v
+            for k, v in count_only_args.items()
+            if k not in ["query", "roots", "files"]
+        }
+
+        roots = arguments.get("roots", [])
+        return self.cache.create_cache_key(
+            query=arguments["query"], roots=roots, **cache_params
+        )
+
     @handle_mcp_errors("search_content")
-    async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
+    async def execute(self, arguments: dict[str, Any]) -> dict[str, Any] | int:
         self.validate_arguments(arguments)

         roots = arguments.get("roots")
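The cache key deliberately strips query, roots, and files out of the keyword parameters and passes query and roots explicitly, so requests that differ only in output flags normalize consistently. A sketch of the key shape produced by create_cache_key (defined in search_cache.py below; the root path is hypothetical and is resolved to an absolute path):

from tree_sitter_analyzer.mcp.utils.search_cache import get_default_cache

cache = get_default_cache()
key = cache.create_cache_key(query="TODO", roots=["src"], case="smart", hidden=False)
# Shape: "<lowercased query>|<resolved, sorted roots>|<sorted relevant params>", e.g.
# "todo|['/abs/path/src']|[('case', 'smart'), ('exclude_globs', []), ...]"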

@@ -220,6 +280,31 @@ class SearchContentTool(BaseMCPTool):
         if files:
             files = self._validate_files(files)

+        # Check cache if enabled
+        cache_key = None
+        if self.cache:
+            # Create cache key with relevant parameters (excluding 'query' and 'roots' from kwargs)
+            cache_params = {
+                k: v
+                for k, v in arguments.items()
+                if k not in ["query", "roots", "files"]
+            }
+            cache_key = self.cache.create_cache_key(
+                query=arguments["query"], roots=roots or [], **cache_params
+            )
+
+            # Try smart cross-format caching first
+            requested_format = self._determine_requested_format(arguments)
+            cached_result = self.cache.get_compatible_result(
+                cache_key, requested_format
+            )
+            if cached_result is not None:
+                # Add cache hit indicator to result
+                if isinstance(cached_result, dict):
+                    cached_result = cached_result.copy()
+                    cached_result["cache_hit"] = True
+                return cached_result
+
         # Clamp counts to safety limits
         max_count = fd_rg_utils.clamp_int(
             arguments.get("max_count"),

@@ -247,6 +332,26 @@ class SearchContentTool(BaseMCPTool):
         )
         summary_only = bool(arguments.get("summary_only", False))

+        # Smart .gitignore detection
+        no_ignore = bool(arguments.get("no_ignore", False))
+        if not no_ignore and roots:  # Only for roots mode, not files mode
+            # Auto-detect if we should use --no-ignore
+            detector = get_default_detector()
+            original_roots = arguments.get("roots", [])
+            should_ignore = detector.should_use_no_ignore(
+                original_roots, self.project_root
+            )
+            if should_ignore:
+                no_ignore = True
+                # Log the auto-detection for debugging
+                # Logger already defined at module level
+                detection_info = detector.get_detection_info(
+                    original_roots, self.project_root
+                )
+                logger.info(
+                    f"Auto-enabled --no-ignore due to .gitignore interference: {detection_info['reason']}"
+                )
+
         # Roots mode
         cmd = fd_rg_utils.build_rg_command(
             query=arguments["query"],

@@ -258,7 +363,7 @@ class SearchContentTool(BaseMCPTool):
             exclude_globs=arguments.get("exclude_globs"),
             follow_symlinks=bool(arguments.get("follow_symlinks", False)),
             hidden=bool(arguments.get("hidden", False)),
-            no_ignore=bool(arguments.get("no_ignore", False)),
+            no_ignore=no_ignore,  # Use the potentially auto-detected value
             max_filesize=arguments.get("max_filesize"),
             context_before=arguments.get("context_before"),
             context_after=arguments.get("context_after"),

@@ -283,14 +388,43 @@ class SearchContentTool(BaseMCPTool):
         if total_only:
             # Parse count output and return only the total
             file_counts = fd_rg_utils.parse_rg_count_output(out)
-            total_matches = file_counts.pop("__total__", 0)
+            total_matches = file_counts.get("__total__", 0)
+
+            # Cache the FULL count data for future cross-format optimization
+            # This allows count_only_matches queries to be served from this cache
+            if self.cache and cache_key:
+                # Cache both the simple total and the detailed count structure
+                self.cache.set(cache_key, total_matches)
+
+                # Also cache the equivalent count_only_matches result for cross-format optimization
+                count_only_cache_key = self._create_count_only_cache_key(
+                    cache_key, arguments
+                )
+                if count_only_cache_key:
+                    # Create a copy of file_counts without __total__ for the detailed result
+                    file_counts_copy = {
+                        k: v for k, v in file_counts.items() if k != "__total__"
+                    }
+                    detailed_count_result = {
+                        "success": True,
+                        "count_only": True,
+                        "total_matches": total_matches,
+                        "file_counts": file_counts_copy,  # Keep the file-level data (without __total__)
+                        "elapsed_ms": elapsed_ms,
+                        "derived_from_total_only": True,  # Mark as derived
+                    }
+                    self.cache.set(count_only_cache_key, detailed_count_result)
+                    logger.debug(
+                        "Cross-cached total_only result as count_only_matches for future optimization"
+                    )
+
             return total_matches

         # Handle count-only mode
         if count_only_matches:
             file_counts = fd_rg_utils.parse_rg_count_output(out)
             total_matches = file_counts.pop("__total__", 0)
-            return {
+            result = {
                 "success": True,
                 "count_only": True,
                 "total_matches": total_matches,
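Net effect of the total_only branch above: one ripgrep run warms two cache entries, so a later count_only_matches request with the same parameters is answered without rerunning the search. A self-contained sketch of the two writes (keys and counts are hypothetical; in the tool they come from create_cache_key and parse_rg_count_output):

from tree_sitter_analyzer.mcp.utils.search_cache import SearchCache

cache = SearchCache()
cache.set("total_key", 42)  # bare int satisfies later total_only lookups
cache.set("count_key", {
    "success": True,
    "count_only": True,
    "total_matches": 42,
    "file_counts": {"src/a.py": 40, "src/b.py": 2},  # "__total__" stripped
    "elapsed_ms": 12,
    "derived_from_total_only": True,
})
assert cache.get_compatible_result("count_key", "count_only")["total_matches"] == 42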

@@ -298,6 +432,12 @@ class SearchContentTool(BaseMCPTool):
                 "elapsed_ms": elapsed_ms,
             }

+            # Cache the result
+            if self.cache and cache_key:
+                self.cache.set(cache_key, result)
+
+            return result
+
         # Handle normal mode
         matches = fd_rg_utils.parse_rg_json_lines_to_matches(out)
         truncated = len(matches) >= fd_rg_utils.MAX_RESULTS_HARD_CAP

@@ -312,12 +452,18 @@ class SearchContentTool(BaseMCPTool):
         # Apply file grouping if requested (takes priority over other formats)
         group_by_file = arguments.get("group_by_file", False)
         if group_by_file and matches:
-            return fd_rg_utils.group_matches_by_file(matches)
+            result = fd_rg_utils.group_matches_by_file(matches)
+
+            # Cache the result
+            if self.cache and cache_key:
+                self.cache.set(cache_key, result)
+
+            return result

         # Handle summary mode
         if summary_only:
             summary = fd_rg_utils.summarize_search_results(matches)
-            return {
+            result = {
                 "success": True,
                 "count": len(matches),
                 "truncated": truncated,

@@ -325,10 +471,22 @@ class SearchContentTool(BaseMCPTool):
                 "summary": summary,
             }

-        return {
+            # Cache the result
+            if self.cache and cache_key:
+                self.cache.set(cache_key, result)
+
+            return result
+
+        result = {
             "success": True,
             "count": len(matches),
             "truncated": truncated,
             "elapsed_ms": elapsed_ms,
             "results": matches,
         }
+
+        # Cache the result
+        if self.cache and cache_key:
+            self.cache.set(cache_key, result)
+
+        return result

tree_sitter_analyzer/mcp/utils/gitignore_detector.py
ADDED
@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+"""
+Gitignore Detection Utility
+
+Intelligently detects when .gitignore rules might interfere with file searches
+and suggests using --no-ignore option when appropriate.
+"""
+
+import logging
+import os
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+class GitignoreDetector:
+    """Detects .gitignore interference with file searches"""
+
+    def __init__(self):
+        self.common_ignore_patterns = {
+            # Directory patterns that commonly cause search issues
+            "build/*",
+            "dist/*",
+            "node_modules/*",
+            "__pycache__/*",
+            "target/*",
+            ".git/*",
+            ".svn/*",
+            ".hg/*",
+            "code/*",
+            "src/*",
+            "lib/*",
+            "app/*",  # Added code/* which is our case
+        }
+
+    def should_use_no_ignore(
+        self, roots: list[str], project_root: str | None = None
+    ) -> bool:
+        """
+        Determine if --no-ignore should be used based on search context
+
+        Args:
+            roots: List of root directories to search
+            project_root: Optional project root directory
+
+        Returns:
+            True if --no-ignore should be used
+        """
+        # Only apply auto-detection for root directory searches
+        if not (len(roots) == 1 and roots[0] in [".", "./"]):
+            return False
+
+        if not project_root:
+            return False
+
+        try:
+            project_path = Path(project_root).resolve()
+
+            # Check for .gitignore files that might interfere
+            gitignore_files = self._find_gitignore_files(project_path)
+
+            for gitignore_file in gitignore_files:
+                # Use the directory containing the .gitignore as the reference point
+                gitignore_dir = gitignore_file.parent
+                if self._has_interfering_patterns(
+                    gitignore_file, gitignore_dir, project_path
+                ):
+                    logger.debug(
+                        f"Found interfering .gitignore patterns in {gitignore_file}"
+                    )
+                    return True

+            return False
+
+        except Exception as e:
+            logger.warning(f"Error detecting .gitignore interference: {e}")
+            return False
+
+    def _find_gitignore_files(self, project_path: Path) -> list[Path]:
+        """Find .gitignore files in project hierarchy"""
+        gitignore_files = []
+
+        # Check current directory and parent directories
+        current = project_path
+        max_depth = 3  # Limit search depth
+
+        for _ in range(max_depth):
+            gitignore_path = current / ".gitignore"
+            if gitignore_path.exists():
+                gitignore_files.append(gitignore_path)
+
+            parent = current.parent
+            if parent == current:  # Reached root
+                break
+            current = parent
+
+        return gitignore_files
+
+    def _has_interfering_patterns(
+        self, gitignore_file: Path, gitignore_dir: Path, current_search_dir: Path
+    ) -> bool:
+        """
+        Check if .gitignore file has patterns that might interfere with searches
+
+        Args:
+            gitignore_file: Path to the .gitignore file
+            gitignore_dir: Directory containing the .gitignore file
+            current_search_dir: Directory where the search is being performed
+        """
+        try:
+            with open(gitignore_file, encoding="utf-8", errors="ignore") as f:
+                lines = f.readlines()
+
+            for line in lines:
+                line = line.strip()
+
+                # Skip comments and empty lines
+                if not line or line.startswith("#"):
+                    continue
+
+                # Check for patterns that commonly cause search issues
+                if self._is_interfering_pattern(
+                    line, gitignore_dir, current_search_dir
+                ):
+                    logger.debug(f"Found interfering pattern: {line}")
+                    return True
+
+            return False
+
+        except Exception as e:
+            logger.warning(f"Error reading .gitignore file {gitignore_file}: {e}")
+            return False
+
+    def _is_interfering_pattern(
+        self, pattern: str, gitignore_dir: Path, current_search_dir: Path
+    ) -> bool:
+        """
+        Check if a gitignore pattern is likely to interfere with searches
+
+        Args:
+            pattern: The gitignore pattern
+            gitignore_dir: Directory containing the .gitignore file
+            current_search_dir: Directory where the search is being performed
+        """
+        # Remove leading slash
+        pattern = pattern.lstrip("/")
+
+        # Check for broad directory exclusions that contain searchable files
+        if pattern.endswith("/*") or pattern.endswith("/"):
+            dir_name = pattern.rstrip("/*")
+
+            # Check if the pattern affects the current search directory
+            pattern_dir = gitignore_dir / dir_name
+
+            # If we're searching in a subdirectory that would be ignored by this pattern
+            if self._is_search_dir_affected_by_pattern(
+                current_search_dir, pattern_dir, gitignore_dir
+            ):
+                if pattern_dir.exists() and pattern_dir.is_dir():
+                    # Check if this directory contains searchable files
+                    if self._directory_has_searchable_files(pattern_dir):
+                        logger.debug(
+                            f"Pattern '{pattern}' interferes with search - directory contains searchable files"
+                        )
+                        return True
+
+        # Check for patterns that ignore entire source directories
+        source_dirs = [
+            "code",
+            "src",
+            "lib",
+            "app",
+            "main",
+            "java",
+            "python",
+            "js",
+            "ts",
+        ]
+        pattern_dir_name = pattern.rstrip("/*")
+        if pattern_dir_name in source_dirs:
+            pattern_dir = gitignore_dir / pattern_dir_name
+            if self._is_search_dir_affected_by_pattern(
+                current_search_dir, pattern_dir, gitignore_dir
+            ):
+                if pattern_dir.exists() and pattern_dir.is_dir():
+                    if self._directory_has_searchable_files(pattern_dir):
+                        logger.debug(
+                            f"Pattern '{pattern}' interferes with search - ignores source directory"
+                        )
+                        return True
+
+        return False
+
+    def _is_search_dir_affected_by_pattern(
+        self, search_dir: Path, pattern_dir: Path, gitignore_dir: Path
+    ) -> bool:
+        """Check if the search directory would be affected by a gitignore pattern"""
+        try:
+            # If search_dir is the same as pattern_dir or is a subdirectory of pattern_dir
+            search_resolved = search_dir.resolve()
+            pattern_resolved = pattern_dir.resolve()
+
+            # Check if we're searching in the directory that would be ignored
+            return search_resolved == pattern_resolved or str(
+                search_resolved
+            ).startswith(str(pattern_resolved) + os.sep)
+        except Exception:
+            # If path resolution fails, assume it could be affected
+            return True
+
+    def _directory_has_searchable_files(self, directory: Path) -> bool:
+        """Check if directory contains files that users typically want to search"""
+        searchable_extensions = {
+            ".java",
+            ".py",
+            ".js",
+            ".ts",
+            ".cpp",
+            ".c",
+            ".h",
+            ".cs",
+            ".go",
+            ".rs",
+        }
+
+        try:
+            # Quick check - look for any files with searchable extensions
+            for file_path in directory.rglob("*"):
+                if (
+                    file_path.is_file()
+                    and file_path.suffix.lower() in searchable_extensions
+                ):
+                    return True
+            return False
+        except Exception:
+            # If we can't scan, assume it might have searchable files
+            return True
+
+    def get_detection_info(
+        self, roots: list[str], project_root: str | None = None
+    ) -> dict:
+        """
+        Get detailed information about gitignore detection
+
+        Returns:
+            Dictionary with detection details for debugging/logging
+        """
+        info = {
+            "should_use_no_ignore": False,
+            "detected_gitignore_files": [],
+            "interfering_patterns": [],
+            "reason": "No interference detected",
+        }
+
+        if not (len(roots) == 1 and roots[0] in [".", "./"]):
+            info["reason"] = "Not a root directory search"
+            return info
+
+        if not project_root:
+            info["reason"] = "No project root specified"
+            return info
+
+        try:
+            project_path = Path(project_root).resolve()
+            gitignore_files = self._find_gitignore_files(project_path)
+            info["detected_gitignore_files"] = [str(f) for f in gitignore_files]
+
+            for gitignore_file in gitignore_files:
+                gitignore_dir = gitignore_file.parent
+                patterns = self._get_interfering_patterns(
+                    gitignore_file, gitignore_dir, project_path
+                )
+                if patterns:
+                    info["interfering_patterns"].extend(patterns)
+
+            if info["interfering_patterns"]:
+                info["should_use_no_ignore"] = True
+                info["reason"] = (
+                    f"Found {len(info['interfering_patterns'])} interfering patterns"
+                )
+
+        except Exception as e:
+            info["reason"] = f"Error during detection: {e}"
+
+        return info
+
+    def _get_interfering_patterns(
+        self, gitignore_file: Path, gitignore_dir: Path, current_search_dir: Path
+    ) -> list[str]:
+        """Get list of interfering patterns from a gitignore file"""
+        interfering = []
+
+        try:
+            with open(gitignore_file, encoding="utf-8", errors="ignore") as f:
+                lines = f.readlines()
+
+            for line in lines:
+                line = line.strip()
+                if (
+                    line
+                    and not line.startswith("#")
+                    and self._is_interfering_pattern(
+                        line, gitignore_dir, current_search_dir
+                    )
+                ):
+                    interfering.append(line)
+
+        except Exception as e:
+            logger.warning(f"Error reading .gitignore file {gitignore_file}: {e}")
+
+        return interfering
+
+
+# Global instance for easy access
+_default_detector = None
+
+
+def get_default_detector() -> GitignoreDetector:
+    """Get the default gitignore detector instance"""
+    global _default_detector
+    if _default_detector is None:
+        _default_detector = GitignoreDetector()
+    return _default_detector
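A usage sketch of the detector (the project root is a hypothetical path):

from tree_sitter_analyzer.mcp.utils.gitignore_detector import get_default_detector

detector = get_default_detector()

# Auto-detection only triggers for whole-project searches rooted at "." or "./"
if detector.should_use_no_ignore(["."], project_root="/path/to/project"):
    print("fd/rg should run with --no-ignore")

info = detector.get_detection_info(["."], project_root="/path/to/project")
print(info["reason"])                # e.g. "Found 2 interfering patterns"
print(info["interfering_patterns"])  # the .gitignore lines that triggered detection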

tree_sitter_analyzer/mcp/utils/search_cache.py
ADDED
@@ -0,0 +1,343 @@
+#!/usr/bin/env python3
+"""
+Search Cache Module for MCP Tools
+
+Provides basic caching functionality for search results to improve performance
+by avoiding repeated expensive search operations.
+
+This is a simplified version focusing on core caching features for Phase 2.
+"""
+
+import logging
+import threading
+import time
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+class SearchCache:
+    """Thread-safe in-memory search result cache with TTL and LRU eviction"""
+
+    def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600):
+        """
+        Initialize the search cache.
+
+        Args:
+            max_size: Maximum number of cached entries
+            ttl_seconds: Time-to-live for cached entries in seconds (default: 1 hour)
+        """
+        self.cache: dict[str, dict[str, Any]] = {}
+        self.max_size = max_size
+        self.ttl_seconds = ttl_seconds
+        self._lock = threading.RLock()  # Reentrant lock for thread safety
+        self._access_times: dict[str, float] = {}  # Track access times for LRU
+
+        # Statistics
+        self._hits = 0
+        self._misses = 0
+        self._evictions = 0
+
+    def _is_expired(self, timestamp: float) -> bool:
+        """Check if a cache entry is expired"""
+        return time.time() - timestamp > self.ttl_seconds
+
+    def _cleanup_expired(self) -> None:
+        """Remove expired entries (should be called with lock held)"""
+        current_time = time.time()
+        expired_keys = [
+            key
+            for key, entry in self.cache.items()
+            if current_time - entry["timestamp"] > self.ttl_seconds
+        ]
+        for key in expired_keys:
+            del self.cache[key]
+            if key in self._access_times:
+                del self._access_times[key]
+
+        if expired_keys:
+            logger.debug(f"Cleaned up {len(expired_keys)} expired cache entries")
+
+    def get(self, cache_key: str) -> Any:
+        """
+        Get cached result if valid.
+
+        Args:
+            cache_key: The cache key to look up
+
+        Returns:
+            Cached data if found and valid, None otherwise
+        """
+        with self._lock:
+            if cache_key in self.cache:
+                entry = self.cache[cache_key]
+                if not self._is_expired(entry["timestamp"]):
+                    # Update access time for LRU
+                    self._access_times[cache_key] = time.time()
+                    self._hits += 1
+                    logger.debug(f"Cache hit for key: {cache_key[:50]}...")
+                    return entry["data"]
+                else:
+                    # Remove expired entry
+                    del self.cache[cache_key]
+                    if cache_key in self._access_times:
+                        del self._access_times[cache_key]
+
+            self._misses += 1
+            return None
+
+    def get_compatible_result(self, cache_key: str, requested_format: str) -> Any:
+        """
+        Get cached result and try to derive compatible formats.
+
+        This enables smart cross-format caching where count results can be used
+        to derive file lists without additional searches.
+
+        Args:
+            cache_key: The cache key
+            requested_format: The format being requested ('file_list', 'summary', etc.)
+
+        Returns:
+            Compatible cached data if derivable, None otherwise
+        """
+        # First try direct cache hit - but only if the format matches
+        direct_result = self.get(cache_key)
+        if direct_result is not None:
+            # Check if the cached result matches the requested format
+            if self._is_format_compatible(direct_result, requested_format):
+                return direct_result
+
+        # Try to find compatible cached results for derivation
+        # Look for count_only results that can derive file lists
+        if requested_format in ["file_list", "summary", "files_only"]:
+            # Look for a count_only version of the same search
+            count_key = self._derive_count_key_from_cache_key(cache_key)
+            if count_key and count_key != cache_key:
+                count_result = self.get(count_key)
+                if count_result and self._can_derive_file_list(count_result):
+                    logger.debug(f"Deriving {requested_format} from cached count data")
+                    return self._derive_file_list_result(count_result, requested_format)
+
+        return None
+
+    def _is_format_compatible(self, cached_result: Any, requested_format: str) -> bool:
+        """
+        Check if a cached result is compatible with the requested format.
+
+        This prevents returning wrong format data (e.g., returning integer total
+        when detailed results are requested).
+        """
+        if requested_format == "total_only":
+            # total_only expects a simple integer
+            return isinstance(cached_result, int)
+        elif requested_format == "count_only":
+            # count_only expects a dict with file_counts
+            return isinstance(cached_result, dict) and (
+                "file_counts" in cached_result or "count_only" in cached_result
+            )
+        elif requested_format in ["summary", "file_list", "files_only"]:
+            # These formats expect dict results with specific structures
+            return isinstance(cached_result, dict) and cached_result.get(
+                "success", False
+            )
+        elif requested_format in ["normal", "group_by_file"]:
+            # Normal format expects dict with matches, files, or results data
+            return isinstance(cached_result, dict) and (
+                "matches" in cached_result
+                or "files" in cached_result
+                or "results" in cached_result
+            )
+        else:
+            # For unknown formats or test scenarios, allow dict results but not primitives
+            # This maintains backward compatibility while preventing the integer bug
+            return isinstance(cached_result, dict)
+
+    def _derive_count_key_from_cache_key(self, cache_key: str) -> str | None:
+        """Try to derive what the count_only cache key would be for this search."""
+        # Simple heuristic: replace summary_only with count_only_matches
+        if "summary_only" in cache_key:
+            return cache_key.replace(
+                "'summary_only': True", "'count_only_matches': True"
+            )
+        elif "count_only_matches" not in cache_key:
+            # Add count_only_matches parameter
+            return cache_key.replace("}", ", 'count_only_matches': True}")
+        return None
+
+    def _can_derive_file_list(self, count_result: dict[str, Any]) -> bool:
+        """Check if a count result contains file count data that can derive file lists."""
+        return (
+            isinstance(count_result, dict)
+            and "file_counts" in count_result
+            and isinstance(count_result["file_counts"], dict)
+        )
+
+    def _derive_file_list_result(
+        self, count_result: dict[str, Any], requested_format: str
+    ) -> dict[str, Any]:
+        """Derive file list result from count data."""
+        try:
+            from ..tools import fd_rg_utils  # Import here to avoid circular imports
+
+            file_counts = count_result.get("file_counts", {})
+            if requested_format == "summary":
+                derived_result = fd_rg_utils.create_file_summary_from_count_data(
+                    file_counts
+                )
+                derived_result["cache_derived"] = True  # Mark as derived from cache
+                return derived_result
+            elif requested_format in ["file_list", "files_only"]:
+                file_list = fd_rg_utils.extract_file_list_from_count_data(file_counts)
+                return {
+                    "success": True,
+                    "files": file_list,
+                    "file_count": len(file_list),
+                    "total_matches": file_counts.get("__total__", 0),
+                    "cache_derived": True,  # Mark as derived from cache
+                }
+        except ImportError:
+            logger.warning("Could not import fd_rg_utils for cache derivation")
+
+        return count_result
+
+    def set(self, cache_key: str, data: dict[str, Any] | Any) -> None:
+        """
+        Set cached result.
+
+        Args:
+            cache_key: The cache key
+            data: The data to cache
+        """
+        with self._lock:
+            self._cleanup_expired()
+
+            # If cache is full and this is a new key, remove LRU entry
+            if len(self.cache) >= self.max_size and cache_key not in self.cache:
+                # Remove least recently used entry
+                if self._access_times:
+                    lru_key = min(
+                        self._access_times.keys(),
+                        key=lambda k: self._access_times.get(k, 0),
+                    )
+                    del self.cache[lru_key]
+                    del self._access_times[lru_key]
+                    self._evictions += 1
+                    logger.debug(f"Cache full, removed LRU entry: {lru_key[:50]}...")
+
+            current_time = time.time()
+            self.cache[cache_key] = {"data": data, "timestamp": current_time}
+            self._access_times[cache_key] = current_time
+            logger.debug(f"Cached result for key: {cache_key[:50]}...")
+
+    def clear(self) -> None:
+        """Clear all cached results"""
+        with self._lock:
+            self.cache.clear()
+            self._access_times.clear()
+            self._hits = 0
+            self._misses = 0
+            self._evictions = 0
+            logger.info("Search cache cleared")
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get cache statistics"""
+        with self._lock:
+            total_requests = self._hits + self._misses
+            hit_rate = (self._hits / total_requests * 100) if total_requests > 0 else 0
+
+            return {
+                "size": len(self.cache),
+                "max_size": self.max_size,
+                "ttl_seconds": self.ttl_seconds,
+                "hits": self._hits,
+                "misses": self._misses,
+                "hit_rate_percent": round(hit_rate, 2),
+                "evictions": self._evictions,
+                "expired_entries": len(
+                    [
+                        key
+                        for key, entry in self.cache.items()
+                        if self._is_expired(entry["timestamp"])
+                    ]
+                ),
+            }
+
+    def create_cache_key(self, query: str, roots: list[str], **params: Any) -> str:
+        """
+        Create a deterministic cache key for search parameters.
+
+        Args:
+            query: Search query
+            roots: List of root directories
+            **params: Additional search parameters
+
+        Returns:
+            Cache key string
+        """
+        # Normalize query
+        normalized_query = query.strip().lower()
+
+        # Normalize roots - resolve paths and sort for consistency
+        normalized_roots = []
+        for r in roots:
+            try:
+                resolved = str(Path(r).resolve())
+                normalized_roots.append(resolved)
+            except Exception:
+                # If path resolution fails, use original
+                normalized_roots.append(r)
+        normalized_roots.sort()
+
+        # Only include parameters that affect search results
+        relevant_params = {
+            "case": params.get("case", "smart"),
+            "include_globs": (
+                sorted(params.get("include_globs", []))
+                if params.get("include_globs")
+                else []
+            ),
+            "exclude_globs": (
+                sorted(params.get("exclude_globs", []))
+                if params.get("exclude_globs")
+                else []
+            ),
+            "no_ignore": params.get("no_ignore", False),
+            "hidden": params.get("hidden", False),
+            "fixed_strings": params.get("fixed_strings", False),
+            "word": params.get("word", False),
+            "multiline": params.get("multiline", False),
+            "max_filesize": params.get("max_filesize", ""),
+        }
+
+        # Create deterministic key
+        key_parts = [
+            normalized_query,
+            str(normalized_roots),
+            str(sorted(relevant_params.items())),
+        ]
+        return "|".join(key_parts)
+
+
+# Global cache instance for easy access
+_default_cache = None
+
+
+def get_default_cache() -> SearchCache:
+    """Get the default search cache instance"""
+    global _default_cache
+    if _default_cache is None:
+        _default_cache = SearchCache()
+    return _default_cache
+
+
+def configure_cache(max_size: int = 1000, ttl_seconds: int = 3600) -> None:
+    """Configure the default search cache"""
+    global _default_cache
+    _default_cache = SearchCache(max_size, ttl_seconds)
+
+
+def clear_cache() -> None:
+    """Clear the default search cache"""
+    cache = get_default_cache()
+    cache.clear()
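A self-contained usage sketch of the cache, exercising TTL configuration, LRU eviction, and statistics (keys and values are hypothetical):

from tree_sitter_analyzer.mcp.utils.search_cache import SearchCache

cache = SearchCache(max_size=2, ttl_seconds=60)
key = cache.create_cache_key("TODO", ["/repo"], case="smart")

assert cache.get(key) is None                 # miss
cache.set(key, {"success": True, "results": []})
assert cache.get(key)["success"]              # hit; also refreshes LRU access time

cache.set("k2", {"success": True})
cache.set("k3", {"success": True})            # cache full: least recently used entry evicted

print(cache.get_stats())  # size, hits, misses, hit_rate_percent, evictions, ...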

{tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tree-sitter-analyzer
-Version: 1.2.5
+Version: 1.3.2
 Summary: Extensible multi-language code analyzer framework using Tree-sitter with dynamic plugin architecture
 Project-URL: Homepage, https://github.com/aimasteracc/tree-sitter-analyzer
 Project-URL: Documentation, https://github.com/aimasteracc/tree-sitter-analyzer#readme

@@ -163,11 +163,11 @@ Description-Content-Type: text/markdown

 [](https://python.org)
 [](LICENSE)
-[](#quality-assurance)
+[](#quality-assurance)
 [](#quality-assurance)
 [](https://pypi.org/project/tree-sitter-analyzer/)
-[](https://github.com/aimasteracc/tree-sitter-analyzer/releases)
+[](https://github.com/aimasteracc/tree-sitter-analyzer/releases)
 [](https://github.com/aimasteracc/tree-sitter-analyzer)

 ## 🚀 Break LLM Token Limits, Let AI Understand Code Files of Any Size

@@ -920,11 +920,11 @@ Tree-sitter Analyzer automatically detects and protects project boundaries:
 - **Zero test failures** - Fully CI/CD ready
 - **Cross-platform compatibility** - Windows, macOS, Linux

-### ⚡ **Latest Quality Achievements (v1.2.5)**
+### ⚡ **Latest Quality Achievements (v1.3.2)**
 - ✅ **Cross-platform path compatibility** - Fixed Windows short path names and macOS symbolic link differences
 - ✅ **Windows environment** - Implemented robust path normalization using Windows API
 - ✅ **macOS environment** - Fixed `/var` vs `/private/var` symbolic link differences
-- ✅ **Comprehensive test coverage** -
+- ✅ **Comprehensive test coverage** - 1605 tests, 74.36% coverage
 - ✅ **GitFlow implementation** - Professional development/release branch strategy. See [GitFlow documentation](GITFLOW.md) for details.

@@ -943,7 +943,7 @@ uv run pytest tests/test_mcp_server_initialization.py -v
 - **Language detector**: 98.41% (Excellent)
 - **CLI main entry**: 94.36% (Excellent)
 - **Query filtering system**: 96.06% (Excellent)
-- **MCP fd/rg tools**: 93.04% (Excellent) - *
+- **MCP fd/rg tools**: 93.04% (Excellent) - *Enhanced in v1.3.2 with cache format compatibility fix*
 - **Query service**: 86.25% (Good)
 - **Error handling**: 82.76% (Good)

@@ -1041,9 +1041,9 @@ All AI prompts in this document have been thoroughly tested in real environments

 **Test Environment:**
 - Operating System: Windows 10
-- Project: tree-sitter-analyzer v1.2.5
+- Project: tree-sitter-analyzer v1.3.2
 - Test Files: BigService.java (1419 lines), sample.py (256 lines), MultiClass.java (54 lines)
-- Test Coverage:
+- Test Coverage: 1605 tests passed, 74.36% coverage
 - Test Tools: All MCP tools (check_code_scale, analyze_code_structure, extract_code_section, query_code, list_files, search_content, find_and_grep)

 **🚀 Start Now** → [30-Second Quick Start](#-30-second-quick-start)

{tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-tree_sitter_analyzer/__init__.py,sha256=
+tree_sitter_analyzer/__init__.py,sha256=7P5MV_TiIjGznUHZWtFsRiAQxrgngbW8KlgQ5CK6qms,3067
 tree_sitter_analyzer/__main__.py,sha256=Zl79tpe4UaMu-7yeztc06tgP0CVMRnvGgas4ZQP5SCs,228
 tree_sitter_analyzer/api.py,sha256=N_bcf1pLwzXS3elPn30OySLR6ehsHdWpchXMycjl0PY,17399
 tree_sitter_analyzer/cli_main.py,sha256=jWjVJ5AgNmtf6Z7CgeK3IF-zi7yIiu9zn4Oyvzl-iNQ,10349

@@ -57,17 +57,19 @@ tree_sitter_analyzer/mcp/tools/__init__.py,sha256=9KfetZTaUhvWTeKuZPYzWb7ZomFQ8S
 tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py,sha256=JyS9gey2oFoWjzsiiLjwcqTgwBYGlbY01vAK3QYUuF4,28470
 tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py,sha256=mssed7bEfGeGxW4mOf7dg8BDS1oqHLolIBNX9DaZ3DM,8997
 tree_sitter_analyzer/mcp/tools/base_tool.py,sha256=qf2My325azlnKOugNVMN_R1jtZcjXVy354sGVKzvZls,3546
-tree_sitter_analyzer/mcp/tools/fd_rg_utils.py,sha256=
+tree_sitter_analyzer/mcp/tools/fd_rg_utils.py,sha256=evvnCk61OrOrWnkdQGl3LcoGYpW2iesHBYVZ6qS6FJQ,17692
 tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py,sha256=uYHat0kShQyreK1TXtvBG1e_HAL8ZsRHr49CzP7PLAo,20272
-tree_sitter_analyzer/mcp/tools/list_files_tool.py,sha256=
+tree_sitter_analyzer/mcp/tools/list_files_tool.py,sha256=TA1BRQtb-D5x1pD-IcRJYnP0WnnFfl9q7skI25MOdHk,12873
 tree_sitter_analyzer/mcp/tools/query_tool.py,sha256=1xY1ONNY2sIFJxoILlnNzBnwGVgzEF7vVJ2ccqR9auA,10879
 tree_sitter_analyzer/mcp/tools/read_partial_tool.py,sha256=BMAJF205hTIrYTQJG6N1-vVuKSby2CSm9nWzSMMWceI,11339
-tree_sitter_analyzer/mcp/tools/search_content_tool.py,sha256=
+tree_sitter_analyzer/mcp/tools/search_content_tool.py,sha256=CmyZNtVFMXvwVHeGQdNqPdUB2miTDBgp4G_J86Cl5So,21597
 tree_sitter_analyzer/mcp/tools/table_format_tool.py,sha256=NDIiCtmZSbCmaQOp7ED83jGE5DuJhx4mcUketVHrkjs,16024
 tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py,sha256=-zZnqN9WcoyRTKM_16ADH859LSebzi34BGYwQL2zCOs,25084
 tree_sitter_analyzer/mcp/utils/__init__.py,sha256=TgTTKsRJAqF95g1fAp5SR_zQVDkImpc_5R0Dw529UUw,3126
 tree_sitter_analyzer/mcp/utils/error_handler.py,sha256=msrQHX67K3vhJsEc3OPRz5mmWU_yoHz55Lnxy0IZuy4,18404
+tree_sitter_analyzer/mcp/utils/gitignore_detector.py,sha256=VmO35Xj1fWiKVs4Y9aiD1gILm_8Kf1R8mhvtiF-wcfg,11027
 tree_sitter_analyzer/mcp/utils/path_resolver.py,sha256=7pZvJ1CjKnLKTGvBBOitCLxgWaHNVQo2SwQrxuyqXkI,14976
+tree_sitter_analyzer/mcp/utils/search_cache.py,sha256=ZNv84st6PeejDY1B50AKTbItpXs9HS6JrpR-Ozjyc1c,12991
 tree_sitter_analyzer/plugins/__init__.py,sha256=ITE9bTz7NO4axnn8g5Z-1_ydhSLT0RnY6Y1J9OhUP3E,10326
 tree_sitter_analyzer/plugins/base.py,sha256=FMRAOtjtDutNV8RnB6cmFgdvcjxKRAbrrzqldBBT1yk,17167
 tree_sitter_analyzer/plugins/manager.py,sha256=PyEY3jeuCBpDVqguWhaAu7nzUZM17_pI6wml2e0Hamo,12535

@@ -80,7 +82,7 @@ tree_sitter_analyzer/security/__init__.py,sha256=ZTqTt24hsljCpTXAZpJC57L7MU5lJLT
 tree_sitter_analyzer/security/boundary_manager.py,sha256=3eeENRKWtz2pyZHzd8DiVaq8fdeC6s1eVOuBylSmQPg,9347
 tree_sitter_analyzer/security/regex_checker.py,sha256=jWK6H8PTPgzbwRPfK_RZ8bBTS6rtEbgjY5vr3YWjQ_U,9616
 tree_sitter_analyzer/security/validator.py,sha256=yR4qTWEcXpR--bSFwtWvSgY0AzqujOFAqlc1Z7dlTdk,9809
-tree_sitter_analyzer-1.2.5.dist-info/METADATA,sha256=
-tree_sitter_analyzer-1.2.5.dist-info/WHEEL,sha256=
-tree_sitter_analyzer-1.2.5.dist-info/entry_points.txt,sha256=
-tree_sitter_analyzer-1.2.5.dist-info/RECORD,,
+tree_sitter_analyzer-1.3.2.dist-info/METADATA,sha256=PD6f3dx4MgWX0FhCcDgQ5bc3l8mhbEI6rVI9iSY6LVE,39710
+tree_sitter_analyzer-1.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+tree_sitter_analyzer-1.3.2.dist-info/entry_points.txt,sha256=U4tfLGXgCWubKm2PyEb3zxhQ2pm7zVotMyfyS0CodD8,486
+tree_sitter_analyzer-1.3.2.dist-info/RECORD,,

{tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/WHEEL
RENAMED
File without changes

{tree_sitter_analyzer-1.2.5.dist-info → tree_sitter_analyzer-1.3.2.dist-info}/entry_points.txt
RENAMED
File without changes