tree_sitter_analyzer-1.9.17.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py
@@ -0,0 +1,686 @@
#!/usr/bin/env python3
"""
find_and_grep MCP Tool (fd → ripgrep)

First narrow files with fd, then search contents with ripgrep, with caps & meta.
"""

from __future__ import annotations

import json
import logging
import pathlib
import time
from typing import Any

from ..utils.error_handler import handle_mcp_errors
from ..utils.file_output_manager import FileOutputManager
from ..utils.gitignore_detector import get_default_detector
from . import fd_rg_utils
from .base_tool import BaseMCPTool

logger = logging.getLogger(__name__)

class FindAndGrepTool(BaseMCPTool):
    """MCP tool that composes fd and ripgrep with safety limits and metadata."""

    def __init__(self, project_root: str | None = None) -> None:
        """Initialize the find and grep tool."""
        super().__init__(project_root)
        self.file_output_manager = FileOutputManager.get_managed_instance(project_root)

    def set_project_path(self, project_path: str) -> None:
        """
        Update the project path for all components.

        Args:
            project_path: New project root directory
        """
        super().set_project_path(project_path)
        self.file_output_manager = FileOutputManager.get_managed_instance(project_path)
        logger.info(f"FindAndGrepTool project path updated to: {project_path}")

    def get_tool_definition(self) -> dict[str, Any]:
        return {
            "name": "find_and_grep",
            "description": "Two-stage search: first use fd to find files matching criteria, then use ripgrep to search content within those files. Combines file filtering with content search for precise results with advanced token optimization (summary_only, group_by_file, total_only, suppress_output).",
            "inputSchema": {
                "type": "object",
                "properties": {
                    # === FILE DISCOVERY STAGE (fd parameters) ===
                    "roots": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Directory paths to search in. Must be within project boundaries. Example: ['.', 'src/', 'tests/']",
                    },
                    "pattern": {
                        "type": "string",
                        "description": "[FILE STAGE] Filename pattern to match. Use with 'glob' for shell patterns. Example: '*.py', 'test_*', 'main.js'",
                    },
                    "glob": {
                        "type": "boolean",
                        "default": False,
                        "description": "[FILE STAGE] Treat filename pattern as glob instead of regex. True for '*.py', False for '.*\\.py$'",
                    },
                    "types": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "[FILE STAGE] File types to include. 'f'=files, 'd'=directories, 'l'=symlinks, 'x'=executable, 'e'=empty",
                    },
                    "extensions": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "[FILE STAGE] File extensions to include (without dots). Example: ['py', 'js'] for Python and JavaScript files",
                    },
                    "exclude": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "[FILE STAGE] File patterns to exclude. Example: ['*.tmp', '__pycache__'] to skip temporary files",
                    },
                    "depth": {
                        "type": "integer",
                        "description": "[FILE STAGE] Maximum directory depth to search. 1=current level only, 2=one level deep, etc.",
                    },
                    "follow_symlinks": {
                        "type": "boolean",
                        "default": False,
                        "description": "[FILE STAGE] Follow symbolic links. False=safer, True=may cause loops",
                    },
                    "hidden": {
                        "type": "boolean",
                        "default": False,
                        "description": "[FILE STAGE] Include hidden files/directories (starting with dot). False=skip .git, .env",
                    },
                    "no_ignore": {
                        "type": "boolean",
                        "default": False,
                        "description": "[FILE STAGE] Ignore .gitignore files. False=respect ignore rules, True=search everything",
                    },
                    "size": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "[FILE STAGE] File size filters. Format: '+10M'=larger than 10MB, '-1K'=smaller than 1KB. Units: B, K, M, G",
                    },
                    "changed_within": {
                        "type": "string",
                        "description": "[FILE STAGE] Files modified within timeframe. Format: '1d'=1 day, '2h'=2 hours, '30m'=30 minutes",
                    },
                    "changed_before": {
                        "type": "string",
                        "description": "[FILE STAGE] Files modified before timeframe. Same format as changed_within",
                    },
                    "full_path_match": {
                        "type": "boolean",
                        "default": False,
                        "description": "[FILE STAGE] Match pattern against full path instead of just filename",
                    },
                    "file_limit": {
                        "type": "integer",
                        "description": "[FILE STAGE] Maximum number of files to find before content search. Default 2000, prevents overwhelming searches",
                    },
                    "sort": {
                        "type": "string",
                        "enum": ["path", "mtime", "size"],
                        "description": "[FILE STAGE] Sort found files by: 'path'=alphabetical, 'mtime'=modification time, 'size'=file size",
                    },
                    # === CONTENT SEARCH STAGE (ripgrep parameters) ===
                    "query": {
                        "type": "string",
                        "description": "[CONTENT STAGE] Text pattern to search for in the found files. Can be literal text or regex",
                    },
                    "case": {
                        "type": "string",
                        "enum": ["smart", "insensitive", "sensitive"],
                        "default": "smart",
                        "description": "[CONTENT STAGE] Case sensitivity. 'smart'=case-insensitive unless uppercase present, 'insensitive'=ignore case, 'sensitive'=exact case",
                    },
                    "fixed_strings": {
                        "type": "boolean",
                        "default": False,
                        "description": "[CONTENT STAGE] Treat query as literal string instead of regex. True for exact text, False for patterns",
                    },
                    "word": {
                        "type": "boolean",
                        "default": False,
                        "description": "[CONTENT STAGE] Match whole words only. True finds 'test' but not 'testing'",
                    },
                    "multiline": {
                        "type": "boolean",
                        "default": False,
                        "description": "[CONTENT STAGE] Allow patterns to match across multiple lines. Useful for multi-line code blocks",
                    },
                    "include_globs": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "[CONTENT STAGE] Additional file patterns to include in content search. Example: ['*.py', '*.js']",
                    },
                    "exclude_globs": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "[CONTENT STAGE] File patterns to exclude from content search. Example: ['*.log', '__pycache__/*']",
                    },
                    "max_filesize": {
                        "type": "string",
                        "description": "[CONTENT STAGE] Maximum file size to search content. Format: '10M'=10MB, '500K'=500KB",
                    },
                    "context_before": {
                        "type": "integer",
                        "description": "[CONTENT STAGE] Lines to show before each match for context. Example: 3 shows 3 lines before",
                    },
                    "context_after": {
                        "type": "integer",
                        "description": "[CONTENT STAGE] Lines to show after each match for context. Example: 3 shows 3 lines after",
                    },
                    "encoding": {
                        "type": "string",
                        "description": "[CONTENT STAGE] Text encoding for files. Default auto-detect. Example: 'utf-8', 'latin1'",
                    },
                    "max_count": {
                        "type": "integer",
                        "description": "[CONTENT STAGE] Maximum matches per file; the same value is also applied as a cap on total matches in post-processing",
                    },
                    "timeout_ms": {
                        "type": "integer",
                        "description": "[CONTENT STAGE] Search timeout in milliseconds. Example: 5000 for 5 second timeout",
                    },
                    "count_only_matches": {
                        "type": "boolean",
                        "default": False,
                        "description": "Return only match counts per file instead of full match details. Faster for statistics",
                    },
                    "summary_only": {
                        "type": "boolean",
                        "default": False,
                        "description": "Return condensed summary of results. Shows top files and sample matches to reduce context size",
                    },
                    "optimize_paths": {
                        "type": "boolean",
                        "default": False,
                        "description": "Optimize file paths in results by removing common prefixes and shortening long paths. Saves tokens in output",
                    },
                    "group_by_file": {
                        "type": "boolean",
                        "default": False,
                        "description": "Group results by file to eliminate file path duplication when multiple matches exist in the same file. Significantly reduces tokens",
                    },
                    "total_only": {
                        "type": "boolean",
                        "default": False,
                        "description": "Return only the total match count as a number. Most token-efficient option for count queries. Takes priority over all other formats",
                    },
                    "output_file": {
                        "type": "string",
                        "description": "Optional filename to save output to file (extension auto-detected based on content)",
                    },
                    "suppress_output": {
                        "type": "boolean",
                        "description": "When true and output_file is specified, suppress detailed output in response to save tokens",
                        "default": False,
                    },
                },
                "required": ["roots", "query"],
                "additionalProperties": False,
            },
        }
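
    # Illustrative call shape for this schema (example values, not defaults):
    #   {"roots": ["src/"], "query": "TODO", "extensions": ["py"],
    #    "context_after": 2, "group_by_file": True}
    # Only "roots" and "query" are required; the rest narrow the fd stage or
    # shape the ripgrep output.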

    def _validate_roots(self, roots: list[str]) -> list[str]:
        validated: list[str] = []
        for r in roots:
            resolved = self.path_resolver.resolve(r)
            ok, err = self.security_validator.validate_directory_path(
                resolved, must_exist=True
            )
            if not ok:
                raise ValueError(f"Invalid root '{r}': {err}")
            validated.append(resolved)
        return validated
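
    # A single out-of-boundary or missing root aborts the whole call with
    # ValueError; nothing is searched on partial failure.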

    def validate_arguments(self, arguments: dict[str, Any]) -> bool:
        if "roots" not in arguments or not isinstance(arguments["roots"], list):
            raise ValueError("roots is required and must be an array")
        if (
            "query" not in arguments
            or not isinstance(arguments["query"], str)
            or not arguments["query"].strip()
        ):
            raise ValueError("query is required and must be a non-empty string")
        if "file_limit" in arguments and not isinstance(arguments["file_limit"], int):
            raise ValueError("file_limit must be an integer")
        return True

    @handle_mcp_errors("find_and_grep")
    async def execute(self, arguments: dict[str, Any]) -> dict[str, Any] | int:
        # Check if both fd and rg commands are available
        missing_commands = fd_rg_utils.get_missing_commands()
        if missing_commands:
            return {
                "success": False,
                "error": f"Required commands not found: {', '.join(missing_commands)}. Please install fd (https://github.com/sharkdp/fd) and ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
                "count": 0,
                "results": [],
            }

        self.validate_arguments(arguments)
        roots = self._validate_roots(arguments["roots"])  # absolute, validated

        # fd step
        fd_limit = fd_rg_utils.clamp_int(
            arguments.get("file_limit"),
            fd_rg_utils.DEFAULT_RESULTS_LIMIT,
            fd_rg_utils.MAX_RESULTS_HARD_CAP,
        )
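
        # clamp_int is assumed to fall back to DEFAULT_RESULTS_LIMIT when
        # file_limit is unset and to cap it at MAX_RESULTS_HARD_CAP (see
        # fd_rg_utils); the resulting limit is re-applied to fd's output
        # below as a second safety net.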

        # Smart .gitignore detection for fd stage
        no_ignore = bool(arguments.get("no_ignore", False))
        if not no_ignore:
            # Auto-detect whether we should pass --no-ignore
            detector = get_default_detector()
            original_roots = arguments.get("roots", [])
            auto_no_ignore = detector.should_use_no_ignore(
                original_roots, self.project_root
            )
            if auto_no_ignore:
                no_ignore = True
                # Log the auto-detection for debugging
                detection_info = detector.get_detection_info(
                    original_roots, self.project_root
                )
                logger.info(
                    f"Auto-enabled --no-ignore due to .gitignore interference: {detection_info['reason']}"
                )
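
        # The same auto-detected flag is reused for the ripgrep stage below,
        # so both stages see an identical view of ignored files.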

        fd_cmd = fd_rg_utils.build_fd_command(
            pattern=arguments.get("pattern"),
            glob=bool(arguments.get("glob", False)),
            types=arguments.get("types"),
            extensions=arguments.get("extensions"),
            exclude=arguments.get("exclude"),
            depth=arguments.get("depth"),
            follow_symlinks=bool(arguments.get("follow_symlinks", False)),
            hidden=bool(arguments.get("hidden", False)),
            no_ignore=no_ignore,
            size=arguments.get("size"),
            changed_within=arguments.get("changed_within"),
            changed_before=arguments.get("changed_before"),
            full_path_match=bool(arguments.get("full_path_match", False)),
            absolute=True,
            limit=fd_limit,
            roots=roots,
        )

        fd_started = time.time()
        fd_rc, fd_out, fd_err = await fd_rg_utils.run_command_capture(fd_cmd)
        fd_elapsed_ms = int((time.time() - fd_started) * 1000)
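
        # fd exits 0 even when nothing matches, so a nonzero return code is a
        # genuine failure and is surfaced with fd's stderr attached.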
        if fd_rc != 0:
            return {
                "success": False,
                "error": (
                    fd_err.decode("utf-8", errors="replace").strip() or "fd failed"
                ),
                "returncode": fd_rc,
            }

        files = [
            line.strip()
            for line in fd_out.decode("utf-8", errors="replace").splitlines()
            if line.strip()
        ]

        # Truncate by file_limit again as a safety net
        truncated_fd = False
        if len(files) > fd_limit:
            files = files[:fd_limit]
            truncated_fd = True

        # Optional sorting
        sort_mode = arguments.get("sort")
        if sort_mode in ("path", "mtime", "size"):
            try:
                if sort_mode == "path":
                    files.sort()
                elif sort_mode == "mtime":

                    def get_mtime(p: str) -> float:
                        path_obj = pathlib.Path(p)
                        return path_obj.stat().st_mtime if path_obj.exists() else 0

                    files.sort(key=get_mtime, reverse=True)
                elif sort_mode == "size":

                    def get_size(p: str) -> int:
                        path_obj = pathlib.Path(p)
                        return path_obj.stat().st_size if path_obj.exists() else 0

                    files.sort(key=get_size, reverse=True)
            except (OSError, ValueError):  # nosec B110
                pass
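
        # Sorting is best-effort: a file deleted between exists() and stat()
        # raises OSError, the sort is abandoned, and the list is left in an
        # unspecified order rather than failing the search.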

        searched_file_count = len(files)
        if searched_file_count == 0:
            return {
                "success": True,
                "results": [],
                "count": 0,
                "meta": {
                    "searched_file_count": 0,
                    "truncated": truncated_fd,
                    "fd_elapsed_ms": fd_elapsed_ms,
                    "rg_elapsed_ms": 0,
                },
            }

        # rg step on the fd file list: build filename globs so the content
        # search is limited to the files found by fd
        parent_dirs = set()
        file_globs = []

        for file_path in files:
            parent_dir = str(pathlib.Path(file_path).parent)
            parent_dirs.add(parent_dir)

            # Create a specific glob pattern for this exact file.
            file_name = pathlib.Path(file_path).name
            # Escape glob metacharacters in a single pass; sequential
            # str.replace() calls would re-escape the brackets they just
            # inserted (e.g. "a[b]" -> "a[[[]]b[]]").
            escaped_name = "".join(
                f"[{ch}]" if ch in "[]" else ch for ch in file_name
            )
            file_globs.append(escaped_name)

        # Use parent directories as roots but limit to specific files via globs
        rg_roots = list(parent_dirs)

        # Combine user-provided include_globs with our file-specific globs.
        # Copy first so the caller's arguments dict is never mutated.
        combined_include_globs = list(arguments.get("include_globs") or [])
        combined_include_globs.extend(file_globs)
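
        # Note: these globs are filename-only, so a same-named file under a
        # different parent directory in rg_roots could also be searched. The
        # fd stage has already constrained the candidate set, so this only
        # widens the search in unusual layouts.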

        rg_cmd = fd_rg_utils.build_rg_command(
            query=arguments["query"],
            case=arguments.get("case", "smart"),
            fixed_strings=bool(arguments.get("fixed_strings", False)),
            word=bool(arguments.get("word", False)),
            multiline=bool(arguments.get("multiline", False)),
            include_globs=combined_include_globs,
            exclude_globs=arguments.get("exclude_globs"),
            follow_symlinks=bool(arguments.get("follow_symlinks", False)),
            hidden=bool(arguments.get("hidden", False)),
            no_ignore=no_ignore,  # Use the same no_ignore flag from fd stage
            max_filesize=arguments.get("max_filesize"),
            context_before=arguments.get("context_before"),
            context_after=arguments.get("context_after"),
            encoding=arguments.get("encoding"),
            max_count=arguments.get("max_count"),
            timeout_ms=arguments.get("timeout_ms"),
            roots=rg_roots,
            files_from=None,
            count_only_matches=bool(arguments.get("count_only_matches", False))
            or bool(arguments.get("total_only", False)),
        )

        rg_started = time.time()
        rg_rc, rg_out, rg_err = await fd_rg_utils.run_command_capture(
            rg_cmd, timeout_ms=arguments.get("timeout_ms")
        )
        rg_elapsed_ms = int((time.time() - rg_started) * 1000)
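
        # ripgrep exit codes: 0 = matches found, 1 = no matches (still a
        # successful run); anything else is treated as a real failure below.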
        if rg_rc not in (0, 1):
            return {
                "success": False,
                "error": (
                    rg_err.decode("utf-8", errors="replace").strip() or "ripgrep failed"
                ),
                "returncode": rg_rc,
            }

        # Handle total-only mode (highest priority for count queries)
        if arguments.get("total_only", False):
            # Parse count output and return only the total
            count_data = fd_rg_utils.parse_rg_count_output(rg_out)
            total_matches = count_data.pop("__total__", 0)
            return total_matches
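
        # Output-format precedence from here on: count_only_matches, then
        # group_by_file, then summary_only, then the full match listing.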
        if arguments.get("count_only_matches", False):
            # Parse count-only output
            count_data = fd_rg_utils.parse_rg_count_output(rg_out)
            total_matches = count_data.pop("__total__", 0)

            return {
                "success": True,
                "count_only": True,
                "total_matches": total_matches,
                "file_counts": count_data,
                "meta": {
                    "searched_file_count": searched_file_count,
                    "truncated": truncated_fd,
                    "fd_elapsed_ms": fd_elapsed_ms,
                    "rg_elapsed_ms": rg_elapsed_ms,
                },
            }
        else:
            # Parse full match details
            matches = fd_rg_utils.parse_rg_json_lines_to_matches(rg_out)

            # Apply user-specified max_count limit if provided.
            # Note: ripgrep's -m option limits matches per file, not total
            # matches, so the total cap is applied here in post-processing.
            user_max_count = arguments.get("max_count")
            if user_max_count is not None and len(matches) > user_max_count:
                matches = matches[:user_max_count]
                truncated_rg = True
            else:
                truncated_rg = len(matches) >= fd_rg_utils.MAX_RESULTS_HARD_CAP
                if truncated_rg:
                    matches = matches[: fd_rg_utils.MAX_RESULTS_HARD_CAP]
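
            # Either way, the `truncated` flag in meta reflects the user cap
            # or the hard cap, in addition to any fd-stage truncation.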

            # Apply path optimization if requested
            optimize_paths = arguments.get("optimize_paths", False)
            if optimize_paths and matches:
                matches = fd_rg_utils.optimize_match_paths(matches)

            # Apply file grouping if requested (takes priority over other formats)
            group_by_file = arguments.get("group_by_file", False)
            if group_by_file and matches:
                grouped_result = fd_rg_utils.group_matches_by_file(matches)

                # If summary_only is also requested, add summary to grouped result
                if arguments.get("summary_only", False):
                    summary = fd_rg_utils.summarize_search_results(matches)
                    grouped_result["summary"] = summary

                grouped_result["meta"] = {
                    "searched_file_count": searched_file_count,
                    "truncated": (truncated_fd or truncated_rg),
                    "fd_elapsed_ms": fd_elapsed_ms,
                    "rg_elapsed_ms": rg_elapsed_ms,
                }

                # Handle output suppression and file output for grouped results
                output_file = arguments.get("output_file")
                suppress_output = arguments.get("suppress_output", False)

                # Handle file output if requested
                if output_file:
                    try:
                        # Save the full result to file
                        json_content = json.dumps(
                            grouped_result, indent=2, ensure_ascii=False
                        )
                        file_path = self.file_output_manager.save_to_file(
                            content=json_content, base_name=output_file
                        )

                        # If suppress_output is True, return minimal response
                        if suppress_output:
                            minimal_result = {
                                "success": grouped_result.get("success", True),
                                "count": grouped_result.get("count", 0),
                                "output_file": output_file,
                                "file_saved": f"Results saved to {file_path}",
                            }
                            return minimal_result
                        else:
                            # Include file info in full response
                            grouped_result["output_file"] = output_file
                            grouped_result["file_saved"] = f"Results saved to {file_path}"
                    except Exception as e:
                        logger.error(f"Failed to save output to file: {e}")
                        grouped_result["file_save_error"] = str(e)
                        grouped_result["file_saved"] = False
                elif suppress_output:
                    # suppress_output without output_file: drop detailed results
                    minimal_result = {
                        "success": grouped_result.get("success", True),
                        "count": grouped_result.get("count", 0),
                        "summary": grouped_result.get("summary", {}),
                        "meta": grouped_result.get("meta", {}),
                    }
                    return minimal_result

                return grouped_result

            # Check if summary_only mode is requested
            if arguments.get("summary_only", False):
                summary = fd_rg_utils.summarize_search_results(matches)
                result = {
                    "success": True,
                    "summary_only": True,
                    "summary": summary,
                    "meta": {
                        "searched_file_count": searched_file_count,
                        "truncated": (truncated_fd or truncated_rg),
                        "fd_elapsed_ms": fd_elapsed_ms,
                        "rg_elapsed_ms": rg_elapsed_ms,
                    },
                }

                # Handle output suppression and file output for summary results
                output_file = arguments.get("output_file")
                suppress_output = arguments.get("suppress_output", False)

                # Handle file output if requested
                if output_file:
                    try:
                        # Save the full result to file
                        json_content = json.dumps(result, indent=2, ensure_ascii=False)
                        file_path = self.file_output_manager.save_to_file(
                            content=json_content, base_name=output_file
                        )

                        # If suppress_output is True, return minimal response
                        if suppress_output:
                            minimal_result = {
                                "success": result.get("success", True),
                                "count": len(matches),
                                "output_file": output_file,
                                "file_saved": f"Results saved to {file_path}",
                            }
                            return minimal_result
                        else:
                            # Include file info in full response
                            result["output_file"] = output_file
                            result["file_saved"] = f"Results saved to {file_path}"
                    except Exception as e:
                        logger.error(f"Failed to save output to file: {e}")
                        result["file_save_error"] = str(e)
                        result["file_saved"] = False
                elif suppress_output:
                    # suppress_output without output_file: drop detailed results
                    minimal_result = {
                        "success": result.get("success", True),
                        "count": len(matches),
                        "summary": result.get("summary", {}),
                        "meta": result.get("meta", {}),
                    }
                    return minimal_result

                return result
            else:
                result = {
                    "success": True,
                    "count": len(matches),
                    "meta": {
                        "searched_file_count": searched_file_count,
                        "truncated": (truncated_fd or truncated_rg),
                        "fd_elapsed_ms": fd_elapsed_ms,
                        "rg_elapsed_ms": rg_elapsed_ms,
                    },
                }

                # Handle output suppression and file output
                output_file = arguments.get("output_file")
                suppress_output = arguments.get("suppress_output", False)

                # Add results to the response unless they are both suppressed
                # and saved to a file
                if not suppress_output or not output_file:
                    result["results"] = matches

                # Handle file output if requested
                if output_file:
                    try:
                        # Create detailed output for file
                        file_content = {
                            "success": True,
                            "results": matches,
                            "count": len(matches),
                            "files": (
                                fd_rg_utils.group_matches_by_file(matches)["files"]
                                if matches
                                else []
                            ),
                            "summary": fd_rg_utils.summarize_search_results(matches),
                            "meta": result["meta"],
                        }

                        # Save the full result to file via FileOutputManager
                        json_content = json.dumps(
                            file_content, indent=2, ensure_ascii=False
                        )
                        file_path = self.file_output_manager.save_to_file(
                            content=json_content, base_name=output_file
                        )

                        if suppress_output:
                            # Return minimal response to save tokens
                            minimal_result = {
                                "success": result.get("success", True),
                                "count": result.get("count", 0),
                                "output_file": output_file,
                                "file_saved": f"Results saved to {file_path}",
                            }
                            return minimal_result
                        else:
                            # Include file info in full response
                            result["output_file"] = output_file
                            result["file_saved"] = f"Results saved to {file_path}"

                        logger.info(f"Search results saved to: {file_path}")

                    except Exception as e:
                        logger.error(f"Failed to save output to file: {e}")
                        result["file_save_error"] = str(e)
                        result["file_saved"] = False
                elif suppress_output:
                    # Return minimal response without detailed match results
                    minimal_result = {
                        "success": result.get("success", True),
                        "count": result.get("count", 0),
                        "summary": result.get("summary", {}),
                        "meta": result.get("meta", {}),
                    }
                    return minimal_result

                return result
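
# Minimal usage sketch (illustrative, assumes an asyncio context and a real
# project root; not part of the packaged module):
#
#     tool = FindAndGrepTool(project_root="/path/to/project")
#     hits = await tool.execute(
#         {"roots": ["."], "query": "def main", "extensions": ["py"]}
#     )
#
# execute() returns a dict with "results"/"count"/"meta" (shape varies with
# the formatting flags above) or a bare int when total_only=True.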