tree_sitter_analyzer-1.9.17.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py
@@ -0,0 +1,686 @@
+ #!/usr/bin/env python3
+ """
+ find_and_grep MCP Tool (fd → ripgrep)
+
+ First narrow files with fd, then search contents with ripgrep, with caps & meta.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ import pathlib
+ import time
+ from typing import Any
+
+ from ..utils.error_handler import handle_mcp_errors
+ from ..utils.file_output_manager import FileOutputManager
+ from ..utils.gitignore_detector import get_default_detector
+ from . import fd_rg_utils
+ from .base_tool import BaseMCPTool
+
+ logger = logging.getLogger(__name__)
+
+
+ class FindAndGrepTool(BaseMCPTool):
+     """MCP tool that composes fd and ripgrep with safety limits and metadata."""
+
+     def __init__(self, project_root: str | None = None) -> None:
+         """Initialize the find and grep tool."""
+         super().__init__(project_root)
+         self.file_output_manager = FileOutputManager.get_managed_instance(project_root)
+
+     def set_project_path(self, project_path: str) -> None:
+         """
+         Update the project path for all components.
+
+         Args:
+             project_path: New project root directory
+         """
+         super().set_project_path(project_path)
+         self.file_output_manager = FileOutputManager.get_managed_instance(project_path)
+         logger.info(f"FindAndGrepTool project path updated to: {project_path}")
+
+     def get_tool_definition(self) -> dict[str, Any]:
+         return {
+             "name": "find_and_grep",
+             "description": "Two-stage search: first use fd to find files matching criteria, then use ripgrep to search content within those files. Combines file filtering with content search for precise results with advanced token optimization (summary_only, group_by_file, total_only, suppress_output).",
+             "inputSchema": {
+                 "type": "object",
+                 "properties": {
+                     # === FILE DISCOVERY STAGE (fd parameters) ===
+                     "roots": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "Directory paths to search in. Must be within project boundaries. Example: ['.', 'src/', 'tests/']",
+                     },
+                     "pattern": {
+                         "type": "string",
+                         "description": "[FILE STAGE] Filename pattern to match. Use with 'glob' for shell patterns. Example: '*.py', 'test_*', 'main.js'",
+                     },
+                     "glob": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[FILE STAGE] Treat filename pattern as glob instead of regex. True for '*.py', False for '.*\\.py$'",
+                     },
+                     "types": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "[FILE STAGE] File types to include. 'f'=files, 'd'=directories, 'l'=symlinks, 'x'=executable, 'e'=empty",
+                     },
+                     "extensions": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "[FILE STAGE] File extensions to include (without dots). Example: ['py', 'js'] for Python and JavaScript files",
+                     },
+                     "exclude": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "[FILE STAGE] File patterns to exclude. Example: ['*.tmp', '__pycache__'] to skip temporary files",
+                     },
+                     "depth": {
+                         "type": "integer",
+                         "description": "[FILE STAGE] Maximum directory depth to search. 1=current level only, 2=one level deep, etc.",
+                     },
+                     "follow_symlinks": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[FILE STAGE] Follow symbolic links. False=safer, True=may cause loops",
+                     },
+                     "hidden": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[FILE STAGE] Include hidden files/directories (starting with dot). False=skip .git, .env",
+                     },
+                     "no_ignore": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[FILE STAGE] Ignore .gitignore files. False=respect ignore rules, True=search everything",
+                     },
+                     "size": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "[FILE STAGE] File size filters. Format: '+10M'=larger than 10MB, '-1K'=smaller than 1KB. Units: B, K, M, G",
+                     },
+                     "changed_within": {
+                         "type": "string",
+                         "description": "[FILE STAGE] Files modified within timeframe. Format: '1d'=1 day, '2h'=2 hours, '30m'=30 minutes",
+                     },
+                     "changed_before": {
+                         "type": "string",
+                         "description": "[FILE STAGE] Files modified before timeframe. Same format as changed_within",
+                     },
+                     "full_path_match": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[FILE STAGE] Match pattern against full path instead of just filename",
+                     },
+                     "file_limit": {
+                         "type": "integer",
+                         "description": "[FILE STAGE] Maximum number of files to find before content search. Default 2000, prevents overwhelming searches",
+                     },
+                     "sort": {
+                         "type": "string",
+                         "enum": ["path", "mtime", "size"],
+                         "description": "[FILE STAGE] Sort found files by: 'path'=alphabetical, 'mtime'=modification time, 'size'=file size",
+                     },
+                     # === CONTENT SEARCH STAGE (ripgrep parameters) ===
+                     "query": {
+                         "type": "string",
+                         "description": "[CONTENT STAGE] Text pattern to search for in the found files. Can be literal text or regex",
+                     },
+                     "case": {
+                         "type": "string",
+                         "enum": ["smart", "insensitive", "sensitive"],
+                         "default": "smart",
+                         "description": "[CONTENT STAGE] Case sensitivity. 'smart'=case-insensitive unless uppercase present, 'insensitive'=ignore case, 'sensitive'=exact case",
+                     },
+                     "fixed_strings": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[CONTENT STAGE] Treat query as literal string instead of regex. True for exact text, False for patterns",
+                     },
+                     "word": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[CONTENT STAGE] Match whole words only. True finds 'test' but not 'testing'",
+                     },
+                     "multiline": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "[CONTENT STAGE] Allow patterns to match across multiple lines. Useful for multi-line code blocks",
+                     },
+                     "include_globs": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "[CONTENT STAGE] Additional file patterns to include in content search. Example: ['*.py', '*.js']",
+                     },
+                     "exclude_globs": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "[CONTENT STAGE] File patterns to exclude from content search. Example: ['*.log', '__pycache__/*']",
+                     },
+                     "max_filesize": {
+                         "type": "string",
+                         "description": "[CONTENT STAGE] Maximum file size to search content. Format: '10M'=10MB, '500K'=500KB",
+                     },
+                     "context_before": {
+                         "type": "integer",
+                         "description": "[CONTENT STAGE] Lines to show before each match for context. Example: 3 shows 3 lines before",
+                     },
+                     "context_after": {
+                         "type": "integer",
+                         "description": "[CONTENT STAGE] Lines to show after each match for context. Example: 3 shows 3 lines after",
+                     },
+                     "encoding": {
+                         "type": "string",
+                         "description": "[CONTENT STAGE] Text encoding for files. Default auto-detect. Example: 'utf-8', 'latin1'",
+                     },
+                     "max_count": {
+                         "type": "integer",
+                         "description": "[CONTENT STAGE] Maximum matches per file. Prevents overwhelming output from files with many matches",
+                     },
+                     "timeout_ms": {
+                         "type": "integer",
+                         "description": "[CONTENT STAGE] Search timeout in milliseconds. Example: 5000 for 5 second timeout",
+                     },
+                     "count_only_matches": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "Return only match counts per file instead of full match details. Faster for statistics",
+                     },
+                     "summary_only": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "Return condensed summary of results. Shows top files and sample matches to reduce context size",
+                     },
+                     "optimize_paths": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "Optimize file paths in results by removing common prefixes and shortening long paths. Saves tokens in output",
+                     },
+                     "group_by_file": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "Group results by file to eliminate file path duplication when multiple matches exist in the same file. Significantly reduces tokens",
+                     },
+                     "total_only": {
+                         "type": "boolean",
+                         "default": False,
+                         "description": "Return only the total match count as a number. Most token-efficient option for count queries. Takes priority over all other formats",
+                     },
+                     "output_file": {
+                         "type": "string",
+                         "description": "Optional filename to save output to file (extension auto-detected based on content)",
+                     },
+                     "suppress_output": {
+                         "type": "boolean",
+                         "description": "When true and output_file is specified, suppress detailed output in response to save tokens",
+                         "default": False,
+                     },
+                 },
+                 "required": ["roots", "query"],
+                 "additionalProperties": False,
+             },
+         }
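The inputSchema above is the complete caller-facing contract. As an illustration of how the two stages combine, the hypothetical payload below narrows to recently modified Python files and then counts literal matches; every key comes from the schema above, while the values (and the name example_arguments) are invented for illustration:

    # Hypothetical find_and_grep payload; keys are taken from the inputSchema above.
    example_arguments = {
        "roots": ["src/"],       # FILE STAGE: where fd starts looking
        "pattern": "*.py",
        "glob": True,            # interpret "*.py" as a glob, not a regex
        "changed_within": "7d",  # only files modified in the last week
        "query": "TODO",         # CONTENT STAGE: what ripgrep searches for
        "fixed_strings": True,   # literal text rather than a regex
        "total_only": True,      # most token-efficient: a bare count comes back
    }

Because total_only takes priority over every other output format, this payload would come back as a single integer.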
+
+     def _validate_roots(self, roots: list[str]) -> list[str]:
+         validated: list[str] = []
+         for r in roots:
+             resolved = self.path_resolver.resolve(r)
+             ok, err = self.security_validator.validate_directory_path(
+                 resolved, must_exist=True
+             )
+             if not ok:
+                 raise ValueError(f"Invalid root '{r}': {err}")
+             validated.append(resolved)
+         return validated
+
+     def validate_arguments(self, arguments: dict[str, Any]) -> bool:
+         if "roots" not in arguments or not isinstance(arguments["roots"], list):
+             raise ValueError("roots is required and must be an array")
+         if (
+             "query" not in arguments
+             or not isinstance(arguments["query"], str)
+             or not arguments["query"].strip()
+         ):
+             raise ValueError("query is required and must be a non-empty string")
+         if "file_limit" in arguments and not isinstance(arguments["file_limit"], int):
+             raise ValueError("file_limit must be an integer")
+         return True
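Note that validate_arguments never actually returns False: invalid input raises ValueError, which the @handle_mcp_errors decorator on execute below presumably converts into an error response. A quick hypothetical check of the whitespace-only-query case:

    # validate_arguments raises rather than returning False; a whitespace-only
    # query fails the .strip() test above. The project root here is hypothetical.
    tool = FindAndGrepTool(project_root="/abs/project")
    try:
        tool.validate_arguments({"roots": ["."], "query": "   "})
    except ValueError as e:
        print(e)  # query is required and must be a non-empty string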
+
+     @handle_mcp_errors("find_and_grep")
+     async def execute(self, arguments: dict[str, Any]) -> dict[str, Any] | int:
+         # Check if both fd and rg commands are available
+         missing_commands = fd_rg_utils.get_missing_commands()
+         if missing_commands:
+             return {
+                 "success": False,
+                 "error": f"Required commands not found: {', '.join(missing_commands)}. Please install fd (https://github.com/sharkdp/fd) and ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
+                 "count": 0,
+                 "results": [],
+             }
+
+         self.validate_arguments(arguments)
+         roots = self._validate_roots(arguments["roots"])  # absolute validated
+
+         # fd step
+         fd_limit = fd_rg_utils.clamp_int(
+             arguments.get("file_limit"),
+             fd_rg_utils.DEFAULT_RESULTS_LIMIT,
+             fd_rg_utils.MAX_RESULTS_HARD_CAP,
+         )
+
+         # Smart .gitignore detection for fd stage
+         no_ignore = bool(arguments.get("no_ignore", False))
+         if not no_ignore:
+             # Auto-detect if we should use --no-ignore
+             detector = get_default_detector()
+             original_roots = arguments.get("roots", [])
+             should_ignore = detector.should_use_no_ignore(
+                 original_roots, self.project_root
+             )
+             if should_ignore:
+                 no_ignore = True
+                 # Log the auto-detection for debugging
+                 detection_info = detector.get_detection_info(
+                     original_roots, self.project_root
+                 )
+                 logger.info(
+                     f"Auto-enabled --no-ignore due to .gitignore interference: {detection_info['reason']}"
+                 )
+
+         fd_cmd = fd_rg_utils.build_fd_command(
+             pattern=arguments.get("pattern"),
+             glob=bool(arguments.get("glob", False)),
+             types=arguments.get("types"),
+             extensions=arguments.get("extensions"),
+             exclude=arguments.get("exclude"),
+             depth=arguments.get("depth"),
+             follow_symlinks=bool(arguments.get("follow_symlinks", False)),
+             hidden=bool(arguments.get("hidden", False)),
+             no_ignore=no_ignore,
+             size=arguments.get("size"),
+             changed_within=arguments.get("changed_within"),
+             changed_before=arguments.get("changed_before"),
+             full_path_match=bool(arguments.get("full_path_match", False)),
+             absolute=True,
+             limit=fd_limit,
+             roots=roots,
+         )
+
+         fd_started = time.time()
+         fd_rc, fd_out, fd_err = await fd_rg_utils.run_command_capture(fd_cmd)
+         fd_elapsed_ms = int((time.time() - fd_started) * 1000)
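At this point the file-discovery stage is complete: fd_out holds a newline-separated list of absolute paths. build_fd_command and run_command_capture live in fd_rg_utils, which this diff does not include, so the exact argv is not visible here; as a sketch only, with the defaults above it would plausibly resemble the following (these are standard fd flags, but their use by build_fd_command is an assumption):

    # Sketch only: the real argv is produced by fd_rg_utils.build_fd_command.
    # --absolute-path corresponds to absolute=True, --max-results to limit=fd_limit.
    fd_cmd = [
        "fd",
        "--glob",
        "--absolute-path",
        "--max-results", str(fd_limit),
        "*.py",              # hypothetical pattern (positional)
        "/abs/project/src",  # a validated root (positional)
    ]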
+
+         if fd_rc != 0:
+             return {
+                 "success": False,
+                 "error": (
+                     fd_err.decode("utf-8", errors="replace").strip() or "fd failed"
+                 ),
+                 "returncode": fd_rc,
+             }
+
+         files = [
+             line.strip()
+             for line in fd_out.decode("utf-8", errors="replace").splitlines()
+             if line.strip()
+         ]
+
+         # Truncate by file_limit safety again
+         truncated_fd = False
+         if len(files) > fd_limit:
+             files = files[:fd_limit]
+             truncated_fd = True
+
+         # Optional sorting
+         sort_mode = arguments.get("sort")
+         if sort_mode in ("path", "mtime", "size"):
+             try:
+                 if sort_mode == "path":
+                     files.sort()
+                 elif sort_mode == "mtime":
+
+                     def get_mtime(p: str) -> float:
+                         path_obj = pathlib.Path(p)
+                         return path_obj.stat().st_mtime if path_obj.exists() else 0
+
+                     files.sort(key=get_mtime, reverse=True)
+                 elif sort_mode == "size":
+
+                     def get_size(p: str) -> int:
+                         path_obj = pathlib.Path(p)
+                         return path_obj.stat().st_size if path_obj.exists() else 0
+
+                     files.sort(key=get_size, reverse=True)
+             except (OSError, ValueError):  # nosec B110
+                 pass
+
+         searched_file_count = len(files)
+         if searched_file_count == 0:
+             return {
+                 "success": True,
+                 "results": [],
+                 "count": 0,
+                 "meta": {
+                     "searched_file_count": 0,
+                     "truncated": truncated_fd,
+                     "fd_elapsed_ms": fd_elapsed_ms,
+                     "rg_elapsed_ms": 0,
+                 },
+             }
+
+         # rg step on files list
+         # Create specific file globs to limit search to only the files found by fd
+         from pathlib import Path
+
+         parent_dirs = set()
+         file_globs = []
+
+         for file_path in files:
+             parent_dir = str(Path(file_path).parent)
+             parent_dirs.add(parent_dir)
+
+             # Create a specific glob pattern for this exact file
+             file_name = Path(file_path).name
+             # Escape special characters in filename for glob pattern
+             escaped_name = file_name.replace("[", "[[]").replace("]", "[]]")
+             file_globs.append(escaped_name)
+
+         # Use parent directories as roots but limit to specific files via globs
+         rg_roots = list(parent_dirs)
+
+         # Combine user-provided include_globs with our file-specific globs
+         combined_include_globs = arguments.get("include_globs", []) or []
+         combined_include_globs.extend(file_globs)
+
+         rg_cmd = fd_rg_utils.build_rg_command(
+             query=arguments["query"],
+             case=arguments.get("case", "smart"),
+             fixed_strings=bool(arguments.get("fixed_strings", False)),
+             word=bool(arguments.get("word", False)),
+             multiline=bool(arguments.get("multiline", False)),
+             include_globs=combined_include_globs,
+             exclude_globs=arguments.get("exclude_globs"),
+             follow_symlinks=bool(arguments.get("follow_symlinks", False)),
+             hidden=bool(arguments.get("hidden", False)),
+             no_ignore=no_ignore,  # Use the same no_ignore flag from fd stage
+             max_filesize=arguments.get("max_filesize"),
+             context_before=arguments.get("context_before"),
+             context_after=arguments.get("context_after"),
+             encoding=arguments.get("encoding"),
+             max_count=arguments.get("max_count"),
+             timeout_ms=arguments.get("timeout_ms"),
+             roots=rg_roots,
+             files_from=None,
+             count_only_matches=bool(arguments.get("count_only_matches", False))
+             or bool(arguments.get("total_only", False)),
+         )
+
+         rg_started = time.time()
+         rg_rc, rg_out, rg_err = await fd_rg_utils.run_command_capture(
+             rg_cmd, timeout_ms=arguments.get("timeout_ms")
+         )
+         rg_elapsed_ms = int((time.time() - rg_started) * 1000)
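The content stage mirrors the file stage: build_rg_command is likewise defined in fd_rg_utils and not shown in this diff, and the parse_rg_json_lines_to_matches call further down implies ripgrep runs with JSON event output. A sketch of the likely argv shape (standard ripgrep flags; their exact use here is an assumption):

    # Sketch only: the real argv is produced by fd_rg_utils.build_rg_command.
    rg_cmd = [
        "rg",
        "--json",             # one JSON event per line, parsed downstream
        "--smart-case",       # case="smart"
        "--glob", "main.py",  # a file-specific glob built from one fd hit
        "TODO",               # the query (positional)
        "/abs/project/src",   # a parent directory used as an rg root
    ]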
+
+         if rg_rc not in (0, 1):
+             return {
+                 "success": False,
+                 "error": (
+                     rg_err.decode("utf-8", errors="replace").strip() or "ripgrep failed"
+                 ),
+                 "returncode": rg_rc,
+             }
+
+         # Handle total-only mode (highest priority for count queries)
+         if arguments.get("total_only", False):
+             # Parse count output and return only the total
+             count_data = fd_rg_utils.parse_rg_count_output(rg_out)
+             total_matches = count_data.pop("__total__", 0)
+             return total_matches
+
+         if arguments.get("count_only_matches", False):
+             # Parse count-only output
+             count_data = fd_rg_utils.parse_rg_count_output(rg_out)
+             total_matches = count_data.pop("__total__", 0)
+
+             return {
+                 "success": True,
+                 "count_only": True,
+                 "total_matches": total_matches,
+                 "file_counts": count_data,
+                 "meta": {
+                     "searched_file_count": searched_file_count,
+                     "truncated": truncated_fd,
+                     "fd_elapsed_ms": fd_elapsed_ms,
+                     "rg_elapsed_ms": rg_elapsed_ms,
+                 },
+             }
+         else:
+             # Parse full match details
+             matches = fd_rg_utils.parse_rg_json_lines_to_matches(rg_out)
+
+             # Apply user-specified max_count limit if provided
+             # Note: ripgrep's -m option limits matches per file, not total matches
+             # So we need to apply the total limit here in post-processing
+             user_max_count = arguments.get("max_count")
+             if user_max_count is not None and len(matches) > user_max_count:
+                 matches = matches[:user_max_count]
+                 truncated_rg = True
+             else:
+                 truncated_rg = len(matches) >= fd_rg_utils.MAX_RESULTS_HARD_CAP
+                 if truncated_rg:
+                     matches = matches[: fd_rg_utils.MAX_RESULTS_HARD_CAP]
+
+             # Apply path optimization if requested
+             optimize_paths = arguments.get("optimize_paths", False)
+             if optimize_paths and matches:
+                 matches = fd_rg_utils.optimize_match_paths(matches)
+
+             # Apply file grouping if requested (takes priority over other formats)
+             group_by_file = arguments.get("group_by_file", False)
+             if group_by_file and matches:
+                 grouped_result = fd_rg_utils.group_matches_by_file(matches)
+
+                 # If summary_only is also requested, add summary to grouped result
+                 if arguments.get("summary_only", False):
+                     summary = fd_rg_utils.summarize_search_results(matches)
+                     grouped_result["summary"] = summary
+
+                 grouped_result["meta"] = {
+                     "searched_file_count": searched_file_count,
+                     "truncated": (truncated_fd or truncated_rg),
+                     "fd_elapsed_ms": fd_elapsed_ms,
+                     "rg_elapsed_ms": rg_elapsed_ms,
+                 }
+
+                 # Handle output suppression and file output for grouped results
+                 output_file = arguments.get("output_file")
+                 suppress_output = arguments.get("suppress_output", False)
+
+                 # Handle file output if requested
+                 if output_file:
+                     try:
+                         # Save full result to file
+                         import json
+
+                         json_content = json.dumps(
+                             grouped_result, indent=2, ensure_ascii=False
+                         )
+                         file_path = self.file_output_manager.save_to_file(
+                             content=json_content, base_name=output_file
+                         )
+
+                         # If suppress_output is True, return minimal response
+                         if suppress_output:
+                             minimal_result = {
+                                 "success": grouped_result.get("success", True),
+                                 "count": grouped_result.get("count", 0),
+                                 "output_file": output_file,
+                                 "file_saved": f"Results saved to {file_path}",
+                             }
+                             return minimal_result
+                         else:
+                             # Include file info in full response
+                             grouped_result["output_file"] = output_file
+                             grouped_result["file_saved"] = (
+                                 f"Results saved to {file_path}"
+                             )
+                     except Exception as e:
+                         logger.error(f"Failed to save output to file: {e}")
+                         grouped_result["file_save_error"] = str(e)
+                         grouped_result["file_saved"] = False
+                 elif suppress_output:
+                     # If suppress_output is True but no output_file, remove detailed results
+                     minimal_result = {
+                         "success": grouped_result.get("success", True),
+                         "count": grouped_result.get("count", 0),
+                         "summary": grouped_result.get("summary", {}),
+                         "meta": grouped_result.get("meta", {}),
+                     }
+                     return minimal_result
+
+                 return grouped_result
+
+             # Check if summary_only mode is requested
+             if arguments.get("summary_only", False):
+                 summary = fd_rg_utils.summarize_search_results(matches)
+                 result = {
+                     "success": True,
+                     "summary_only": True,
+                     "summary": summary,
+                     "meta": {
+                         "searched_file_count": searched_file_count,
+                         "truncated": (truncated_fd or truncated_rg),
+                         "fd_elapsed_ms": fd_elapsed_ms,
+                         "rg_elapsed_ms": rg_elapsed_ms,
+                     },
+                 }
+
+                 # Handle output suppression and file output for summary results
+                 output_file = arguments.get("output_file")
+                 suppress_output = arguments.get("suppress_output", False)
+
+                 # Handle file output if requested
+                 if output_file:
+                     try:
+                         # Save full result to file
+                         import json
+
+                         json_content = json.dumps(result, indent=2, ensure_ascii=False)
+                         file_path = self.file_output_manager.save_to_file(
+                             content=json_content, base_name=output_file
+                         )
+
+                         # If suppress_output is True, return minimal response
+                         if suppress_output:
+                             minimal_result = {
+                                 "success": result.get("success", True),
+                                 "count": len(matches),
+                                 "output_file": output_file,
+                                 "file_saved": f"Results saved to {file_path}",
+                             }
+                             return minimal_result
+                         else:
+                             # Include file info in full response
+                             result["output_file"] = output_file
+                             result["file_saved"] = f"Results saved to {file_path}"
+                     except Exception as e:
+                         logger.error(f"Failed to save output to file: {e}")
+                         result["file_save_error"] = str(e)
+                         result["file_saved"] = False
+                 elif suppress_output:
+                     # If suppress_output is True but no output_file, remove detailed results
+                     minimal_result = {
+                         "success": result.get("success", True),
+                         "count": len(matches),
+                         "summary": result.get("summary", {}),
+                         "meta": result.get("meta", {}),
+                     }
+                     return minimal_result
+
+                 return result
+             else:
+                 result = {
+                     "success": True,
+                     "count": len(matches),
+                     "meta": {
+                         "searched_file_count": searched_file_count,
+                         "truncated": (truncated_fd or truncated_rg),
+                         "fd_elapsed_ms": fd_elapsed_ms,
+                         "rg_elapsed_ms": rg_elapsed_ms,
+                     },
+                 }
+
+                 # Handle output suppression and file output
+                 output_file = arguments.get("output_file")
+                 suppress_output = arguments.get("suppress_output", False)
+
+                 # Add results to response unless suppressed
+                 if not suppress_output or not output_file:
+                     result["results"] = matches
+
+                 # Handle file output if requested
+                 if output_file:
+                     try:
+                         # Create detailed output for file
+                         file_content = {
+                             "success": True,
+                             "results": matches,
+                             "count": len(matches),
+                             "files": (
+                                 fd_rg_utils.group_matches_by_file(matches)["files"]
+                                 if matches
+                                 else []
+                             ),
+                             "summary": fd_rg_utils.summarize_search_results(matches),
+                             "meta": result["meta"],
+                         }
+
+                         # Convert to JSON for file output
+                         # Save full result to file using FileOutputManager
+                         import json
+
+                         json_content = json.dumps(
+                             file_content, indent=2, ensure_ascii=False
+                         )
+                         file_path = self.file_output_manager.save_to_file(
+                             content=json_content, base_name=output_file
+                         )
+
+                         # Check if suppress_output is enabled
+                         suppress_output = arguments.get("suppress_output", False)
+                         if suppress_output:
+                             # Return minimal response to save tokens
+                             minimal_result = {
+                                 "success": result.get("success", True),
+                                 "count": result.get("count", 0),
+                                 "output_file": output_file,
+                                 "file_saved": f"Results saved to {file_path}",
+                             }
+                             return minimal_result
+                         else:
+                             # Include file info in full response
+                             result["output_file"] = output_file
+                             result["file_saved"] = f"Results saved to {file_path}"
+
+                             logger.info(f"Search results saved to: {file_path}")
+
+                     except Exception as e:
+                         logger.error(f"Failed to save output to file: {e}")
+                         result["file_save_error"] = str(e)
+                         result["file_saved"] = False
+                 else:
+                     # Handle suppress_output without file output
+                     suppress_output = arguments.get("suppress_output", False)
+                     if suppress_output:
+                         # Return minimal response without detailed match results
+                         minimal_result = {
+                             "success": result.get("success", True),
+                             "count": result.get("count", 0),
+                             "summary": result.get("summary", {}),
+                             "meta": result.get("meta", {}),
+                         }
+                         return minimal_result
+
+                 return result
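Taken together, execute returns a dict in every mode except total_only, where it returns a bare integer (hence the dict[str, Any] | int annotation). A minimal driver, assuming nothing beyond what this file shows (the constructor and the async execute method; the project root is hypothetical), might look like:

    # Minimal sketch of driving the tool directly, outside the MCP server.
    import asyncio

    async def main() -> None:
        tool = FindAndGrepTool(project_root="/abs/project")
        total = await tool.execute(
            {"roots": ["."], "query": "TODO", "total_only": True}
        )
        print(f"TODO occurrences: {total}")  # a bare int in total_only mode

    asyncio.run(main())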