tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,836 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ search_content MCP Tool (ripgrep wrapper)
4
+
5
+ Search content in files under roots or an explicit file list using ripgrep --json.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from ..utils.error_handler import handle_mcp_errors
16
+ from ..utils.file_output_manager import FileOutputManager
17
+ from ..utils.gitignore_detector import get_default_detector
18
+ from ..utils.search_cache import get_default_cache
19
+ from . import fd_rg_utils
20
+ from .base_tool import BaseMCPTool
21
+ from .output_format_validator import get_default_validator
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class SearchContentTool(BaseMCPTool):
27
+ """MCP tool that wraps ripgrep to search content with safety limits."""
28
+
29
def __init__(
    self, project_root: str | None = None, enable_cache: bool = True
) -> None:
    """
    Set up the tool with an optional result cache and a file output manager.

    Args:
        project_root: Project root directory, or None when not yet known
        enable_cache: When True (the default) the shared default search
            cache is attached; when False ``self.cache`` is None
    """
    super().__init__(project_root)

    # Caching is opt-out: a disabled cache is represented by None.
    if enable_cache:
        self.cache = get_default_cache()
    else:
        self.cache = None

    output_manager = FileOutputManager.get_managed_instance(project_root)
    self.file_output_manager = output_manager
42
+
43
def set_project_path(self, project_path: str) -> None:
    """
    Point the tool and its file output manager at a new project root.

    Args:
        project_path: New project root directory
    """
    super().set_project_path(project_path)

    # The output manager is looked up per project path, so fetch it again.
    output_manager = FileOutputManager.get_managed_instance(project_path)
    self.file_output_manager = output_manager

    logger.info(f"SearchContentTool project path updated to: {project_path}")
53
+
54
def get_tool_definition(self) -> dict[str, Any]:
    """
    Return the MCP tool definition for ``search_content``.

    Returns:
        A dict with three keys: "name", a long human/LLM-facing
        "description" (including token-efficiency guidance), and
        "inputSchema", a JSON-Schema object describing every accepted
        argument. The schema lists only "query" as required and forbids
        additional properties; validate_arguments() additionally requires
        that "roots" or "files" is present.
    """
    return {
        "name": "search_content",
        "description": """Search text content inside files using ripgrep. Supports regex patterns, case sensitivity, context lines, and various output formats. Can search in directories or specific files.

⚡ IMPORTANT: Token Efficiency Guide
Choose output format parameters based on your needs to minimize token usage and maximize performance with efficient search strategies:

📋 RECOMMENDED WORKFLOW (Most Efficient Approach):
1. START with total_only=true parameter for initial count validation (~10 tokens)
2. IF more detail needed, use count_only_matches=true parameter for file distribution (~50-200 tokens)
3. IF context needed, use summary_only=true parameter for overview (~500-2000 tokens)
4. ONLY use full results when specific content review is required (~2000-50000+ tokens)

⚡ TOKEN EFFICIENCY COMPARISON:
- total_only: ~10 tokens (single number) - MOST EFFICIENT for count queries
- count_only_matches: ~50-200 tokens (file counts) - Good for file distribution analysis
- summary_only: ~500-2000 tokens (condensed overview) - initial investigation
- group_by_file: ~2000-10000 tokens (organized by file) - Context-aware review
- optimize_paths: 10-30% reduction (path compression) - Use with deep directory structures
- Full results: ~2000-50000+ tokens - Use sparingly for detailed analysis

⚠️ MUTUALLY EXCLUSIVE: Only one output format parameter can be true at a time. Cannot be combined with other format parameters.""",
        "inputSchema": {
            "type": "object",
            "properties": {
                # --- Search targets: directories ("roots") or explicit files ---
                "roots": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Directory paths to search in recursively. Alternative to 'files'. Example: ['.', 'src/', 'tests/']",
                },
                "files": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Specific file paths to search in. Alternative to 'roots'. Example: ['main.py', 'config.json']",
                },
                # --- Pattern and matching behaviour ---
                "query": {
                    "type": "string",
                    "description": "Text pattern to search for. Can be literal text or regex depending on settings. Example: 'function', 'class\\s+\\w+', 'TODO:'",
                },
                "case": {
                    "type": "string",
                    "enum": ["smart", "insensitive", "sensitive"],
                    "default": "smart",
                    "description": "Case sensitivity mode. 'smart'=case-insensitive unless uppercase letters present, 'insensitive'=always ignore case, 'sensitive'=exact case match",
                },
                "fixed_strings": {
                    "type": "boolean",
                    "default": False,
                    "description": "Treat query as literal string instead of regex. True for exact text matching, False for regex patterns",
                },
                "word": {
                    "type": "boolean",
                    "default": False,
                    "description": "Match whole words only. True finds 'test' but not 'testing', False finds both",
                },
                "multiline": {
                    "type": "boolean",
                    "default": False,
                    "description": "Allow patterns to match across multiple lines. Useful for finding multi-line code blocks or comments",
                },
                # --- File selection and traversal ---
                "include_globs": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "File patterns to include in search. Example: ['*.py', '*.js'] to search only Python and JavaScript files",
                },
                "exclude_globs": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "File patterns to exclude from search. Example: ['*.log', '__pycache__/*'] to skip log files and cache directories",
                },
                "follow_symlinks": {
                    "type": "boolean",
                    "default": False,
                    "description": "Follow symbolic links during search. False=safer, True=may cause infinite loops",
                },
                "hidden": {
                    "type": "boolean",
                    "default": False,
                    "description": "Search in hidden files (starting with dot). False=skip .git, .env files, True=search all",
                },
                "no_ignore": {
                    "type": "boolean",
                    "default": False,
                    "description": "Ignore .gitignore and similar ignore files. False=respect ignore rules, True=search all files",
                },
                "max_filesize": {
                    "type": "string",
                    "description": "Maximum file size to search. Format: '10M'=10MB, '500K'=500KB, '1G'=1GB. Prevents searching huge files",
                },
                # --- Context, encoding and safety limits ---
                "context_before": {
                    "type": "integer",
                    "description": "Number of lines to show before each match. Useful for understanding match context. Example: 3 shows 3 lines before",
                },
                "context_after": {
                    "type": "integer",
                    "description": "Number of lines to show after each match. Useful for understanding match context. Example: 3 shows 3 lines after",
                },
                "encoding": {
                    "type": "string",
                    "description": "Text encoding to assume for files. Default is auto-detect. Example: 'utf-8', 'latin1', 'ascii'",
                },
                "max_count": {
                    "type": "integer",
                    "description": "Maximum number of matches per file. Useful to prevent overwhelming output from files with many matches",
                },
                "timeout_ms": {
                    "type": "integer",
                    "description": "Search timeout in milliseconds. Prevents long-running searches. Example: 5000 for 5 second timeout",
                },
                # --- Output format flags (documented as mutually exclusive) ---
                "count_only_matches": {
                    "type": "boolean",
                    "default": False,
                    "description": "⚡ EXCLUSIVE: Return only match counts per file (~50-200 tokens). RECOMMENDED for: File distribution analysis, understanding match spread across files. Cannot be combined with other output formats.",
                },
                "summary_only": {
                    "type": "boolean",
                    "default": False,
                    "description": "⚡ EXCLUSIVE: Return condensed overview with top files and sample matches (~500-2000 tokens). RECOMMENDED for: Initial investigation, scope confirmation, pattern validation. Cannot be combined with other output formats.",
                },
                "optimize_paths": {
                    "type": "boolean",
                    "default": False,
                    "description": "⚡ EXCLUSIVE: Optimize file paths by removing common prefixes (10-30% token reduction). RECOMMENDED for: Deep directory structures, large codebases. Cannot be combined with other output formats.",
                },
                "group_by_file": {
                    "type": "boolean",
                    "default": False,
                    "description": "⚡ EXCLUSIVE: Group results by file, eliminating path duplication (~2000-10000 tokens). RECOMMENDED for: Context-aware review, analyzing matches within specific files. Cannot be combined with other output formats.",
                },
                "total_only": {
                    "type": "boolean",
                    "default": False,
                    "description": "⚡ EXCLUSIVE: Return only total match count as single number (~10 tokens - MOST EFFICIENT). RECOMMENDED for: Count validation, filtering decisions, existence checks. Takes priority over all other formats. Cannot be combined with other output formats.",
                },
                # --- Output destination and execution options ---
                "output_file": {
                    "type": "string",
                    "description": "Optional filename to save output to file (extension auto-detected based on content)",
                },
                "suppress_output": {
                    "type": "boolean",
                    "description": "When true and output_file is specified, suppress detailed output in response to save tokens",
                    "default": False,
                },
                "enable_parallel": {
                    "type": "boolean",
                    "description": "Enable parallel processing for multiple root directories to improve performance. Default: True",
                    "default": True,
                },
            },
            # Only the pattern is schema-required; the roots/files
            # requirement is enforced separately in validate_arguments().
            "required": ["query"],
            "additionalProperties": False,
        },
    }
208
+
209
def _validate_roots(self, roots: list[str]) -> list[str]:
    """
    Resolve and security-check every search root.

    Args:
        roots: Directory paths as supplied by the caller

    Returns:
        The resolved paths, in the same order as ``roots``

    Raises:
        ValueError: If an entry is not a non-empty string, or if the
            security validator rejects the resolved directory (the
            directory must exist)
    """
    validated: list[str] = []
    for root in roots:
        # Same entry guard as _validate_files: reject None, numbers and
        # blank strings up front instead of handing them to the resolver.
        if not isinstance(root, str) or not root.strip():
            raise ValueError("roots entries must be non-empty strings")
        resolved = self.path_resolver.resolve(root)
        is_valid, error = self.security_validator.validate_directory_path(
            resolved, must_exist=True
        )
        if not is_valid:
            raise ValueError(f"Invalid root '{root}': {error}")
        validated.append(resolved)
    return validated
220
+
221
def _validate_files(self, files: list[str]) -> list[str]:
    """
    Resolve and security-check every explicitly listed file.

    Args:
        files: File paths as supplied by the caller

    Returns:
        The resolved paths, in the same order as ``files``

    Raises:
        ValueError: If an entry is not a non-empty string, fails the
            security validator, or does not point at an existing regular
            file
    """
    checked: list[str] = []
    for entry in files:
        if not (isinstance(entry, str) and entry.strip()):
            raise ValueError("files entries must be non-empty strings")

        target = self.path_resolver.resolve(entry)
        allowed, reason = self.security_validator.validate_file_path(target)
        if not allowed:
            raise ValueError(f"Invalid file path '{entry}': {reason}")

        # is_file() is already False for a missing path, so it covers the
        # "does not exist" case as well as "exists but is not a file".
        if not Path(target).is_file():
            raise ValueError(f"File not found: {entry}")

        checked.append(target)
    return checked
234
+
235
def validate_arguments(self, arguments: dict[str, Any]) -> bool:
    """
    Validate raw tool arguments before a search is executed.

    Checks run in this order: output-format exclusivity, the query,
    presence of ``roots`` or ``files``, types of string / boolean /
    integer options, glob lists, and finally the roots and files paths.

    Args:
        arguments: Tool arguments as received from the caller

    Returns:
        True when every check passes

    Raises:
        ValueError: On the first check that fails here or in the path
            validators. Whatever validate_output_format_exclusion raises
            is propagated unchanged.
    """
    # Validate output format exclusion first
    validator = get_default_validator()
    validator.validate_output_format_exclusion(arguments)

    query = arguments.get("query")
    if not isinstance(query, str) or not query.strip():
        raise ValueError("query is required and must be a non-empty string")
    if "roots" not in arguments and "files" not in arguments:
        raise ValueError("Either roots or files must be provided")

    for key in ("case", "encoding", "max_filesize"):
        if key in arguments and not isinstance(arguments[key], str):
            raise ValueError(f"{key} must be a string")

    # Every option the input schema declares as boolean, including the
    # output-format flags and suppress_output.
    boolean_keys = (
        "fixed_strings",
        "word",
        "multiline",
        "follow_symlinks",
        "hidden",
        "no_ignore",
        "count_only_matches",
        "summary_only",
        "optimize_paths",
        "group_by_file",
        "total_only",
        "suppress_output",
        "enable_parallel",
    )
    for key in boolean_keys:
        if key in arguments and not isinstance(arguments[key], bool):
            raise ValueError(f"{key} must be a boolean")

    for key in ("context_before", "context_after", "max_count", "timeout_ms"):
        if key in arguments:
            value = arguments[key]
            # bool is a subclass of int, so rule it out explicitly;
            # True/False are not meaningful counts or timeouts.
            if isinstance(value, bool) or not isinstance(value, int):
                raise ValueError(f"{key} must be an integer")

    for key in ("include_globs", "exclude_globs"):
        if key in arguments:
            globs = arguments[key]
            if not isinstance(globs, list) or not all(
                isinstance(item, str) for item in globs
            ):
                raise ValueError(f"{key} must be an array of strings")

    # Validate roots and files if provided
    if "roots" in arguments:
        self._validate_roots(arguments["roots"])
    if "files" in arguments:
        self._validate_files(arguments["files"])

    return True
284
+
285
def _determine_requested_format(self, arguments: dict[str, Any]) -> str:
    """
    Map the output-format flags in ``arguments`` to a format name.

    Flags are checked in priority order and the first truthy one wins;
    "normal" is returned when none of them is set.
    """
    # (argument flag, format name) pairs, highest priority first.
    flag_to_format = (
        ("total_only", "total_only"),
        ("count_only_matches", "count_only"),
        ("summary_only", "summary"),
        ("group_by_file", "group_by_file"),
    )
    for flag, format_name in flag_to_format:
        if arguments.get(flag, False):
            return format_name
    return "normal"
297
+
298
def _create_count_only_cache_key(
    self, total_only_cache_key: str, arguments: dict[str, Any]
) -> str | None:
    """
    Build the cache key an equivalent count_only_matches request would use.

    This enables cross-format caching: a total_only search can store its
    per-file counts under this key so that a later count_only_matches
    query with otherwise identical arguments is served from the cache.

    To make that later lookup actually match, the key is built the same
    way execute() builds its lookup key: ``output_file`` and
    ``suppress_output`` are left out, and roots are passed in resolved
    form.

    Args:
        total_only_cache_key: Key of the total_only entry. Not used in
            the computation; kept so existing callers keep working.
        arguments: Arguments of the total_only request (not modified)

    Returns:
        The derived cache key, or None when caching is disabled
    """
    if self.cache is None:
        return None

    # Same request, but asking for per-file counts instead of the total.
    count_only_args = dict(arguments)
    count_only_args.pop("total_only", None)
    count_only_args["count_only_matches"] = True

    # Keys that execute() also keeps out of its cache key.
    excluded = {"query", "roots", "files", "output_file", "suppress_output"}
    cache_params = {
        key: value for key, value in count_only_args.items() if key not in excluded
    }

    # execute() keys on the resolved roots returned by _validate_roots(),
    # so resolve them here too; files mode has no roots and uses [].
    roots = [
        self.path_resolver.resolve(root) for root in arguments.get("roots") or []
    ]
    return self.cache.create_cache_key(
        query=arguments["query"], roots=roots, **cache_params
    )
326
+
327
+ @handle_mcp_errors("search_content")
328
+ async def execute(self, arguments: dict[str, Any]) -> dict[str, Any] | int:
329
+ # Check if rg command is available
330
+ if not fd_rg_utils.check_external_command("rg"):
331
+ return {
332
+ "success": False,
333
+ "error": "rg (ripgrep) command not found. Please install ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
334
+ "count": 0,
335
+ "results": [],
336
+ }
337
+
338
+ self.validate_arguments(arguments)
339
+
340
+ roots = arguments.get("roots")
341
+ files = arguments.get("files")
342
+ if roots:
343
+ roots = self._validate_roots(roots)
344
+ if files:
345
+ files = self._validate_files(files)
346
+
347
+ # Check cache if enabled (simplified for performance)
348
+ cache_key = None
349
+ if self.cache:
350
+ # Create simplified cache key for better performance
351
+ cache_params = {
352
+ k: v
353
+ for k, v in arguments.items()
354
+ if k
355
+ not in ["query", "roots", "files", "output_file", "suppress_output"]
356
+ }
357
+ cache_key = self.cache.create_cache_key(
358
+ query=arguments["query"], roots=roots or [], **cache_params
359
+ )
360
+
361
+ # Simple cache lookup without complex cross-format logic for performance
362
+ cached_result = self.cache.get(cache_key)
363
+ if cached_result is not None:
364
+ # Check if this is a total_only request
365
+ total_only_requested = arguments.get("total_only", False)
366
+
367
+ if total_only_requested:
368
+ # For total_only mode, always return integer if possible
369
+ if isinstance(cached_result, int):
370
+ return cached_result
371
+ elif (
372
+ isinstance(cached_result, dict)
373
+ and "total_matches" in cached_result
374
+ ):
375
+ total_matches = cached_result["total_matches"]
376
+ return (
377
+ int(total_matches)
378
+ if isinstance(total_matches, int | float)
379
+ else 0
380
+ )
381
+ elif isinstance(cached_result, dict) and "count" in cached_result:
382
+ count = cached_result["count"]
383
+ return int(count) if isinstance(count, int | float) else 0
384
+ else:
385
+ # Fallback: extract count from dict or return 0
386
+ return 0
387
+ else:
388
+ # For non-total_only modes, return dict format
389
+ if isinstance(cached_result, dict):
390
+ cached_result = cached_result.copy()
391
+ cached_result["cache_hit"] = True
392
+ return cached_result
393
+ elif isinstance(cached_result, int):
394
+ # Convert integer to dict format for non-total_only modes
395
+ return {
396
+ "success": True,
397
+ "count": cached_result,
398
+ "total_matches": cached_result,
399
+ "cache_hit": True,
400
+ }
401
+ else:
402
+ # For other types, convert to dict format
403
+ return {
404
+ "success": True,
405
+ "cached_result": cached_result,
406
+ "cache_hit": True,
407
+ }
408
+
409
+ # Handle max_count parameter properly
410
+ # If user specifies max_count, use it directly (with reasonable upper limit)
411
+ # If not specified, use None to let ripgrep return all matches (subject to hard cap later)
412
+ max_count = arguments.get("max_count")
413
+ if max_count is not None:
414
+ # Clamp user-specified max_count to reasonable limits
415
+ # Use 1 as minimum default, but respect user's small values
416
+ max_count = fd_rg_utils.clamp_int(
417
+ max_count,
418
+ 1, # Minimum default value
419
+ fd_rg_utils.DEFAULT_RESULTS_LIMIT, # Upper limit for safety
420
+ )
421
+ timeout_ms = arguments.get("timeout_ms")
422
+
423
+ # Note: --files-from is not supported in this ripgrep version
424
+ # For files mode, we'll search in the parent directories of the files
425
+ # and use glob patterns to restrict search to specific files
426
+ if files:
427
+ # Extract unique parent directories from file paths
428
+ parent_dirs = set()
429
+ file_globs = []
430
+ for file_path in files:
431
+ resolved = self.path_resolver.resolve(file_path)
432
+ parent_dir = str(Path(resolved).parent)
433
+ parent_dirs.add(parent_dir)
434
+
435
+ # Create glob pattern for this specific file
436
+ file_name = Path(resolved).name
437
+ # Escape special characters in filename for glob pattern
438
+ escaped_name = file_name.replace("[", "[[]").replace("]", "[]]")
439
+ file_globs.append(escaped_name)
440
+
441
+ # Use parent directories as roots for compatibility
442
+ roots = list(parent_dirs)
443
+
444
+ # Add file-specific glob patterns to include_globs
445
+ if not arguments.get("include_globs"):
446
+ arguments["include_globs"] = []
447
+ arguments["include_globs"].extend(file_globs)
448
+
449
+ # Check for count-only mode (total_only also requires count mode)
450
+ total_only = bool(arguments.get("total_only", False))
451
+ count_only_matches = (
452
+ bool(arguments.get("count_only_matches", False)) or total_only
453
+ )
454
+ summary_only = bool(arguments.get("summary_only", False))
455
+
456
+ # Smart .gitignore detection
457
+ no_ignore = bool(arguments.get("no_ignore", False))
458
+ if not no_ignore and roots: # Only for roots mode, not files mode
459
+ # Auto-detect if we should use --no-ignore
460
+ detector = get_default_detector()
461
+ original_roots = arguments.get("roots", [])
462
+ should_ignore = detector.should_use_no_ignore(
463
+ original_roots, self.project_root
464
+ )
465
+ if should_ignore:
466
+ no_ignore = True
467
+ # Log the auto-detection for debugging
468
+ # Logger already defined at module level
469
+ detection_info = detector.get_detection_info(
470
+ original_roots, self.project_root
471
+ )
472
+ logger.info(
473
+ f"Auto-enabled --no-ignore due to .gitignore interference: {detection_info['reason']}"
474
+ )
475
+
476
+ # Roots mode
477
+ # Determine if we should use parallel processing
478
+ use_parallel = (
479
+ roots is not None
480
+ and len(roots) > 1
481
+ and arguments.get("enable_parallel", True)
482
+ )
483
+
484
+ started = time.time()
485
+
486
+ if use_parallel and roots is not None:
487
+ # Split roots for parallel processing
488
+ root_chunks = fd_rg_utils.split_roots_for_parallel_processing(
489
+ roots, max_chunks=4
490
+ )
491
+
492
+ # Build commands for each chunk
493
+ commands = []
494
+ for chunk in root_chunks:
495
+ cmd = fd_rg_utils.build_rg_command(
496
+ query=arguments["query"],
497
+ case=arguments.get("case", "smart"),
498
+ fixed_strings=bool(arguments.get("fixed_strings", False)),
499
+ word=bool(arguments.get("word", False)),
500
+ multiline=bool(arguments.get("multiline", False)),
501
+ include_globs=arguments.get("include_globs"),
502
+ exclude_globs=arguments.get("exclude_globs"),
503
+ follow_symlinks=bool(arguments.get("follow_symlinks", False)),
504
+ hidden=bool(arguments.get("hidden", False)),
505
+ no_ignore=no_ignore,
506
+ max_filesize=arguments.get("max_filesize"),
507
+ context_before=arguments.get("context_before"),
508
+ context_after=arguments.get("context_after"),
509
+ encoding=arguments.get("encoding"),
510
+ max_count=max_count,
511
+ timeout_ms=timeout_ms,
512
+ roots=chunk,
513
+ files_from=None,
514
+ count_only_matches=count_only_matches,
515
+ )
516
+ commands.append(cmd)
517
+
518
+ # Execute commands in parallel
519
+ results = await fd_rg_utils.run_parallel_rg_searches(
520
+ commands, timeout_ms=timeout_ms, max_concurrent=4
521
+ )
522
+
523
+ # Merge results
524
+ rc, out, err = fd_rg_utils.merge_rg_results(results, count_only_matches)
525
+ else:
526
+ # Single command execution (original behavior)
527
+ cmd = fd_rg_utils.build_rg_command(
528
+ query=arguments["query"],
529
+ case=arguments.get("case", "smart"),
530
+ fixed_strings=bool(arguments.get("fixed_strings", False)),
531
+ word=bool(arguments.get("word", False)),
532
+ multiline=bool(arguments.get("multiline", False)),
533
+ include_globs=arguments.get("include_globs"),
534
+ exclude_globs=arguments.get("exclude_globs"),
535
+ follow_symlinks=bool(arguments.get("follow_symlinks", False)),
536
+ hidden=bool(arguments.get("hidden", False)),
537
+ no_ignore=no_ignore,
538
+ max_filesize=arguments.get("max_filesize"),
539
+ context_before=arguments.get("context_before"),
540
+ context_after=arguments.get("context_after"),
541
+ encoding=arguments.get("encoding"),
542
+ max_count=max_count,
543
+ timeout_ms=timeout_ms,
544
+ roots=roots,
545
+ files_from=None,
546
+ count_only_matches=count_only_matches,
547
+ )
548
+
549
+ rc, out, err = await fd_rg_utils.run_command_capture(
550
+ cmd, timeout_ms=timeout_ms
551
+ )
552
+
553
+ elapsed_ms = int((time.time() - started) * 1000)
554
+
555
+ if rc not in (0, 1):
556
+ message = err.decode("utf-8", errors="replace").strip() or "ripgrep failed"
557
+ return {"success": False, "error": message, "returncode": rc}
558
+
559
+ # Handle total-only mode (highest priority for count queries)
560
+ total_only = arguments.get("total_only", False)
561
+ if total_only:
562
+ # Parse count output and return only the total
563
+ file_counts = fd_rg_utils.parse_rg_count_output(out)
564
+ total_matches = file_counts.get("__total__", 0)
565
+
566
+ # Cache the FULL count data for future cross-format optimization
567
+ # This allows count_only_matches queries to be served from this cache
568
+ if self.cache and cache_key:
569
+ # Cache both the simple total and the detailed count structure
570
+ self.cache.set(cache_key, total_matches)
571
+
572
+ # Also cache the equivalent count_only_matches result for cross-format optimization
573
+ count_only_cache_key = self._create_count_only_cache_key(
574
+ cache_key, arguments
575
+ )
576
+ if count_only_cache_key:
577
+ # Create a copy of file_counts without __total__ for the detailed result
578
+ file_counts_copy = {
579
+ k: v for k, v in file_counts.items() if k != "__total__"
580
+ }
581
+ detailed_count_result = {
582
+ "success": True,
583
+ "count_only": True,
584
+ "total_matches": total_matches,
585
+ "file_counts": file_counts_copy, # Keep the file-level data (without __total__)
586
+ "elapsed_ms": elapsed_ms,
587
+ "derived_from_total_only": True, # Mark as derived
588
+ }
589
+ self.cache.set(count_only_cache_key, detailed_count_result)
590
+ logger.debug(
591
+ "Cross-cached total_only result as count_only_matches for future optimization"
592
+ )
593
+
594
+ return total_matches
595
+
596
+ # Handle count-only mode
597
+ if count_only_matches:
598
+ file_counts = fd_rg_utils.parse_rg_count_output(out)
599
+ total_matches = file_counts.pop("__total__", 0)
600
+ result = {
601
+ "success": True,
602
+ "count_only": True,
603
+ "total_matches": total_matches,
604
+ "file_counts": file_counts,
605
+ "elapsed_ms": elapsed_ms,
606
+ }
607
+
608
+ # Cache the result
609
+ if self.cache and cache_key:
610
+ self.cache.set(cache_key, result)
611
+
612
+ return result
613
+
614
+ # Handle normal mode
615
+ matches = fd_rg_utils.parse_rg_json_lines_to_matches(out)
616
+
617
+ # Apply user-specified max_count limit if provided
618
+ # Note: ripgrep's -m option limits matches per file, not total matches
619
+ # So we need to apply the total limit here in post-processing
620
+ user_max_count = arguments.get("max_count")
621
+ if user_max_count is not None and len(matches) > user_max_count:
622
+ matches = matches[:user_max_count]
623
+ truncated = True
624
+ else:
625
+ truncated = len(matches) >= fd_rg_utils.MAX_RESULTS_HARD_CAP
626
+ if truncated:
627
+ matches = matches[: fd_rg_utils.MAX_RESULTS_HARD_CAP]
628
+
629
+ # Apply path optimization if requested
630
+ optimize_paths = arguments.get("optimize_paths", False)
631
+ if optimize_paths and matches:
632
+ matches = fd_rg_utils.optimize_match_paths(matches)
633
+
634
+ # Apply file grouping if requested (takes priority over other formats)
635
+ group_by_file = arguments.get("group_by_file", False)
636
+ if group_by_file and matches:
637
+ result = fd_rg_utils.group_matches_by_file(matches)
638
+
639
+ # Handle output suppression and file output for grouped results
640
+ output_file = arguments.get("output_file")
641
+ suppress_output = arguments.get("suppress_output", False)
642
+
643
+ # Handle file output if requested
644
+ if output_file:
645
+ try:
646
+ # Save full result to file
647
+ import json
648
+
649
+ json_content = json.dumps(result, indent=2, ensure_ascii=False)
650
+ file_path = self.file_output_manager.save_to_file(
651
+ content=json_content, base_name=output_file
652
+ )
653
+
654
+ # If suppress_output is True, return minimal response
655
+ if suppress_output:
656
+ minimal_result = {
657
+ "success": result.get("success", True),
658
+ "count": result.get("count", 0),
659
+ "output_file": output_file,
660
+ "file_saved": f"Results saved to {file_path}",
661
+ }
662
+ # Cache the full result, not the minimal one
663
+ if self.cache and cache_key:
664
+ self.cache.set(cache_key, result)
665
+ return minimal_result
666
+ else:
667
+ # Include file info in full response
668
+ result["output_file"] = output_file
669
+ result["file_saved"] = f"Results saved to {file_path}"
670
+ except Exception as e:
671
+ logger.error(f"Failed to save output to file: {e}")
672
+ result["file_save_error"] = str(e)
673
+ result["file_saved"] = False
674
+ elif suppress_output:
675
+ # If suppress_output is True but no output_file, remove detailed results
676
+ minimal_result = {
677
+ "success": result.get("success", True),
678
+ "count": result.get("count", 0),
679
+ "summary": result.get("summary", {}),
680
+ "meta": result.get("meta", {}),
681
+ }
682
+ # Cache the full result, not the minimal one
683
+ if self.cache and cache_key:
684
+ self.cache.set(cache_key, result)
685
+ return minimal_result
686
+
687
+ # Cache the result
688
+ if self.cache and cache_key:
689
+ self.cache.set(cache_key, result)
690
+
691
+ return result
692
+
693
+ # Handle summary mode
694
+ if summary_only:
695
+ summary = fd_rg_utils.summarize_search_results(matches)
696
+ result = {
697
+ "success": True,
698
+ "count": len(matches),
699
+ "truncated": truncated,
700
+ "elapsed_ms": elapsed_ms,
701
+ "summary": summary,
702
+ }
703
+
704
+ # Handle output suppression and file output for summary results
705
+ output_file = arguments.get("output_file")
706
+ suppress_output = arguments.get("suppress_output", False)
707
+
708
+ # Handle file output if requested
709
+ if output_file:
710
+ try:
711
+ # Save full result to file
712
+ import json
713
+
714
+ json_content = json.dumps(result, indent=2, ensure_ascii=False)
715
+ file_path = self.file_output_manager.save_to_file(
716
+ content=json_content, base_name=output_file
717
+ )
718
+
719
+ # If suppress_output is True, return minimal response
720
+ if suppress_output:
721
+ minimal_result = {
722
+ "success": result.get("success", True),
723
+ "count": result.get("count", 0),
724
+ "output_file": output_file,
725
+ "file_saved": f"Results saved to {file_path}",
726
+ }
727
+ # Cache the full result, not the minimal one
728
+ if self.cache and cache_key:
729
+ self.cache.set(cache_key, result)
730
+ return minimal_result
731
+ else:
732
+ # Include file info in full response
733
+ result["output_file"] = output_file
734
+ result["file_saved"] = f"Results saved to {file_path}"
735
+ except Exception as e:
736
+ logger.error(f"Failed to save output to file: {e}")
737
+ result["file_save_error"] = str(e)
738
+ result["file_saved"] = False
739
+ elif suppress_output:
740
+ # If suppress_output is True but no output_file, remove detailed results
741
+ minimal_result = {
742
+ "success": result.get("success", True),
743
+ "count": result.get("count", 0),
744
+ "summary": result.get("summary", {}),
745
+ "elapsed_ms": result.get("elapsed_ms", 0),
746
+ }
747
+ # Cache the full result, not the minimal one
748
+ if self.cache and cache_key:
749
+ self.cache.set(cache_key, result)
750
+ return minimal_result
751
+
752
+ # Cache the result
753
+ if self.cache and cache_key:
754
+ self.cache.set(cache_key, result)
755
+
756
+ return result
757
+
758
+ result = {
759
+ "success": True,
760
+ "count": len(matches),
761
+ "truncated": truncated,
762
+ "elapsed_ms": elapsed_ms,
763
+ }
764
+
765
+ # Handle output suppression and file output
766
+ output_file = arguments.get("output_file")
767
+ suppress_output = arguments.get("suppress_output", False)
768
+
769
+ # Always add results to the base result for caching
770
+ result["results"] = matches
771
+
772
+ # Handle file output if requested
773
+ if output_file:
774
+ try:
775
+ # Create detailed output for file
776
+ file_content = {
777
+ "success": True,
778
+ "count": len(matches),
779
+ "truncated": truncated,
780
+ "elapsed_ms": elapsed_ms,
781
+ "results": matches,
782
+ "summary": fd_rg_utils.summarize_search_results(matches),
783
+ "grouped_by_file": (
784
+ fd_rg_utils.group_matches_by_file(matches)["files"]
785
+ if matches
786
+ else []
787
+ ),
788
+ }
789
+
790
+ # Convert to JSON for file output
791
+ import json
792
+
793
+ json_content = json.dumps(file_content, indent=2, ensure_ascii=False)
794
+
795
+ # Save to file
796
+ saved_file_path = self.file_output_manager.save_to_file(
797
+ content=json_content, base_name=output_file
798
+ )
799
+
800
+ result["output_file"] = output_file
801
+ result["output_file_path"] = saved_file_path
802
+ result["file_saved"] = True
803
+
804
+ logger.info(f"Search results saved to: {saved_file_path}")
805
+
806
+ # If suppress_output is True, return minimal response
807
+ if suppress_output:
808
+ minimal_result = {
809
+ "success": result.get("success", True),
810
+ "count": result.get("count", 0),
811
+ "output_file": output_file,
812
+ "file_saved": f"Results saved to {saved_file_path}",
813
+ }
814
+ # Cache the full result, not the minimal one
815
+ if self.cache and cache_key:
816
+ self.cache.set(cache_key, result)
817
+ return minimal_result
818
+
819
+ except Exception as e:
820
+ logger.error(f"Failed to save output to file: {e}")
821
+ result["file_save_error"] = str(e)
822
+ result["file_saved"] = False
823
+ elif suppress_output:
824
+ # If suppress_output is True but no output_file, remove results from response
825
+ result_copy = result.copy()
826
+ result_copy.pop("results", None)
827
+ # Cache the full result, not the minimal one
828
+ if self.cache and cache_key:
829
+ self.cache.set(cache_key, result)
830
+ return result_copy
831
+
832
+ # Cache the result
833
+ if self.cache and cache_key:
834
+ self.cache.set(cache_key, result)
835
+
836
+ return result