tree-sitter-analyzer 1.9.2__py3-none-any.whl → 1.9.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those versions as published.

Potentially problematic release: this version of tree-sitter-analyzer might be problematic.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +216 -8
  3. tree_sitter_analyzer/cli/argument_validator.py +1 -1
  4. tree_sitter_analyzer/cli/commands/advanced_command.py +3 -6
  5. tree_sitter_analyzer/cli/commands/query_command.py +3 -1
  6. tree_sitter_analyzer/cli/commands/table_command.py +3 -3
  7. tree_sitter_analyzer/constants.py +5 -3
  8. tree_sitter_analyzer/core/analysis_engine.py +1 -1
  9. tree_sitter_analyzer/core/cache_service.py +1 -1
  10. tree_sitter_analyzer/core/engine.py +34 -10
  11. tree_sitter_analyzer/core/query.py +82 -2
  12. tree_sitter_analyzer/encoding_utils.py +64 -0
  13. tree_sitter_analyzer/exceptions.py +1 -1
  14. tree_sitter_analyzer/file_handler.py +49 -33
  15. tree_sitter_analyzer/formatters/base_formatter.py +1 -1
  16. tree_sitter_analyzer/formatters/html_formatter.py +24 -14
  17. tree_sitter_analyzer/formatters/javascript_formatter.py +28 -21
  18. tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -4
  19. tree_sitter_analyzer/formatters/markdown_formatter.py +4 -4
  20. tree_sitter_analyzer/formatters/python_formatter.py +4 -4
  21. tree_sitter_analyzer/formatters/typescript_formatter.py +1 -1
  22. tree_sitter_analyzer/interfaces/mcp_adapter.py +4 -2
  23. tree_sitter_analyzer/interfaces/mcp_server.py +10 -10
  24. tree_sitter_analyzer/language_detector.py +30 -5
  25. tree_sitter_analyzer/language_loader.py +46 -26
  26. tree_sitter_analyzer/languages/css_plugin.py +6 -6
  27. tree_sitter_analyzer/languages/html_plugin.py +12 -8
  28. tree_sitter_analyzer/languages/java_plugin.py +330 -520
  29. tree_sitter_analyzer/languages/javascript_plugin.py +22 -78
  30. tree_sitter_analyzer/languages/markdown_plugin.py +277 -297
  31. tree_sitter_analyzer/languages/python_plugin.py +47 -85
  32. tree_sitter_analyzer/languages/typescript_plugin.py +48 -123
  33. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +14 -8
  34. tree_sitter_analyzer/mcp/server.py +38 -23
  35. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +10 -7
  36. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +51 -7
  37. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +11 -7
  38. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +8 -6
  39. tree_sitter_analyzer/mcp/tools/list_files_tool.py +6 -6
  40. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  41. tree_sitter_analyzer/mcp/tools/search_content_tool.py +48 -15
  42. tree_sitter_analyzer/mcp/tools/table_format_tool.py +13 -8
  43. tree_sitter_analyzer/mcp/utils/file_output_manager.py +8 -3
  44. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +24 -12
  45. tree_sitter_analyzer/mcp/utils/path_resolver.py +2 -2
  46. tree_sitter_analyzer/models.py +16 -0
  47. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  48. tree_sitter_analyzer/plugins/base.py +66 -0
  49. tree_sitter_analyzer/queries/java.py +9 -3
  50. tree_sitter_analyzer/queries/javascript.py +3 -8
  51. tree_sitter_analyzer/queries/markdown.py +1 -1
  52. tree_sitter_analyzer/queries/python.py +2 -2
  53. tree_sitter_analyzer/security/boundary_manager.py +2 -5
  54. tree_sitter_analyzer/security/regex_checker.py +2 -2
  55. tree_sitter_analyzer/security/validator.py +5 -1
  56. tree_sitter_analyzer/table_formatter.py +4 -4
  57. tree_sitter_analyzer/utils/__init__.py +27 -116
  58. tree_sitter_analyzer/{utils.py → utils/logging.py} +2 -2
  59. tree_sitter_analyzer/utils/tree_sitter_compat.py +2 -2
  60. {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/METADATA +87 -45
  61. tree_sitter_analyzer-1.9.4.dist-info/RECORD +111 -0
  62. tree_sitter_analyzer-1.9.2.dist-info/RECORD +0 -109
  63. {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/WHEEL +0 -0
  64. {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/entry_points.txt +0 -0
@@ -25,23 +25,23 @@ except ImportError:
     MCP_AVAILABLE = False
 
     # Fallback types for development without MCP
-    class Server:
+    class Server:  # type: ignore
        pass
 
-    class InitializationOptions:
+    class InitializationOptions:  # type: ignore
        def __init__(self, **kwargs: Any) -> None:
            pass
 
-    class Tool:
+    class Tool:  # type: ignore
        pass
 
-    class Resource:
+    class Resource:  # type: ignore
        pass
 
-    class TextContent:
+    class TextContent:  # type: ignore
        pass
 
-    def stdio_server() -> None:
+    def stdio_server() -> None:  # type: ignore[misc]
        pass
 
 
@@ -70,8 +70,11 @@ from .tools.table_format_tool import TableFormatTool
 # Import UniversalAnalyzeTool at module level for test compatibility
 try:
     from .tools.universal_analyze_tool import UniversalAnalyzeTool
+
+    UNIVERSAL_TOOL_AVAILABLE = True
 except ImportError:
-    UniversalAnalyzeTool: type[Any] | None = None
+    UniversalAnalyzeTool = None  # type: ignore
+    UNIVERSAL_TOOL_AVAILABLE = False
 
 # Set up logging
 logger = setup_logger(__name__)
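
The new module-level UNIVERSAL_TOOL_AVAILABLE flag makes the optional import explicit instead of encoding availability in whether the name was rebound to None. A minimal sketch of the pattern under an assumed optional dependency (optional_tool is hypothetical):

    # Guarded import with an explicit availability flag.
    try:
        from optional_tool import Analyzer  # hypothetical optional dependency

        TOOL_AVAILABLE = True
    except ImportError:
        Analyzer = None  # type: ignore
        TOOL_AVAILABLE = False


    def build_analyzer(root: str):
        # Checking the flag first reads clearly and lets tests monkeypatch
        # TOOL_AVAILABLE to force either branch.
        if TOOL_AVAILABLE and Analyzer is not None:
            return Analyzer(root)
        return None
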
@@ -112,13 +115,15 @@ class TreeSitterAnalyzerMCPServer:
 
         # Optional universal tool to satisfy initialization tests
         # Allow tests to control initialization by checking if UniversalAnalyzeTool is available
-        if UniversalAnalyzeTool is not None:
+        if UNIVERSAL_TOOL_AVAILABLE and UniversalAnalyzeTool is not None:
             try:
-                self.universal_analyze_tool = UniversalAnalyzeTool(project_root)
+                self.universal_analyze_tool: UniversalAnalyzeTool | None = (
+                    UniversalAnalyzeTool(project_root)
+                )
             except Exception:
-                self.universal_analyze_tool: Any = None
+                self.universal_analyze_tool = None
         else:
-            self.universal_analyze_tool: Any = None
+            self.universal_analyze_tool = None
 
         # Initialize MCP resources
         self.code_file_resource = CodeFileResource()
@@ -162,9 +167,11 @@ class TreeSitterAnalyzerMCPServer:
 
         # For specific initialization tests we allow delegating to universal tool
         if "file_path" not in arguments:
-            if getattr(self, "universal_analyze_tool", None) is not None:
+            universal_tool = getattr(self, "universal_analyze_tool", None)
+            if universal_tool is not None:
                 try:
-                    return await self.universal_analyze_tool.execute(arguments)
+                    result = await universal_tool.execute(arguments)
+                    return dict(result)  # Ensure proper type casting
                 except ValueError:
                     # Re-raise ValueError as-is for test compatibility
                     raise
@@ -338,8 +345,9 @@ class TreeSitterAnalyzerMCPServer:
             Dictionary containing file metrics
         """
         try:
-            with open(file_path, encoding="utf-8") as f:
-                content = f.read()
+            from ..encoding_utils import read_file_safe
+
+            content, _ = read_file_safe(file_path)
 
             lines = content.split("\n")
             total_lines = len(lines)
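
read_file_safe comes from the expanded encoding_utils module (+64 lines in this release). The unpacking above shows it returns a 2-tuple whose first element is the decoded text; the second element is presumably the detected encoding, though that is an assumption here. A rough sketch of the shape such a helper could take:

    # Hedged sketch of a read_file_safe-style helper; the real implementation
    # lives in tree_sitter_analyzer/encoding_utils.py and may differ.
    def read_file_safe(file_path: str) -> tuple[str, str]:
        """Read text, trying likely encodings instead of raising on the first failure."""
        for encoding in ("utf-8", "utf-8-sig", "latin-1"):
            try:
                with open(file_path, encoding=encoding) as f:
                    return f.read(), encoding
            except UnicodeDecodeError:
                continue
        # Last resort: decode with replacement characters rather than fail.
        with open(file_path, encoding="utf-8", errors="replace") as f:
            return f.read(), "utf-8"
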
@@ -400,10 +408,6 @@ class TreeSitterAnalyzerMCPServer:
                 if "-->" not in stripped:
                     in_multiline_comment = True
                 continue
-            elif in_multiline_comment and "-->" in stripped:
-                comment_lines += 1
-                in_multiline_comment = False
-                continue
 
             # If not a comment, it's code
             code_lines += 1
@@ -444,7 +448,7 @@ class TreeSitterAnalyzerMCPServer:
         server: Server = Server(self.name)
 
         # Register tools using @server decorators (standard MCP pattern)
-        @server.list_tools()
+        @server.list_tools()  # type: ignore[misc]
         async def handle_list_tools() -> list[Tool]:
             """List all available tools."""
             logger.info("Client requesting tools list")
@@ -477,7 +481,7 @@ class TreeSitterAnalyzerMCPServer:
             logger.info(f"Returning {len(tools)} tools: {[t.name for t in tools]}")
             return tools
 
-        @server.call_tool()
+        @server.call_tool()  # type: ignore[misc]
         async def handle_call_tool(
             name: str, arguments: dict[str, Any]
         ) -> list[TextContent]:
@@ -634,9 +638,10 @@ class TreeSitterAnalyzerMCPServer:
                     pass  # Silently ignore logging errors during shutdown
             raise
 
+        # Some clients may request prompts; explicitly return empty list
         # Some clients may request prompts; explicitly return empty list
         try:
-            from mcp.types import Prompt  # type: ignore
+            from mcp.types import Prompt
 
             @server.list_prompts()  # type: ignore
             async def handle_list_prompts() -> list[Prompt]:
@@ -701,10 +706,20 @@ class TreeSitterAnalyzerMCPServer:
         server = self.create_server()
 
         # Initialize server options with required capabilities field
+        from mcp.server.models import ServerCapabilities
+        from mcp.types import ToolsCapability, ResourcesCapability, PromptsCapability, LoggingCapability
+
+        capabilities = ServerCapabilities(
+            tools=ToolsCapability(listChanged=True),
+            resources=ResourcesCapability(subscribe=True, listChanged=True),
+            prompts=PromptsCapability(listChanged=True),
+            logging=LoggingCapability()
+        )
+
         options = InitializationOptions(
             server_name=self.name,
             server_version=self.version,
-            capabilities={"tools": {}, "resources": {}, "prompts": {}, "logging": {}},
+            capabilities=capabilities,
         )
 
         try:
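
The plain-dict capabilities are replaced with the SDK's typed capability models, which can be validated at startup. A hedged before/after sketch mirroring the hunk's imports (depending on the mcp SDK version, ServerCapabilities may live in mcp.types rather than mcp.server.models):

    from mcp.server.models import InitializationOptions, ServerCapabilities
    from mcp.types import ToolsCapability

    # Before: an untyped dict that stricter SDK versions may reject at validation time.
    legacy_capabilities = {"tools": {}, "resources": {}, "prompts": {}, "logging": {}}

    # After: typed models make the advertised capabilities explicit and checkable.
    options = InitializationOptions(
        server_name="tree-sitter-analyzer",
        server_version="1.9.4",
        capabilities=ServerCapabilities(tools=ToolsCapability(listChanged=True)),
    )
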
@@ -65,8 +65,9 @@ class AnalyzeScaleTool(BaseMCPTool):
             Dictionary containing file metrics
         """
         try:
-            with open(file_path, encoding="utf-8") as f:
-                content = f.read()
+            from ...encoding_utils import read_file_safe
+
+            content, _ = read_file_safe(file_path)
 
             lines = content.split("\n")
             total_lines = len(lines)
@@ -736,11 +737,13 @@ class AnalyzeScaleTool(BaseMCPTool):
                 "methods": [],
                 "fields": [],
             },
-            "scale_category": "small"
-            if file_metrics["total_lines"] < 100
-            else "medium"
-            if file_metrics["total_lines"] < 1000
-            else "large",
+            "scale_category": (
+                "small"
+                if file_metrics["total_lines"] < 100
+                else "medium"
+                if file_metrics["total_lines"] < 1000
+                else "large"
+            ),
             "analysis_recommendations": {
                 "suitable_for_full_analysis": file_metrics["total_lines"] < 1000,
                 "recommended_approach": "JSON files are configuration/data files - structural analysis not applicable",
@@ -137,19 +137,63 @@ class AnalyzeScaleToolCLICompatible:
                 else None
             ),
             "element_counts": {
-                "imports": len(analysis_result.imports),
-                "classes": len(analysis_result.classes),
-                "methods": len(analysis_result.methods),
-                "fields": len(analysis_result.fields),
-                "annotations": len(getattr(analysis_result, "annotations", [])),
+                "imports": len(
+                    [
+                        e
+                        for e in analysis_result.elements
+                        if getattr(e, "element_type", "") == "import"
+                    ]
+                ),
+                "classes": len(
+                    [
+                        e
+                        for e in analysis_result.elements
+                        if getattr(e, "element_type", "") == "class"
+                    ]
+                ),
+                "methods": len(
+                    [
+                        e
+                        for e in analysis_result.elements
+                        if getattr(e, "element_type", "") == "function"
+                    ]
+                ),
+                "fields": len(
+                    [
+                        e
+                        for e in analysis_result.elements
+                        if getattr(e, "element_type", "") == "variable"
+                    ]
+                ),
+                "annotations": len(
+                    [
+                        e
+                        for e in analysis_result.elements
+                        if getattr(e, "element_type", "") == "annotation"
+                    ]
+                ),
             },
             "analysis_time_ms": analysis_time_ms,
             "error_message": None,
         }
 
+        classes_count = len(
+            [
+                e
+                for e in analysis_result.elements
+                if getattr(e, "element_type", "") == "class"
+            ]
+        )
+        methods_count = len(
+            [
+                e
+                for e in analysis_result.elements
+                if getattr(e, "element_type", "") == "function"
+            ]
+        )
         logger.info(
-            f"Successfully analyzed {file_path}: {len(analysis_result.classes)} classes, "
-            f"{len(analysis_result.methods)} methods, {analysis_time_ms}ms"
+            f"Successfully analyzed {file_path}: {classes_count} classes, "
+            f"{methods_count} methods, {analysis_time_ms}ms"
         )
 
         return result
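
This rewrite tracks a model change visible throughout the diff: the dedicated imports/classes/methods/fields collections are gone, and everything hangs off a single elements list tagged with element_type. The five near-identical comprehensions could be collapsed into one pass; an equivalent sketch (illustrative, not the package's code):

    from collections import Counter


    def count_elements(elements: list) -> dict[str, int]:
        # One pass over the unified elements list instead of five comprehensions.
        by_type = Counter(getattr(e, "element_type", "") for e in elements)
        return {
            "imports": by_type["import"],
            "classes": by_type["class"],
            "methods": by_type["function"],
            "fields": by_type["variable"],
            "annotations": by_type["annotation"],
        }
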
@@ -397,11 +397,13 @@ def group_matches_by_file(matches: list[dict[str, Any]]) -> dict[str, Any]:
     # Convert to grouped structure
     files = []
     for file_path, file_matches in file_groups.items():
-        files.append({
-            "file": file_path,
-            "matches": file_matches,
-            "match_count": len(file_matches)
-        })
+        files.append(
+            {
+                "file": file_path,
+                "matches": file_matches,
+                "match_count": len(file_matches),
+            }
+        )
 
     return {"success": True, "count": total_matches, "files": files}
 
@@ -519,7 +521,7 @@ def summarize_search_results(
                     truncated_line += "..."
                 sample_lines.append(f"L{line_num}: {truncated_line}")
                 remaining_lines -= 1
-
+
     # Ensure we have at least some sample lines if matches exist
     if not sample_lines and file_matches:
         # Fallback: create a simple summary line
@@ -637,7 +639,9 @@ def write_files_to_temp(files: list[str]) -> TempFileList:
     fd, temp_path = tempfile.mkstemp(prefix="rg-files-", suffix=".lst")
     os.close(fd)
     content = "\n".join(files)
-    Path(temp_path).write_text(content, encoding="utf-8")
+    from ...encoding_utils import write_file_safe
+
+    write_file_safe(temp_path, content)
     return TempFileList(path=temp_path)
 
 
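
The write path now goes through the same encoding_utils layer as the reads. Assuming write_file_safe mirrors read_file_safe, it is roughly a UTF-8 write with defensive error handling; a hedged sketch:

    # Hedged sketch of a write_file_safe-style helper; the real one is in
    # tree_sitter_analyzer/encoding_utils.py and may differ.
    def write_file_safe(file_path: str, content: str, encoding: str = "utf-8") -> None:
        """Write text, replacing characters the target encoding cannot represent."""
        with open(file_path, "w", encoding=encoding, errors="replace") as f:
            f.write(content)
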
@@ -249,7 +249,7 @@ class FindAndGrepTool(BaseMCPTool):
         return True
 
     @handle_mcp_errors("find_and_grep")
-    async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
+    async def execute(self, arguments: dict[str, Any]) -> dict[str, Any] | int:
         # Check if both fd and rg commands are available
         missing_commands = fd_rg_utils.get_missing_commands()
         if missing_commands:
@@ -341,14 +341,14 @@ class FindAndGrepTool(BaseMCPTool):
             files.sort()
         elif sort_mode == "mtime":
 
-            def get_mtime(p):
+            def get_mtime(p: str) -> float:
                 path_obj = pathlib.Path(p)
                 return path_obj.stat().st_mtime if path_obj.exists() else 0
 
             files.sort(key=get_mtime, reverse=True)
         elif sort_mode == "size":
 
-            def get_size(p):
+            def get_size(p: str) -> int:
                 path_obj = pathlib.Path(p)
                 return path_obj.stat().st_size if path_obj.exists() else 0
 
@@ -628,9 +628,11 @@ class FindAndGrepTool(BaseMCPTool):
             "success": True,
             "results": matches,
             "count": len(matches),
-            "files": fd_rg_utils.group_matches_by_file(matches)["files"]
-            if matches
-            else [],
+            "files": (
+                fd_rg_utils.group_matches_by_file(matches)["files"]
+                if matches
+                else []
+            ),
             "summary": fd_rg_utils.summarize_search_results(matches),
             "meta": result["meta"],
         }
@@ -301,7 +301,7 @@ class ListFilesTool(BaseMCPTool):
                 saved_path = file_manager.save_to_file(
                     content=json_content, base_name=output_file
                 )
-                result["output_file"] = saved_path
+                result["output_file"] = saved_path  # type: ignore[assignment]
 
                 if suppress_output:
                     # Return minimal response to save tokens
@@ -314,7 +314,7 @@ class ListFilesTool(BaseMCPTool):
                 }
             except Exception as e:
                 logger.warning(f"Failed to save output file: {e}")
-                result["output_file_error"] = str(e)
+                result["output_file_error"] = str(e)  # type: ignore[assignment]
 
         return result
 
@@ -350,7 +350,7 @@ class ListFilesTool(BaseMCPTool):
             except (OSError, ValueError):  # nosec B112
                 continue
 
-        result = {
+        final_result: dict[str, Any] = {
             "success": True,
             "count": len(results),
             "truncated": truncated,
@@ -396,7 +396,7 @@ class ListFilesTool(BaseMCPTool):
                 saved_path = file_manager.save_to_file(
                     content=json_content, base_name=output_file
                 )
-                result["output_file"] = saved_path
+                final_result["output_file"] = saved_path
 
                 if suppress_output:
                     # Return minimal response to save tokens
@@ -408,6 +408,6 @@ class ListFilesTool(BaseMCPTool):
                 }
             except Exception as e:
                 logger.warning(f"Failed to save output file: {e}")
-                result["output_file_error"] = str(e)
+                final_result["output_file_error"] = str(e)
 
-        return result
+        return final_result
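
Renaming result to final_result (with an explicit dict[str, Any] annotation) sidesteps mypy's restriction on retyping a variable within one scope; the earlier hunks in this file take the # type: ignore[assignment] route instead. A tiny illustration of the kind of error the rename avoids (the package's exact mypy complaint is not shown in the diff):

    from typing import Any


    def report(values: list[int]) -> dict[str, Any]:
        result = {"count": len(values)}  # inferred as dict[str, int]
        # result["note"] = "done"       # mypy error: str assigned where int expected
        final_result: dict[str, Any] = {"count": len(values)}
        final_result["note"] = "done"  # fine: values are Any
        return final_result
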
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""
+Output format parameter validation for search_content tool.
+
+Ensures mutual exclusion of output format parameters to prevent conflicts
+and provides multilingual error messages with token efficiency guidance.
+"""
+
+import locale
+import os
+from typing import Any
+
+
+class OutputFormatValidator:
+    """Validator for output format parameters mutual exclusion."""
+
+    # Output format parameters that are mutually exclusive
+    OUTPUT_FORMAT_PARAMS = {
+        "total_only",
+        "count_only_matches",
+        "summary_only",
+        "group_by_file",
+        "suppress_output",
+    }
+
+    # Token efficiency guidance for error messages
+    FORMAT_EFFICIENCY_GUIDE = {
+        "total_only": "~10 tokens (most efficient for count queries)",
+        "count_only_matches": "~50-200 tokens (file distribution analysis)",
+        "summary_only": "~500-2000 tokens (initial investigation)",
+        "group_by_file": "~2000-10000 tokens (context-aware review)",
+        "suppress_output": "0 tokens (cache only, no output)",
+    }
+
+    def _detect_language(self) -> str:
+        """Detect preferred language from environment."""
+        # Check environment variables for language preference
+        lang = os.environ.get("LANG", "")
+        if lang.startswith("ja"):
+            return "ja"
+
+        # Check locale
+        try:
+            current_locale = locale.getlocale()[0]
+            if current_locale and current_locale.startswith("ja"):
+                return "ja"
+        except Exception:
+            pass
+
+        # Default to English
+        return "en"
+
+    def _get_error_message(self, specified_formats: list[str]) -> str:
+        """Generate localized error message with usage examples."""
+        lang = self._detect_language()
+        format_list = ", ".join(specified_formats)
+
+        if lang == "ja":
+            # Japanese error message
+            base_message = (
+                f"⚠️ 出力形式パラメータエラー: 相互排他的なパラメータが同時に指定されています: {format_list}\n\n"
+                f"🔒 相互排他的パラメータ: {', '.join(self.OUTPUT_FORMAT_PARAMS)}\n\n"
+                f"💡 トークン効率ガイド:\n"
+            )
+
+            for param, desc in self.FORMAT_EFFICIENCY_GUIDE.items():
+                base_message += f"  • {param}: {desc}\n"
+
+            base_message += (
+                "\n📋 推奨使用パターン:\n"
+                "  • 件数確認: total_only=true\n"
+                "  • ファイル分布: count_only_matches=true\n"
+                "  • 初期調査: summary_only=true\n"
+                "  • 詳細レビュー: group_by_file=true\n"
+                "  • キャッシュのみ: suppress_output=true\n\n"
+                "❌ 間違った例: {\"total_only\": true, \"summary_only\": true}\n"
+                "✅ 正しい例: {\"total_only\": true}"
+            )
+        else:
+            # English error message
+            base_message = (
+                f"⚠️ Output Format Parameter Error: Multiple mutually exclusive formats specified: {format_list}\n\n"
+                f"🔒 Mutually Exclusive Parameters: {', '.join(self.OUTPUT_FORMAT_PARAMS)}\n\n"
+                f"💡 Token Efficiency Guide:\n"
+            )
+
+            for param, desc in self.FORMAT_EFFICIENCY_GUIDE.items():
+                base_message += f"  • {param}: {desc}\n"
+
+            base_message += (
+                "\n📋 Recommended Usage Patterns:\n"
+                "  • Count validation: total_only=true\n"
+                "  • File distribution: count_only_matches=true\n"
+                "  • Initial investigation: summary_only=true\n"
+                "  • Detailed review: group_by_file=true\n"
+                "  • Cache only: suppress_output=true\n\n"
+                "❌ Incorrect: {\"total_only\": true, \"summary_only\": true}\n"
+                "✅ Correct: {\"total_only\": true}"
+            )
+
+        return base_message
+
+    def validate_output_format_exclusion(self, arguments: dict[str, Any]) -> None:
+        """
+        Validate that only one output format parameter is specified.
+
+        Args:
+            arguments: Tool arguments dictionary
+
+        Raises:
+            ValueError: If multiple output format parameters are specified
+        """
+        specified_formats = []
+
+        for param in self.OUTPUT_FORMAT_PARAMS:
+            if arguments.get(param, False):
+                specified_formats.append(param)
+
+        if len(specified_formats) > 1:
+            error_message = self._get_error_message(specified_formats)
+            raise ValueError(error_message)
+
+    def get_active_format(self, arguments: dict[str, Any]) -> str:
+        """
+        Get the active output format from arguments.
+
+        Args:
+            arguments: Tool arguments dictionary
+
+        Returns:
+            Active format name or "normal" if none specified
+        """
+        for param in self.OUTPUT_FORMAT_PARAMS:
+            if arguments.get(param, False):
+                return param
+        return "normal"
+
+
+# Global validator instance
+_default_validator: OutputFormatValidator | None = None
+
+
+def get_default_validator() -> OutputFormatValidator:
+    """Get the default output format validator instance."""
+    global _default_validator
+    if _default_validator is None:
+        _default_validator = OutputFormatValidator()
+    return _default_validator
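
In practice the validator is obtained through the module-level singleton and raises before any search executes. A short usage sketch built from the code above:

    validator = get_default_validator()

    # One exclusive format: passes silently.
    validator.validate_output_format_exclusion({"total_only": True})

    # Two exclusive formats: raises ValueError carrying the localized guidance text.
    try:
        validator.validate_output_format_exclusion(
            {"total_only": True, "summary_only": True}
        )
    except ValueError as exc:
        print(exc)

    print(validator.get_active_format({"group_by_file": True}))  # "group_by_file"
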
@@ -18,6 +18,7 @@ from ..utils.gitignore_detector import get_default_detector
 from ..utils.search_cache import get_default_cache
 from . import fd_rg_utils
 from .base_tool import BaseMCPTool
+from .output_format_validator import get_default_validator
 
 logger = logging.getLogger(__name__)
 
@@ -53,7 +54,26 @@ class SearchContentTool(BaseMCPTool):
     def get_tool_definition(self) -> dict[str, Any]:
         return {
             "name": "search_content",
-            "description": "Search text content inside files using ripgrep. Supports regex patterns, case sensitivity, context lines, and various output formats. Can search in directories or specific files with advanced token optimization (summary_only, group_by_file, total_only, suppress_output).",
+            "description": """Search text content inside files using ripgrep. Supports regex patterns, case sensitivity, context lines, and various output formats. Can search in directories or specific files.
+
+⚡ IMPORTANT: Token Efficiency Guide
+Choose output format parameters based on your needs to minimize token usage and maximize performance with efficient search strategies:
+
+📋 RECOMMENDED WORKFLOW (Most Efficient Approach):
+1. START with total_only=true parameter for initial count validation (~10 tokens)
+2. IF more detail needed, use count_only_matches=true parameter for file distribution (~50-200 tokens)
+3. IF context needed, use summary_only=true parameter for overview (~500-2000 tokens)
+4. ONLY use full results when specific content review is required (~2000-50000+ tokens)
+
+⚡ TOKEN EFFICIENCY COMPARISON:
+- total_only: ~10 tokens (single number) - MOST EFFICIENT for count queries
+- count_only_matches: ~50-200 tokens (file counts) - Good for file distribution analysis
+- summary_only: ~500-2000 tokens (condensed overview) - initial investigation
+- group_by_file: ~2000-10000 tokens (organized by file) - Context-aware review
+- optimize_paths: 10-30% reduction (path compression) - Use with deep directory structures
+- Full results: ~2000-50000+ tokens - Use sparingly for detailed analysis
+
+⚠️ MUTUALLY EXCLUSIVE: Only one output format parameter can be true at a time. Cannot be combined with other format parameters.""",
             "inputSchema": {
                 "type": "object",
                 "properties": {
@@ -144,27 +164,27 @@ class SearchContentTool(BaseMCPTool):
                 "count_only_matches": {
                     "type": "boolean",
                     "default": False,
-                    "description": "Return only match counts per file instead of full match details. Useful for statistics and performance",
+                    "description": "⚡ EXCLUSIVE: Return only match counts per file (~50-200 tokens). RECOMMENDED for: File distribution analysis, understanding match spread across files. Cannot be combined with other output formats.",
                 },
                 "summary_only": {
                     "type": "boolean",
                     "default": False,
-                    "description": "Return a condensed summary of results to reduce context size. Shows top files and sample matches",
+                    "description": "⚡ EXCLUSIVE: Return condensed overview with top files and sample matches (~500-2000 tokens). RECOMMENDED for: Initial investigation, scope confirmation, pattern validation. Cannot be combined with other output formats.",
                 },
                 "optimize_paths": {
                     "type": "boolean",
                     "default": False,
-                    "description": "Optimize file paths in results by removing common prefixes and shortening long paths. Saves tokens in output",
+                    "description": "⚡ EXCLUSIVE: Optimize file paths by removing common prefixes (10-30% token reduction). RECOMMENDED for: Deep directory structures, large codebases. Cannot be combined with other output formats.",
                 },
                 "group_by_file": {
                     "type": "boolean",
                     "default": False,
-                    "description": "Group results by file to eliminate file path duplication when multiple matches exist in the same file. Significantly reduces tokens",
+                    "description": "⚡ EXCLUSIVE: Group results by file, eliminating path duplication (~2000-10000 tokens). RECOMMENDED for: Context-aware review, analyzing matches within specific files. Cannot be combined with other output formats.",
                 },
                 "total_only": {
                     "type": "boolean",
                     "default": False,
-                    "description": "Return only the total match count as a number. Most token-efficient option for count queries. Takes priority over all other formats",
+                    "description": "⚡ EXCLUSIVE: Return only total match count as single number (~10 tokens - MOST EFFICIENT). RECOMMENDED for: Count validation, filtering decisions, existence checks. Takes priority over all other formats. Cannot be combined with other output formats.",
                 },
                 "output_file": {
                     "type": "string",
@@ -217,6 +237,10 @@ class SearchContentTool(BaseMCPTool):
         return validated
 
     def validate_arguments(self, arguments: dict[str, Any]) -> bool:
+        # Validate output format exclusion first
+        validator = get_default_validator()
+        validator.validate_output_format_exclusion(arguments)
+
         if (
             "query" not in arguments
             or not isinstance(arguments["query"], str)
@@ -343,15 +367,24 @@ class SearchContentTool(BaseMCPTool):
         if cached_result is not None:
             # Check if this is a total_only request
             total_only_requested = arguments.get("total_only", False)
-
+
             if total_only_requested:
                 # For total_only mode, always return integer if possible
                 if isinstance(cached_result, int):
                     return cached_result
-                elif isinstance(cached_result, dict) and "total_matches" in cached_result:
-                    return cached_result["total_matches"]
+                elif (
+                    isinstance(cached_result, dict)
+                    and "total_matches" in cached_result
+                ):
+                    total_matches = cached_result["total_matches"]
+                    return (
+                        int(total_matches)
+                        if isinstance(total_matches, (int, float))
+                        else 0
+                    )
                 elif isinstance(cached_result, dict) and "count" in cached_result:
-                    return cached_result["count"]
+                    count = cached_result["count"]
+                    return int(count) if isinstance(count, (int, float)) else 0
                 else:
                     # Fallback: extract count from dict or return 0
                     return 0
@@ -751,11 +784,11 @@ class SearchContentTool(BaseMCPTool):
             "elapsed_ms": elapsed_ms,
             "results": matches,
             "summary": fd_rg_utils.summarize_search_results(matches),
-            "grouped_by_file": fd_rg_utils.group_matches_by_file(matches)[
-                "files"
-            ]
-            if matches
-            else [],
+            "grouped_by_file": (
+                fd_rg_utils.group_matches_by_file(matches)["files"]
+                if matches
+                else []
+            ),
         }
 
         # Convert to JSON for file output