tree-sitter-analyzer 1.9.2__py3-none-any.whl → 1.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/api.py +216 -8
- tree_sitter_analyzer/cli/argument_validator.py +1 -1
- tree_sitter_analyzer/cli/commands/advanced_command.py +3 -6
- tree_sitter_analyzer/cli/commands/query_command.py +3 -1
- tree_sitter_analyzer/cli/commands/table_command.py +3 -3
- tree_sitter_analyzer/constants.py +5 -3
- tree_sitter_analyzer/core/analysis_engine.py +1 -1
- tree_sitter_analyzer/core/cache_service.py +1 -1
- tree_sitter_analyzer/core/engine.py +34 -10
- tree_sitter_analyzer/core/query.py +82 -2
- tree_sitter_analyzer/encoding_utils.py +64 -0
- tree_sitter_analyzer/exceptions.py +1 -1
- tree_sitter_analyzer/file_handler.py +49 -33
- tree_sitter_analyzer/formatters/base_formatter.py +1 -1
- tree_sitter_analyzer/formatters/html_formatter.py +24 -14
- tree_sitter_analyzer/formatters/javascript_formatter.py +28 -21
- tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -4
- tree_sitter_analyzer/formatters/markdown_formatter.py +4 -4
- tree_sitter_analyzer/formatters/python_formatter.py +4 -4
- tree_sitter_analyzer/formatters/typescript_formatter.py +1 -1
- tree_sitter_analyzer/interfaces/mcp_adapter.py +4 -2
- tree_sitter_analyzer/interfaces/mcp_server.py +10 -10
- tree_sitter_analyzer/language_detector.py +30 -5
- tree_sitter_analyzer/language_loader.py +46 -26
- tree_sitter_analyzer/languages/css_plugin.py +6 -6
- tree_sitter_analyzer/languages/html_plugin.py +12 -8
- tree_sitter_analyzer/languages/java_plugin.py +330 -520
- tree_sitter_analyzer/languages/javascript_plugin.py +22 -78
- tree_sitter_analyzer/languages/markdown_plugin.py +277 -297
- tree_sitter_analyzer/languages/python_plugin.py +47 -85
- tree_sitter_analyzer/languages/typescript_plugin.py +48 -123
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +14 -8
- tree_sitter_analyzer/mcp/server.py +38 -23
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +10 -7
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +51 -7
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +11 -7
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +8 -6
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +6 -6
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +48 -15
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +13 -8
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +8 -3
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +24 -12
- tree_sitter_analyzer/mcp/utils/path_resolver.py +2 -2
- tree_sitter_analyzer/models.py +16 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/plugins/base.py +66 -0
- tree_sitter_analyzer/queries/java.py +9 -3
- tree_sitter_analyzer/queries/javascript.py +3 -8
- tree_sitter_analyzer/queries/markdown.py +1 -1
- tree_sitter_analyzer/queries/python.py +2 -2
- tree_sitter_analyzer/security/boundary_manager.py +2 -5
- tree_sitter_analyzer/security/regex_checker.py +2 -2
- tree_sitter_analyzer/security/validator.py +5 -1
- tree_sitter_analyzer/table_formatter.py +4 -4
- tree_sitter_analyzer/utils/__init__.py +27 -116
- tree_sitter_analyzer/{utils.py → utils/logging.py} +2 -2
- tree_sitter_analyzer/utils/tree_sitter_compat.py +2 -2
- {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/METADATA +87 -45
- tree_sitter_analyzer-1.9.4.dist-info/RECORD +111 -0
- tree_sitter_analyzer-1.9.2.dist-info/RECORD +0 -109
- {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/entry_points.txt +0 -0
|
@@ -25,23 +25,23 @@ except ImportError:
|
|
|
25
25
|
MCP_AVAILABLE = False
|
|
26
26
|
|
|
27
27
|
# Fallback types for development without MCP
|
|
28
|
-
class Server:
|
|
28
|
+
class Server: # type: ignore
|
|
29
29
|
pass
|
|
30
30
|
|
|
31
|
-
class InitializationOptions:
|
|
31
|
+
class InitializationOptions: # type: ignore
|
|
32
32
|
def __init__(self, **kwargs: Any) -> None:
|
|
33
33
|
pass
|
|
34
34
|
|
|
35
|
-
class Tool:
|
|
35
|
+
class Tool: # type: ignore
|
|
36
36
|
pass
|
|
37
37
|
|
|
38
|
-
class Resource:
|
|
38
|
+
class Resource: # type: ignore
|
|
39
39
|
pass
|
|
40
40
|
|
|
41
|
-
class TextContent:
|
|
41
|
+
class TextContent: # type: ignore
|
|
42
42
|
pass
|
|
43
43
|
|
|
44
|
-
def stdio_server() -> None:
|
|
44
|
+
def stdio_server() -> None: # type: ignore[misc]
|
|
45
45
|
pass
|
|
46
46
|
|
|
47
47
|
|
|
@@ -70,8 +70,11 @@ from .tools.table_format_tool import TableFormatTool
|
|
|
70
70
|
# Import UniversalAnalyzeTool at module level for test compatibility
|
|
71
71
|
try:
|
|
72
72
|
from .tools.universal_analyze_tool import UniversalAnalyzeTool
|
|
73
|
+
|
|
74
|
+
UNIVERSAL_TOOL_AVAILABLE = True
|
|
73
75
|
except ImportError:
|
|
74
|
-
UniversalAnalyzeTool
|
|
76
|
+
UniversalAnalyzeTool = None # type: ignore
|
|
77
|
+
UNIVERSAL_TOOL_AVAILABLE = False
|
|
75
78
|
|
|
76
79
|
# Set up logging
|
|
77
80
|
logger = setup_logger(__name__)
|
|
@@ -112,13 +115,15 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
112
115
|
|
|
113
116
|
# Optional universal tool to satisfy initialization tests
|
|
114
117
|
# Allow tests to control initialization by checking if UniversalAnalyzeTool is available
|
|
115
|
-
if UniversalAnalyzeTool is not None:
|
|
118
|
+
if UNIVERSAL_TOOL_AVAILABLE and UniversalAnalyzeTool is not None:
|
|
116
119
|
try:
|
|
117
|
-
self.universal_analyze_tool =
|
|
120
|
+
self.universal_analyze_tool: UniversalAnalyzeTool | None = (
|
|
121
|
+
UniversalAnalyzeTool(project_root)
|
|
122
|
+
)
|
|
118
123
|
except Exception:
|
|
119
|
-
self.universal_analyze_tool
|
|
124
|
+
self.universal_analyze_tool = None
|
|
120
125
|
else:
|
|
121
|
-
self.universal_analyze_tool
|
|
126
|
+
self.universal_analyze_tool = None
|
|
122
127
|
|
|
123
128
|
# Initialize MCP resources
|
|
124
129
|
self.code_file_resource = CodeFileResource()
|
|
@@ -162,9 +167,11 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
162
167
|
|
|
163
168
|
# For specific initialization tests we allow delegating to universal tool
|
|
164
169
|
if "file_path" not in arguments:
|
|
165
|
-
|
|
170
|
+
universal_tool = getattr(self, "universal_analyze_tool", None)
|
|
171
|
+
if universal_tool is not None:
|
|
166
172
|
try:
|
|
167
|
-
|
|
173
|
+
result = await universal_tool.execute(arguments)
|
|
174
|
+
return dict(result) # Ensure proper type casting
|
|
168
175
|
except ValueError:
|
|
169
176
|
# Re-raise ValueError as-is for test compatibility
|
|
170
177
|
raise
|
|
@@ -338,8 +345,9 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
338
345
|
Dictionary containing file metrics
|
|
339
346
|
"""
|
|
340
347
|
try:
|
|
341
|
-
|
|
342
|
-
|
|
348
|
+
from ..encoding_utils import read_file_safe
|
|
349
|
+
|
|
350
|
+
content, _ = read_file_safe(file_path)
|
|
343
351
|
|
|
344
352
|
lines = content.split("\n")
|
|
345
353
|
total_lines = len(lines)
|
|
@@ -400,10 +408,6 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
400
408
|
if "-->" not in stripped:
|
|
401
409
|
in_multiline_comment = True
|
|
402
410
|
continue
|
|
403
|
-
elif in_multiline_comment and "-->" in stripped:
|
|
404
|
-
comment_lines += 1
|
|
405
|
-
in_multiline_comment = False
|
|
406
|
-
continue
|
|
407
411
|
|
|
408
412
|
# If not a comment, it's code
|
|
409
413
|
code_lines += 1
|
|
@@ -444,7 +448,7 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
444
448
|
server: Server = Server(self.name)
|
|
445
449
|
|
|
446
450
|
# Register tools using @server decorators (standard MCP pattern)
|
|
447
|
-
@server.list_tools()
|
|
451
|
+
@server.list_tools() # type: ignore[misc]
|
|
448
452
|
async def handle_list_tools() -> list[Tool]:
|
|
449
453
|
"""List all available tools."""
|
|
450
454
|
logger.info("Client requesting tools list")
|
|
@@ -477,7 +481,7 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
477
481
|
logger.info(f"Returning {len(tools)} tools: {[t.name for t in tools]}")
|
|
478
482
|
return tools
|
|
479
483
|
|
|
480
|
-
@server.call_tool()
|
|
484
|
+
@server.call_tool() # type: ignore[misc]
|
|
481
485
|
async def handle_call_tool(
|
|
482
486
|
name: str, arguments: dict[str, Any]
|
|
483
487
|
) -> list[TextContent]:
|
|
@@ -634,9 +638,10 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
634
638
|
pass # Silently ignore logging errors during shutdown
|
|
635
639
|
raise
|
|
636
640
|
|
|
641
|
+
# Some clients may request prompts; explicitly return empty list
|
|
637
642
|
# Some clients may request prompts; explicitly return empty list
|
|
638
643
|
try:
|
|
639
|
-
from mcp.types import Prompt
|
|
644
|
+
from mcp.types import Prompt
|
|
640
645
|
|
|
641
646
|
@server.list_prompts() # type: ignore
|
|
642
647
|
async def handle_list_prompts() -> list[Prompt]:
|
|
@@ -701,10 +706,20 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
701
706
|
server = self.create_server()
|
|
702
707
|
|
|
703
708
|
# Initialize server options with required capabilities field
|
|
709
|
+
from mcp.server.models import ServerCapabilities
|
|
710
|
+
from mcp.types import ToolsCapability, ResourcesCapability, PromptsCapability, LoggingCapability
|
|
711
|
+
|
|
712
|
+
capabilities = ServerCapabilities(
|
|
713
|
+
tools=ToolsCapability(listChanged=True),
|
|
714
|
+
resources=ResourcesCapability(subscribe=True, listChanged=True),
|
|
715
|
+
prompts=PromptsCapability(listChanged=True),
|
|
716
|
+
logging=LoggingCapability()
|
|
717
|
+
)
|
|
718
|
+
|
|
704
719
|
options = InitializationOptions(
|
|
705
720
|
server_name=self.name,
|
|
706
721
|
server_version=self.version,
|
|
707
|
-
capabilities=
|
|
722
|
+
capabilities=capabilities,
|
|
708
723
|
)
|
|
709
724
|
|
|
710
725
|
try:
|
|
@@ -65,8 +65,9 @@ class AnalyzeScaleTool(BaseMCPTool):
|
|
|
65
65
|
Dictionary containing file metrics
|
|
66
66
|
"""
|
|
67
67
|
try:
|
|
68
|
-
|
|
69
|
-
|
|
68
|
+
from ...encoding_utils import read_file_safe
|
|
69
|
+
|
|
70
|
+
content, _ = read_file_safe(file_path)
|
|
70
71
|
|
|
71
72
|
lines = content.split("\n")
|
|
72
73
|
total_lines = len(lines)
|
|
@@ -736,11 +737,13 @@ class AnalyzeScaleTool(BaseMCPTool):
|
|
|
736
737
|
"methods": [],
|
|
737
738
|
"fields": [],
|
|
738
739
|
},
|
|
739
|
-
"scale_category":
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
740
|
+
"scale_category": (
|
|
741
|
+
"small"
|
|
742
|
+
if file_metrics["total_lines"] < 100
|
|
743
|
+
else "medium"
|
|
744
|
+
if file_metrics["total_lines"] < 1000
|
|
745
|
+
else "large"
|
|
746
|
+
),
|
|
744
747
|
"analysis_recommendations": {
|
|
745
748
|
"suitable_for_full_analysis": file_metrics["total_lines"] < 1000,
|
|
746
749
|
"recommended_approach": "JSON files are configuration/data files - structural analysis not applicable",
|
|
@@ -137,19 +137,63 @@ class AnalyzeScaleToolCLICompatible:
|
|
|
137
137
|
else None
|
|
138
138
|
),
|
|
139
139
|
"element_counts": {
|
|
140
|
-
"imports": len(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
140
|
+
"imports": len(
|
|
141
|
+
[
|
|
142
|
+
e
|
|
143
|
+
for e in analysis_result.elements
|
|
144
|
+
if getattr(e, "element_type", "") == "import"
|
|
145
|
+
]
|
|
146
|
+
),
|
|
147
|
+
"classes": len(
|
|
148
|
+
[
|
|
149
|
+
e
|
|
150
|
+
for e in analysis_result.elements
|
|
151
|
+
if getattr(e, "element_type", "") == "class"
|
|
152
|
+
]
|
|
153
|
+
),
|
|
154
|
+
"methods": len(
|
|
155
|
+
[
|
|
156
|
+
e
|
|
157
|
+
for e in analysis_result.elements
|
|
158
|
+
if getattr(e, "element_type", "") == "function"
|
|
159
|
+
]
|
|
160
|
+
),
|
|
161
|
+
"fields": len(
|
|
162
|
+
[
|
|
163
|
+
e
|
|
164
|
+
for e in analysis_result.elements
|
|
165
|
+
if getattr(e, "element_type", "") == "variable"
|
|
166
|
+
]
|
|
167
|
+
),
|
|
168
|
+
"annotations": len(
|
|
169
|
+
[
|
|
170
|
+
e
|
|
171
|
+
for e in analysis_result.elements
|
|
172
|
+
if getattr(e, "element_type", "") == "annotation"
|
|
173
|
+
]
|
|
174
|
+
),
|
|
145
175
|
},
|
|
146
176
|
"analysis_time_ms": analysis_time_ms,
|
|
147
177
|
"error_message": None,
|
|
148
178
|
}
|
|
149
179
|
|
|
180
|
+
classes_count = len(
|
|
181
|
+
[
|
|
182
|
+
e
|
|
183
|
+
for e in analysis_result.elements
|
|
184
|
+
if getattr(e, "element_type", "") == "class"
|
|
185
|
+
]
|
|
186
|
+
)
|
|
187
|
+
methods_count = len(
|
|
188
|
+
[
|
|
189
|
+
e
|
|
190
|
+
for e in analysis_result.elements
|
|
191
|
+
if getattr(e, "element_type", "") == "function"
|
|
192
|
+
]
|
|
193
|
+
)
|
|
150
194
|
logger.info(
|
|
151
|
-
f"Successfully analyzed {file_path}: {
|
|
152
|
-
f"{
|
|
195
|
+
f"Successfully analyzed {file_path}: {classes_count} classes, "
|
|
196
|
+
f"{methods_count} methods, {analysis_time_ms}ms"
|
|
153
197
|
)
|
|
154
198
|
|
|
155
199
|
return result
|
|
@@ -397,11 +397,13 @@ def group_matches_by_file(matches: list[dict[str, Any]]) -> dict[str, Any]:
|
|
|
397
397
|
# Convert to grouped structure
|
|
398
398
|
files = []
|
|
399
399
|
for file_path, file_matches in file_groups.items():
|
|
400
|
-
files.append(
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
400
|
+
files.append(
|
|
401
|
+
{
|
|
402
|
+
"file": file_path,
|
|
403
|
+
"matches": file_matches,
|
|
404
|
+
"match_count": len(file_matches),
|
|
405
|
+
}
|
|
406
|
+
)
|
|
405
407
|
|
|
406
408
|
return {"success": True, "count": total_matches, "files": files}
|
|
407
409
|
|
|
@@ -519,7 +521,7 @@ def summarize_search_results(
|
|
|
519
521
|
truncated_line += "..."
|
|
520
522
|
sample_lines.append(f"L{line_num}: {truncated_line}")
|
|
521
523
|
remaining_lines -= 1
|
|
522
|
-
|
|
524
|
+
|
|
523
525
|
# Ensure we have at least some sample lines if matches exist
|
|
524
526
|
if not sample_lines and file_matches:
|
|
525
527
|
# Fallback: create a simple summary line
|
|
@@ -637,7 +639,9 @@ def write_files_to_temp(files: list[str]) -> TempFileList:
|
|
|
637
639
|
fd, temp_path = tempfile.mkstemp(prefix="rg-files-", suffix=".lst")
|
|
638
640
|
os.close(fd)
|
|
639
641
|
content = "\n".join(files)
|
|
640
|
-
|
|
642
|
+
from ...encoding_utils import write_file_safe
|
|
643
|
+
|
|
644
|
+
write_file_safe(temp_path, content)
|
|
641
645
|
return TempFileList(path=temp_path)
|
|
642
646
|
|
|
643
647
|
|
|
@@ -249,7 +249,7 @@ class FindAndGrepTool(BaseMCPTool):
|
|
|
249
249
|
return True
|
|
250
250
|
|
|
251
251
|
@handle_mcp_errors("find_and_grep")
|
|
252
|
-
async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
|
|
252
|
+
async def execute(self, arguments: dict[str, Any]) -> dict[str, Any] | int:
|
|
253
253
|
# Check if both fd and rg commands are available
|
|
254
254
|
missing_commands = fd_rg_utils.get_missing_commands()
|
|
255
255
|
if missing_commands:
|
|
@@ -341,14 +341,14 @@ class FindAndGrepTool(BaseMCPTool):
|
|
|
341
341
|
files.sort()
|
|
342
342
|
elif sort_mode == "mtime":
|
|
343
343
|
|
|
344
|
-
def get_mtime(p):
|
|
344
|
+
def get_mtime(p: str) -> float:
|
|
345
345
|
path_obj = pathlib.Path(p)
|
|
346
346
|
return path_obj.stat().st_mtime if path_obj.exists() else 0
|
|
347
347
|
|
|
348
348
|
files.sort(key=get_mtime, reverse=True)
|
|
349
349
|
elif sort_mode == "size":
|
|
350
350
|
|
|
351
|
-
def get_size(p):
|
|
351
|
+
def get_size(p: str) -> int:
|
|
352
352
|
path_obj = pathlib.Path(p)
|
|
353
353
|
return path_obj.stat().st_size if path_obj.exists() else 0
|
|
354
354
|
|
|
@@ -628,9 +628,11 @@ class FindAndGrepTool(BaseMCPTool):
|
|
|
628
628
|
"success": True,
|
|
629
629
|
"results": matches,
|
|
630
630
|
"count": len(matches),
|
|
631
|
-
"files":
|
|
632
|
-
|
|
633
|
-
|
|
631
|
+
"files": (
|
|
632
|
+
fd_rg_utils.group_matches_by_file(matches)["files"]
|
|
633
|
+
if matches
|
|
634
|
+
else []
|
|
635
|
+
),
|
|
634
636
|
"summary": fd_rg_utils.summarize_search_results(matches),
|
|
635
637
|
"meta": result["meta"],
|
|
636
638
|
}
|
|
@@ -301,7 +301,7 @@ class ListFilesTool(BaseMCPTool):
|
|
|
301
301
|
saved_path = file_manager.save_to_file(
|
|
302
302
|
content=json_content, base_name=output_file
|
|
303
303
|
)
|
|
304
|
-
result["output_file"] = saved_path
|
|
304
|
+
result["output_file"] = saved_path # type: ignore[assignment]
|
|
305
305
|
|
|
306
306
|
if suppress_output:
|
|
307
307
|
# Return minimal response to save tokens
|
|
@@ -314,7 +314,7 @@ class ListFilesTool(BaseMCPTool):
|
|
|
314
314
|
}
|
|
315
315
|
except Exception as e:
|
|
316
316
|
logger.warning(f"Failed to save output file: {e}")
|
|
317
|
-
result["output_file_error"] = str(e)
|
|
317
|
+
result["output_file_error"] = str(e) # type: ignore[assignment]
|
|
318
318
|
|
|
319
319
|
return result
|
|
320
320
|
|
|
@@ -350,7 +350,7 @@ class ListFilesTool(BaseMCPTool):
|
|
|
350
350
|
except (OSError, ValueError): # nosec B112
|
|
351
351
|
continue
|
|
352
352
|
|
|
353
|
-
|
|
353
|
+
final_result: dict[str, Any] = {
|
|
354
354
|
"success": True,
|
|
355
355
|
"count": len(results),
|
|
356
356
|
"truncated": truncated,
|
|
@@ -396,7 +396,7 @@ class ListFilesTool(BaseMCPTool):
|
|
|
396
396
|
saved_path = file_manager.save_to_file(
|
|
397
397
|
content=json_content, base_name=output_file
|
|
398
398
|
)
|
|
399
|
-
|
|
399
|
+
final_result["output_file"] = saved_path
|
|
400
400
|
|
|
401
401
|
if suppress_output:
|
|
402
402
|
# Return minimal response to save tokens
|
|
@@ -408,6 +408,6 @@ class ListFilesTool(BaseMCPTool):
|
|
|
408
408
|
}
|
|
409
409
|
except Exception as e:
|
|
410
410
|
logger.warning(f"Failed to save output file: {e}")
|
|
411
|
-
|
|
411
|
+
final_result["output_file_error"] = str(e)
|
|
412
412
|
|
|
413
|
-
return
|
|
413
|
+
return final_result
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Output format parameter validation for search_content tool.
|
|
4
|
+
|
|
5
|
+
Ensures mutual exclusion of output format parameters to prevent conflicts
|
|
6
|
+
and provides multilingual error messages with token efficiency guidance.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import locale
|
|
10
|
+
import os
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OutputFormatValidator:
    """Validator enforcing mutual exclusion of output format parameters.

    At most one of the flags in ``OUTPUT_FORMAT_PARAMS`` may be truthy in a
    tool-arguments dictionary. Flags are always examined in the fixed order
    given by ``_FORMAT_PRIORITY`` so that error messages and the reported
    active format are reproducible from run to run (iterating the set directly
    would depend on per-process string hash randomization).
    """

    # Canonical order in which format flags are checked and reported.
    _FORMAT_PRIORITY: tuple[str, ...] = (
        "total_only",
        "count_only_matches",
        "summary_only",
        "group_by_file",
        "suppress_output",
    )

    # Output format parameters that are mutually exclusive
    OUTPUT_FORMAT_PARAMS = set(_FORMAT_PRIORITY)

    # Token efficiency guidance for error messages
    FORMAT_EFFICIENCY_GUIDE = {
        "total_only": "~10 tokens (most efficient for count queries)",
        "count_only_matches": "~50-200 tokens (file distribution analysis)",
        "summary_only": "~500-2000 tokens (initial investigation)",
        "group_by_file": "~2000-10000 tokens (context-aware review)",
        "suppress_output": "0 tokens (cache only, no output)",
    }

    def _ordered_params(self) -> list[str]:
        """Return ``OUTPUT_FORMAT_PARAMS`` as a deterministically ordered list.

        Known parameters come first, in ``_FORMAT_PRIORITY`` order; any extra
        names added to ``OUTPUT_FORMAT_PARAMS`` (e.g. by a subclass) follow in
        sorted order.
        """
        known = [p for p in self._FORMAT_PRIORITY if p in self.OUTPUT_FORMAT_PARAMS]
        extras = sorted(set(self.OUTPUT_FORMAT_PARAMS) - set(known))
        return known + extras

    def _detect_language(self) -> str:
        """Detect preferred message language from the environment.

        Returns:
            ``"ja"`` when the ``LANG`` environment variable or the current
            locale starts with ``"ja"``, otherwise ``"en"``.
        """
        # Check environment variables for language preference
        lang = os.environ.get("LANG", "")
        if lang.startswith("ja"):
            return "ja"

        # Check locale
        try:
            current_locale = locale.getlocale()[0]
            if current_locale and current_locale.startswith("ja"):
                return "ja"
        except Exception:
            # Language detection is best-effort only: a failing locale lookup
            # must never mask the validation error being reported.
            pass

        # Default to English
        return "en"

    def _get_error_message(self, specified_formats: list[str]) -> str:
        """Generate a localized error message with usage examples.

        Args:
            specified_formats: Names of the conflicting format parameters.

        Returns:
            The full multi-line error message in Japanese or English,
            depending on ``_detect_language()``.
        """
        lang = self._detect_language()
        format_list = ", ".join(specified_formats)
        exclusive_list = ", ".join(self._ordered_params())

        # The efficiency guide lines are identical in both languages.
        guide = "".join(
            f" • {param}: {desc}\n"
            for param, desc in self.FORMAT_EFFICIENCY_GUIDE.items()
        )

        if lang == "ja":
            # Japanese error message
            header = (
                f"⚠️ 出力形式パラメータエラー: 相互排他的なパラメータが同時に指定されています: {format_list}\n\n"
                f"🔒 相互排他的パラメータ: {exclusive_list}\n\n"
                "💡 トークン効率ガイド:\n"
            )
            footer = (
                "\n📋 推奨使用パターン:\n"
                " • 件数確認: total_only=true\n"
                " • ファイル分布: count_only_matches=true\n"
                " • 初期調査: summary_only=true\n"
                " • 詳細レビュー: group_by_file=true\n"
                " • キャッシュのみ: suppress_output=true\n\n"
                '❌ 間違った例: {"total_only": true, "summary_only": true}\n'
                '✅ 正しい例: {"total_only": true}'
            )
        else:
            # English error message
            header = (
                f"⚠️ Output Format Parameter Error: Multiple mutually exclusive formats specified: {format_list}\n\n"
                f"🔒 Mutually Exclusive Parameters: {exclusive_list}\n\n"
                "💡 Token Efficiency Guide:\n"
            )
            footer = (
                "\n📋 Recommended Usage Patterns:\n"
                " • Count validation: total_only=true\n"
                " • File distribution: count_only_matches=true\n"
                " • Initial investigation: summary_only=true\n"
                " • Detailed review: group_by_file=true\n"
                " • Cache only: suppress_output=true\n\n"
                '❌ Incorrect: {"total_only": true, "summary_only": true}\n'
                '✅ Correct: {"total_only": true}'
            )

        return header + guide + footer

    def validate_output_format_exclusion(self, arguments: dict[str, Any]) -> None:
        """
        Validate that only one output format parameter is specified.

        Args:
            arguments: Tool arguments dictionary

        Raises:
            ValueError: If multiple output format parameters are specified
        """
        # A flag counts as "specified" only when its value is truthy.
        specified_formats = [
            param for param in self._ordered_params() if arguments.get(param, False)
        ]

        if len(specified_formats) > 1:
            raise ValueError(self._get_error_message(specified_formats))

    def get_active_format(self, arguments: dict[str, Any]) -> str:
        """
        Get the active output format from arguments.

        When several flags are truthy (i.e. the arguments were not validated),
        the first one in priority order wins, so the result is deterministic.

        Args:
            arguments: Tool arguments dictionary

        Returns:
            Active format name or "normal" if none specified
        """
        for param in self._ordered_params():
            if arguments.get(param, False):
                return param
        return "normal"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# Global validator instance
|
|
140
|
+
_default_validator: OutputFormatValidator | None = None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def get_default_validator() -> OutputFormatValidator:
    """Return the shared module-level validator, creating it on first use."""
    global _default_validator
    validator = _default_validator
    if validator is None:
        # First call: build the instance and cache it for later callers.
        validator = OutputFormatValidator()
        _default_validator = validator
    return validator
|
|
@@ -18,6 +18,7 @@ from ..utils.gitignore_detector import get_default_detector
|
|
|
18
18
|
from ..utils.search_cache import get_default_cache
|
|
19
19
|
from . import fd_rg_utils
|
|
20
20
|
from .base_tool import BaseMCPTool
|
|
21
|
+
from .output_format_validator import get_default_validator
|
|
21
22
|
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
23
24
|
|
|
@@ -53,7 +54,26 @@ class SearchContentTool(BaseMCPTool):
|
|
|
53
54
|
def get_tool_definition(self) -> dict[str, Any]:
|
|
54
55
|
return {
|
|
55
56
|
"name": "search_content",
|
|
56
|
-
"description": "Search text content inside files using ripgrep. Supports regex patterns, case sensitivity, context lines, and various output formats. Can search in directories or specific files
|
|
57
|
+
"description": """Search text content inside files using ripgrep. Supports regex patterns, case sensitivity, context lines, and various output formats. Can search in directories or specific files.
|
|
58
|
+
|
|
59
|
+
⚡ IMPORTANT: Token Efficiency Guide
|
|
60
|
+
Choose output format parameters based on your needs to minimize token usage and maximize performance with efficient search strategies:
|
|
61
|
+
|
|
62
|
+
📋 RECOMMENDED WORKFLOW (Most Efficient Approach):
|
|
63
|
+
1. START with total_only=true parameter for initial count validation (~10 tokens)
|
|
64
|
+
2. IF more detail needed, use count_only_matches=true parameter for file distribution (~50-200 tokens)
|
|
65
|
+
3. IF context needed, use summary_only=true parameter for overview (~500-2000 tokens)
|
|
66
|
+
4. ONLY use full results when specific content review is required (~2000-50000+ tokens)
|
|
67
|
+
|
|
68
|
+
⚡ TOKEN EFFICIENCY COMPARISON:
|
|
69
|
+
- total_only: ~10 tokens (single number) - MOST EFFICIENT for count queries
|
|
70
|
+
- count_only_matches: ~50-200 tokens (file counts) - Good for file distribution analysis
|
|
71
|
+
- summary_only: ~500-2000 tokens (condensed overview) - initial investigation
|
|
72
|
+
- group_by_file: ~2000-10000 tokens (organized by file) - Context-aware review
|
|
73
|
+
- optimize_paths: 10-30% reduction (path compression) - Use with deep directory structures
|
|
74
|
+
- Full results: ~2000-50000+ tokens - Use sparingly for detailed analysis
|
|
75
|
+
|
|
76
|
+
⚠️ MUTUALLY EXCLUSIVE: Only one output format parameter can be true at a time. Cannot be combined with other format parameters.""",
|
|
57
77
|
"inputSchema": {
|
|
58
78
|
"type": "object",
|
|
59
79
|
"properties": {
|
|
@@ -144,27 +164,27 @@ class SearchContentTool(BaseMCPTool):
|
|
|
144
164
|
"count_only_matches": {
|
|
145
165
|
"type": "boolean",
|
|
146
166
|
"default": False,
|
|
147
|
-
"description": "Return only match counts per file
|
|
167
|
+
"description": "⚡ EXCLUSIVE: Return only match counts per file (~50-200 tokens). RECOMMENDED for: File distribution analysis, understanding match spread across files. Cannot be combined with other output formats.",
|
|
148
168
|
},
|
|
149
169
|
"summary_only": {
|
|
150
170
|
"type": "boolean",
|
|
151
171
|
"default": False,
|
|
152
|
-
"description": "Return
|
|
172
|
+
"description": "⚡ EXCLUSIVE: Return condensed overview with top files and sample matches (~500-2000 tokens). RECOMMENDED for: Initial investigation, scope confirmation, pattern validation. Cannot be combined with other output formats.",
|
|
153
173
|
},
|
|
154
174
|
"optimize_paths": {
|
|
155
175
|
"type": "boolean",
|
|
156
176
|
"default": False,
|
|
157
|
-
"description": "Optimize file paths
|
|
177
|
+
"description": "⚡ EXCLUSIVE: Optimize file paths by removing common prefixes (10-30% token reduction). RECOMMENDED for: Deep directory structures, large codebases. Cannot be combined with other output formats.",
|
|
158
178
|
},
|
|
159
179
|
"group_by_file": {
|
|
160
180
|
"type": "boolean",
|
|
161
181
|
"default": False,
|
|
162
|
-
"description": "Group results by file
|
|
182
|
+
"description": "⚡ EXCLUSIVE: Group results by file, eliminating path duplication (~2000-10000 tokens). RECOMMENDED for: Context-aware review, analyzing matches within specific files. Cannot be combined with other output formats.",
|
|
163
183
|
},
|
|
164
184
|
"total_only": {
|
|
165
185
|
"type": "boolean",
|
|
166
186
|
"default": False,
|
|
167
|
-
"description": "Return only
|
|
187
|
+
"description": "⚡ EXCLUSIVE: Return only total match count as single number (~10 tokens - MOST EFFICIENT). RECOMMENDED for: Count validation, filtering decisions, existence checks. Takes priority over all other formats. Cannot be combined with other output formats.",
|
|
168
188
|
},
|
|
169
189
|
"output_file": {
|
|
170
190
|
"type": "string",
|
|
@@ -217,6 +237,10 @@ class SearchContentTool(BaseMCPTool):
|
|
|
217
237
|
return validated
|
|
218
238
|
|
|
219
239
|
def validate_arguments(self, arguments: dict[str, Any]) -> bool:
|
|
240
|
+
# Validate output format exclusion first
|
|
241
|
+
validator = get_default_validator()
|
|
242
|
+
validator.validate_output_format_exclusion(arguments)
|
|
243
|
+
|
|
220
244
|
if (
|
|
221
245
|
"query" not in arguments
|
|
222
246
|
or not isinstance(arguments["query"], str)
|
|
@@ -343,15 +367,24 @@ class SearchContentTool(BaseMCPTool):
|
|
|
343
367
|
if cached_result is not None:
|
|
344
368
|
# Check if this is a total_only request
|
|
345
369
|
total_only_requested = arguments.get("total_only", False)
|
|
346
|
-
|
|
370
|
+
|
|
347
371
|
if total_only_requested:
|
|
348
372
|
# For total_only mode, always return integer if possible
|
|
349
373
|
if isinstance(cached_result, int):
|
|
350
374
|
return cached_result
|
|
351
|
-
elif
|
|
352
|
-
|
|
375
|
+
elif (
|
|
376
|
+
isinstance(cached_result, dict)
|
|
377
|
+
and "total_matches" in cached_result
|
|
378
|
+
):
|
|
379
|
+
total_matches = cached_result["total_matches"]
|
|
380
|
+
return (
|
|
381
|
+
int(total_matches)
|
|
382
|
+
if isinstance(total_matches, (int, float))
|
|
383
|
+
else 0
|
|
384
|
+
)
|
|
353
385
|
elif isinstance(cached_result, dict) and "count" in cached_result:
|
|
354
|
-
|
|
386
|
+
count = cached_result["count"]
|
|
387
|
+
return int(count) if isinstance(count, (int, float)) else 0
|
|
355
388
|
else:
|
|
356
389
|
# Fallback: extract count from dict or return 0
|
|
357
390
|
return 0
|
|
@@ -751,11 +784,11 @@ class SearchContentTool(BaseMCPTool):
|
|
|
751
784
|
"elapsed_ms": elapsed_ms,
|
|
752
785
|
"results": matches,
|
|
753
786
|
"summary": fd_rg_utils.summarize_search_results(matches),
|
|
754
|
-
"grouped_by_file":
|
|
755
|
-
"files"
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
787
|
+
"grouped_by_file": (
|
|
788
|
+
fd_rg_utils.group_matches_by_file(matches)["files"]
|
|
789
|
+
if matches
|
|
790
|
+
else []
|
|
791
|
+
),
|
|
759
792
|
}
|
|
760
793
|
|
|
761
794
|
# Convert to JSON for file output
|