tree-sitter-analyzer 1.8.4__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff shows the changes between publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of tree-sitter-analyzer might be problematic.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/api.py +4 -4
- tree_sitter_analyzer/cli/argument_validator.py +29 -17
- tree_sitter_analyzer/cli/commands/advanced_command.py +7 -5
- tree_sitter_analyzer/cli/commands/structure_command.py +7 -5
- tree_sitter_analyzer/cli/commands/summary_command.py +10 -6
- tree_sitter_analyzer/cli/commands/table_command.py +8 -7
- tree_sitter_analyzer/cli/info_commands.py +1 -1
- tree_sitter_analyzer/cli_main.py +3 -2
- tree_sitter_analyzer/core/analysis_engine.py +5 -5
- tree_sitter_analyzer/core/cache_service.py +3 -1
- tree_sitter_analyzer/core/query.py +17 -5
- tree_sitter_analyzer/core/query_service.py +1 -1
- tree_sitter_analyzer/encoding_utils.py +3 -3
- tree_sitter_analyzer/exceptions.py +61 -50
- tree_sitter_analyzer/file_handler.py +3 -0
- tree_sitter_analyzer/formatters/base_formatter.py +10 -5
- tree_sitter_analyzer/formatters/formatter_registry.py +83 -68
- tree_sitter_analyzer/formatters/html_formatter.py +90 -54
- tree_sitter_analyzer/formatters/javascript_formatter.py +21 -16
- tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -6
- tree_sitter_analyzer/formatters/markdown_formatter.py +247 -124
- tree_sitter_analyzer/formatters/python_formatter.py +61 -38
- tree_sitter_analyzer/formatters/typescript_formatter.py +113 -45
- tree_sitter_analyzer/interfaces/mcp_server.py +2 -2
- tree_sitter_analyzer/language_detector.py +6 -6
- tree_sitter_analyzer/language_loader.py +3 -1
- tree_sitter_analyzer/languages/css_plugin.py +120 -61
- tree_sitter_analyzer/languages/html_plugin.py +159 -62
- tree_sitter_analyzer/languages/java_plugin.py +42 -34
- tree_sitter_analyzer/languages/javascript_plugin.py +59 -30
- tree_sitter_analyzer/languages/markdown_plugin.py +402 -368
- tree_sitter_analyzer/languages/python_plugin.py +111 -64
- tree_sitter_analyzer/languages/typescript_plugin.py +241 -132
- tree_sitter_analyzer/mcp/server.py +22 -18
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +13 -8
- tree_sitter_analyzer/mcp/tools/base_tool.py +2 -2
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +232 -26
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +31 -23
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +21 -19
- tree_sitter_analyzer/mcp/tools/query_tool.py +17 -18
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +30 -31
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +131 -77
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +29 -16
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +64 -51
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +34 -24
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +8 -4
- tree_sitter_analyzer/models.py +7 -5
- tree_sitter_analyzer/plugins/base.py +9 -7
- tree_sitter_analyzer/plugins/manager.py +1 -0
- tree_sitter_analyzer/queries/css.py +2 -21
- tree_sitter_analyzer/queries/html.py +2 -15
- tree_sitter_analyzer/queries/markdown.py +30 -41
- tree_sitter_analyzer/queries/python.py +20 -5
- tree_sitter_analyzer/query_loader.py +5 -5
- tree_sitter_analyzer/security/validator.py +114 -86
- tree_sitter_analyzer/utils/__init__.py +58 -28
- tree_sitter_analyzer/utils/tree_sitter_compat.py +72 -65
- tree_sitter_analyzer/utils.py +26 -15
- {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/METADATA +1 -1
- tree_sitter_analyzer-1.9.0.dist-info/RECORD +109 -0
- tree_sitter_analyzer-1.8.4.dist-info/RECORD +0 -109
- {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,7 @@ except ImportError:
         pass
 
     class InitializationOptions:
-        def __init__(self, **kwargs):
+        def __init__(self, **kwargs: Any) -> None:
             pass
 
     class Tool:
@@ -41,7 +41,7 @@ except ImportError:
     class TextContent:
         pass
 
-    def stdio_server():
+    def stdio_server() -> None:
         pass
 
 
@@ -71,7 +71,7 @@ from .tools.table_format_tool import TableFormatTool
 try:
     from .tools.universal_analyze_tool import UniversalAnalyzeTool
 except ImportError:
-    UniversalAnalyzeTool = None
+    UniversalAnalyzeTool: type[Any] | None = None
 
 # Set up logging
 logger = setup_logger(__name__)
@@ -85,7 +85,7 @@ class TreeSitterAnalyzerMCPServer:
     integrating with existing analyzer components.
     """
 
-    def __init__(self, project_root: str = None) -> None:
+    def __init__(self, project_root: str | None = None) -> None:
         """Initialize the MCP server with analyzer components."""
         self.server: Server | None = None
         self._initialization_complete = False
@@ -116,9 +116,9 @@ class TreeSitterAnalyzerMCPServer:
             try:
                 self.universal_analyze_tool = UniversalAnalyzeTool(project_root)
             except Exception:
-                self.universal_analyze_tool = None
+                self.universal_analyze_tool: Any = None
         else:
-            self.universal_analyze_tool = None
+            self.universal_analyze_tool: Any = None
 
         # Initialize MCP resources
         self.code_file_resource = CodeFileResource()
@@ -132,7 +132,9 @@ class TreeSitterAnalyzerMCPServer:
 
         self._initialization_complete = True
         try:
-            logger.info(
+            logger.info(
+                f"MCP server initialization complete: {self.name} v{self.version}"
+            )
         except Exception:
             # Gracefully handle logging failures during initialization
             pass
@@ -215,7 +217,9 @@ class TreeSitterAnalyzerMCPServer:
 
         if analysis_result is None or not analysis_result.success:
             error_msg = (
-                analysis_result.error_message
+                analysis_result.error_message or "Unknown error"
+                if analysis_result
+                else "Unknown error"
             )
             raise RuntimeError(f"Failed to analyze file: {file_path} - {error_msg}")
 
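The hunk above makes the error message None-safe: both a missing result object and a missing error_message fall back to "Unknown error". A minimal standalone sketch of the same pattern (the FakeResult class is invented for illustration):

from dataclasses import dataclass

@dataclass
class FakeResult:  # invented stand-in for the analyzer's result object
    success: bool
    error_message: str | None = None

def describe_failure(result: FakeResult | None) -> str:
    # Both a missing result and a missing message fall back to the default.
    return (result.error_message or "Unknown error") if result else "Unknown error"

assert describe_failure(None) == "Unknown error"
assert describe_failure(FakeResult(False)) == "Unknown error"
assert describe_failure(FakeResult(False, "parse failed")) == "parse failed"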
@@ -293,7 +297,7 @@ class TreeSitterAnalyzerMCPServer:
                 if hasattr(elem, "__dict__"):
                     detailed_elements.append(elem.__dict__)
                 else:
-                    detailed_elements.append(str(elem))
+                    detailed_elements.append({"element": str(elem)})
             result["detailed_elements"] = detailed_elements
 
         return result
@@ -301,24 +305,24 @@ class TreeSitterAnalyzerMCPServer:
     async def _read_resource(self, uri: str) -> dict[str, Any]:
         """
         Read a resource by URI.
-
+
         Args:
             uri: Resource URI to read
-
+
         Returns:
             Resource content
-
+
         Raises:
             ValueError: If URI is invalid or resource not found
         """
         if uri.startswith("code://file/"):
             # Extract file path from URI
-
-            return
+            result = await self.code_file_resource.read_resource(uri)
+            return {"content": result}
         elif uri.startswith("code://stats/"):
             # Extract stats type from URI
-
-            return
+            result = await self.project_stats_resource.read_resource(uri)
+            return {"content": result}
         else:
             raise ValueError(f"Unknown resource URI: {uri}")
 
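Both resource branches above now return a wrapped payload instead of falling through with a bare return. A minimal stand-in for the same dispatch-and-wrap pattern; the URI prefixes come from the hunk, while the payloads are invented here (the real code delegates to CodeFileResource and the project stats resource):

import asyncio

async def read_resource(uri: str) -> dict[str, object]:
    if uri.startswith("code://file/"):
        return {"content": f"<file body for {uri.removeprefix('code://file/')}>"}
    elif uri.startswith("code://stats/"):
        return {"content": {"stats_type": uri.removeprefix("code://stats/")}}
    raise ValueError(f"Unknown resource URI: {uri}")

print(asyncio.run(read_resource("code://file/src/example.py")))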
@@ -727,7 +731,7 @@ class TreeSitterAnalyzerMCPServer:
             pass  # Silently ignore logging errors during shutdown
 
 
-def parse_mcp_args(args=None) -> argparse.Namespace:
+def parse_mcp_args(args: list[str] | None = None) -> argparse.Namespace:
     """Parse command line arguments for MCP server."""
     parser = argparse.ArgumentParser(
         description="Tree-sitter Analyzer MCP Server",
@@ -798,7 +802,7 @@ async def main() -> None:
 
         server = TreeSitterAnalyzerMCPServer(project_root)
         await server.run()
-
+
         # Exit successfully after server run completes
         sys.exit(0)
     except KeyboardInterrupt:
@@ -36,7 +36,7 @@ class AnalyzeScaleTool(BaseMCPTool):
     for LLM workflow efficiency.
     """
 
-    def __init__(self, project_root: str = None) -> None:
+    def __init__(self, project_root: str | None = None) -> None:
         """Initialize the analyze scale tool."""
         # Use unified analysis engine instead of deprecated AdvancedAnalyzer
         super().__init__(project_root)
@@ -464,7 +464,7 @@ class AnalyzeScaleTool(BaseMCPTool):
         universal_result = await self.analysis_engine.analyze(request)
         if not universal_result or not universal_result.success:
             error_msg = (
-                universal_result.error_message
+                universal_result.error_message or "Unknown error"
                 if universal_result
                 else "Unknown error"
             )
@@ -708,12 +708,12 @@ class AnalyzeScaleTool(BaseMCPTool):
     ) -> dict[str, Any]:
         """
         Create analysis result for JSON files.
-
+
         Args:
             file_path: Path to the JSON file
             file_metrics: Basic file metrics
             include_guidance: Whether to include guidance
-
+
         Returns:
             Analysis result for JSON file
         """
@@ -723,7 +723,8 @@ class AnalyzeScaleTool(BaseMCPTool):
             "language": "json",
             "file_size_bytes": file_metrics["file_size_bytes"],
             "total_lines": file_metrics["total_lines"],
-            "non_empty_lines": file_metrics["total_lines"]
+            "non_empty_lines": file_metrics["total_lines"]
+            - file_metrics["blank_lines"],
             "estimated_tokens": file_metrics["estimated_tokens"],
             "complexity_metrics": {
                 "total_elements": 0,
@@ -735,14 +736,18 @@ class AnalyzeScaleTool(BaseMCPTool):
                 "methods": [],
                 "fields": [],
             },
-            "scale_category": "small"
+            "scale_category": "small"
+            if file_metrics["total_lines"] < 100
+            else "medium"
+            if file_metrics["total_lines"] < 1000
+            else "large",
             "analysis_recommendations": {
                 "suitable_for_full_analysis": file_metrics["total_lines"] < 1000,
                 "recommended_approach": "JSON files are configuration/data files - structural analysis not applicable",
                 "token_efficiency_notes": "JSON files can be read directly without tree-sitter parsing",
             },
         }
-
+
         if include_guidance:
             result["llm_analysis_guidance"] = {
                 "file_characteristics": "JSON configuration/data file",
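The scale_category field is now derived from the line count instead of being hard-coded to "small". The thresholds from the hunk, restated as a standalone function:

def scale_category(total_lines: int) -> str:
    # <100 lines -> small, <1000 -> medium, otherwise large (thresholds from the diff above)
    return "small" if total_lines < 100 else "medium" if total_lines < 1000 else "large"

assert scale_category(50) == "small"
assert scale_category(500) == "medium"
assert scale_category(5000) == "large"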
@@ -750,7 +755,7 @@ class AnalyzeScaleTool(BaseMCPTool):
                 "token_optimization": "Use simple file reading tools for JSON content",
                 "analysis_focus": "Data structure and configuration values",
             }
-
+
         return result
 
     def get_tool_definition(self) -> dict[str, Any]:
@@ -121,7 +121,7 @@ class MCPTool(BaseMCPTool):
         Returns:
             Dictionary containing execution results
         """
-
+        raise NotImplementedError("Subclasses must implement execute method")
 
     def validate_arguments(self, arguments: dict[str, Any]) -> bool:
         """
@@ -136,4 +136,4 @@ class MCPTool(BaseMCPTool):
         Raises:
             ValueError: If arguments are invalid
         """
-
+        raise NotImplementedError("Subclasses must implement validate_arguments method")
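Because the stubs now raise NotImplementedError, a subclass that forgets an override fails loudly at call time instead of silently returning None. A sketch of a conforming subclass; the import path follows the file list above, the class name and behavior are invented, and the signature of execute is assumed since its def line is not visible in this hunk:

from typing import Any

from tree_sitter_analyzer.mcp.tools.base_tool import MCPTool  # path per the file list

class EchoTool(MCPTool):  # hypothetical subclass, for illustration only
    def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
        # Assumed synchronous signature; only the docstring and stub appear in the hunk.
        return {"echo": arguments}

    def validate_arguments(self, arguments: dict[str, Any]) -> bool:
        # Signature as shown in the hunk; raises ValueError on invalid input.
        if not arguments:
            raise ValueError("arguments must not be empty")
        return True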
@@ -84,7 +84,7 @@ async def run_command_capture(
     if cmd and not check_external_command(cmd[0]):
         error_msg = f"Command '{cmd[0]}' not found in PATH. Please install {cmd[0]} to use this functionality."
         return 127, b"", error_msg.encode()
-
+
     try:
         # Create process
         proc = await asyncio.create_subprocess_exec(
@@ -106,7 +106,7 @@ async def run_command_capture(
         stdout, stderr = await asyncio.wait_for(
             proc.communicate(input=input_data), timeout=timeout_s
         )
-        return proc.returncode, stdout, stderr
+        return proc.returncode or 0, stdout, stderr
    except asyncio.TimeoutError:
         try:
             proc.kill()
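asyncio's Process.returncode is typed int | None, so the added "or 0" keeps the declared tuple[int, bytes, bytes] return type honest. A small standalone illustration:

import asyncio
import sys

async def echo_returncode() -> int:
    proc = await asyncio.create_subprocess_exec(
        sys.executable, "-c", "print('hi')",
        stdout=asyncio.subprocess.PIPE,
    )
    await proc.communicate()
    # returncode is typed int | None, hence the "or 0" coercion used in the diff above.
    return proc.returncode or 0

print(asyncio.run(echo_returncode()))  # prints 0 on success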
@@ -222,7 +222,7 @@ def build_rg_command(
     """Build ripgrep command with JSON output and options."""
     if count_only_matches:
         # Use --count-matches for count-only mode (no JSON output)
-        cmd
+        cmd = [
             "rg",
             "--count-matches",
             "--no-heading",
@@ -231,7 +231,7 @@ def build_rg_command(
         ]
     else:
         # Use --json for full match details
-        cmd
+        cmd = [
             "rg",
             "--json",
             "--no-heading",
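For reference, the two command heads built in these branches differ only in the output mode: --count-matches emits per-file counts, while --json emits one JSON event per line. Restated as plain lists (flag names taken from the hunks; the remaining options are appended later by build_rg_command):

# Heads of the two ripgrep invocations, as listed in the hunks above.
count_head = ["rg", "--count-matches", "--no-heading"]  # per-file match counts
json_head = ["rg", "--json", "--no-heading"]            # one JSON event per line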
@@ -286,11 +286,15 @@ def build_rg_command(
     # Normalize filesize
     cmd += ["--max-filesize", normalize_max_filesize(max_filesize)]
 
-    #
-
-
-
-
+    # Add timeout if provided and > 0 (enable timeout for performance optimization)
+    if timeout_ms is not None and timeout_ms > 0:
+        # effective_timeout = clamp_int(
+        #     timeout_ms, DEFAULT_RG_TIMEOUT_MS, RG_TIMEOUT_HARD_CAP_MS
+        # )  # Commented out as not used yet
+        # Use timeout in milliseconds for better control
+        # Note: We'll handle timeout at the process level instead of ripgrep flag
+        # to ensure compatibility across ripgrep versions
+        pass
 
     # Query must be last before roots/files
     cmd.append(query)
@@ -307,39 +311,63 @@
 def parse_rg_json_lines_to_matches(stdout_bytes: bytes) -> list[dict[str, Any]]:
     """Parse ripgrep JSON event stream and keep only match events."""
     results: list[dict[str, Any]] = []
-
+    lines = stdout_bytes.splitlines()
+
+    # Batch process lines for better performance
+    for raw_line in lines:
         if not raw_line.strip():
             continue
         try:
-
+            # Decode once and parse JSON
+            line_str = raw_line.decode("utf-8", errors="replace")
+            evt = json.loads(line_str)
         except (json.JSONDecodeError, UnicodeDecodeError):  # nosec B112
             continue
+
+        # Quick type check to skip non-match events
         if evt.get("type") != "match":
             continue
+
         data = evt.get("data", {})
-
+        if not data:
+            continue
+
+        # Extract data with safe defaults
+        path_data = data.get("path", {})
+        path_text = path_data.get("text") if path_data else None
+        if not path_text:
+            continue
+
         line_number = data.get("line_number")
-
-
-
+        lines_data = data.get("lines", {})
+        line_text = lines_data.get("text") if lines_data else ""
+
+        # Normalize line content to reduce token usage (optimized)
         normalized_line = " ".join(line_text.split()) if line_text else ""
 
-        # Simplify submatches -
+        # Simplify submatches - keep only essential position data
+        submatches_raw = data.get("submatches", [])
         simplified_matches = []
-
-
-
-
-
+        if submatches_raw:
+            for sm in submatches_raw:
+                start = sm.get("start")
+                end = sm.get("end")
+                if start is not None and end is not None:
+                    simplified_matches.append([start, end])
 
         results.append(
             {
                 "file": path_text,
-                "line": line_number,
-                "text": normalized_line,
-                "matches": simplified_matches,
+                "line": line_number,
+                "text": normalized_line,
+                "matches": simplified_matches,
             }
         )
+
+        # Early exit if we have too many results to prevent memory issues
+        if len(results) >= MAX_RESULTS_HARD_CAP:
+            break
+
     return results
 
 
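The rewritten parser keeps only "match" events from ripgrep's --json stream and reduces each one to file, line, normalized text, and submatch offsets (MAX_RESULTS_HARD_CAP is a module constant defined outside this hunk). A self-contained sketch of one input event and the record the function would produce for it; the sample event follows ripgrep's documented JSON output format:

import json

# One ripgrep --json "match" event.
event = {
    "type": "match",
    "data": {
        "path": {"text": "src/app.py"},
        "lines": {"text": "    def   handler(request):\n"},
        "line_number": 42,
        "submatches": [{"match": {"text": "handler"}, "start": 10, "end": 17}],
    },
}
stdout_bytes = (json.dumps(event) + "\n").encode("utf-8")

# parse_rg_json_lines_to_matches(stdout_bytes) would yield:
# [{"file": "src/app.py",
#   "line": 42,
#   "text": "def handler(request):",   # whitespace collapsed to single spaces
#   "matches": [[10, 17]]}]            # submatch [start, end] offsets only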
@@ -572,7 +600,9 @@ class TempFileList:
     def __enter__(self) -> TempFileList:
         return self
 
-    def __exit__(
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc: BaseException | None, tb: Any
+    ) -> None:
         with contextlib.suppress(Exception):
             Path(self.path).unlink(missing_ok=True)
 
@@ -585,7 +615,12 @@ class contextlib:  # minimal shim for suppress without importing globally
         def __enter__(self) -> None:  # noqa: D401
             return None
 
-        def __exit__(
+        def __exit__(
+            self,
+            exc_type: type[BaseException] | None,
+            exc: BaseException | None,
+            tb: Any,
+        ) -> bool:
             return exc_type is not None and issubclass(exc_type, self.exceptions)
 
 
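Returning a bool from __exit__ is what makes the shim behave like contextlib.suppress: a truthy return tells Python to swallow the active exception. A standalone illustration of that protocol, independent of the shim itself:

class Suppress:
    """Minimal stand-in showing the __exit__ protocol used by the shim above."""

    def __init__(self, *exceptions: type[BaseException]) -> None:
        self.exceptions = exceptions

    def __enter__(self) -> None:
        return None

    def __exit__(self, exc_type, exc, tb) -> bool:
        # True -> suppress the active exception; False/None -> let it propagate.
        return exc_type is not None and issubclass(exc_type, self.exceptions)

with Suppress(FileNotFoundError):
    open("definitely-missing.txt")  # the FileNotFoundError raised here is swallowed
print("still running")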
@@ -595,3 +630,174 @@ def write_files_to_temp(files: list[str]) -> TempFileList:
     content = "\n".join(files)
     Path(temp_path).write_text(content, encoding="utf-8")
     return TempFileList(path=temp_path)
+
+
+async def run_parallel_rg_searches(
+    commands: list[list[str]],
+    timeout_ms: int | None = None,
+    max_concurrent: int = 4,
+) -> list[tuple[int, bytes, bytes]]:
+    """
+    Run multiple ripgrep commands in parallel with concurrency control.
+
+    Args:
+        commands: List of ripgrep command lists to execute
+        timeout_ms: Timeout in milliseconds for each command
+        max_concurrent: Maximum number of concurrent processes (default: 4)
+
+    Returns:
+        List of (returncode, stdout, stderr) tuples in the same order as commands
+    """
+    if not commands:
+        return []
+
+    # Create semaphore to limit concurrent processes
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def run_single_command(cmd: list[str]) -> tuple[int, bytes, bytes]:
+        async with semaphore:
+            return await run_command_capture(cmd, timeout_ms=timeout_ms)
+
+    # Execute all commands concurrently
+    tasks = [run_single_command(cmd) for cmd in commands]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    # Handle exceptions and convert to proper format
+    processed_results: list[tuple[int, bytes, bytes]] = []
+    for _i, result in enumerate(results):
+        if isinstance(result, Exception):
+            # Convert exception to error result
+            error_msg = f"Command failed: {str(result)}"
+            processed_results.append((1, b"", error_msg.encode()))
+        elif isinstance(result, tuple) and len(result) == 3:
+            processed_results.append(result)
+        else:
+            # Fallback for unexpected result types
+            processed_results.append((1, b"", b"Unexpected result type"))
+
+    return processed_results
+
+
+def merge_rg_results(
+    results: list[tuple[int, bytes, bytes]],
+    count_only_mode: bool = False,
+) -> tuple[int, bytes, bytes]:
+    """
+    Merge results from multiple ripgrep executions.
+
+    Args:
+        results: List of (returncode, stdout, stderr) tuples
+        count_only_mode: Whether the results are from count-only mode
+
+    Returns:
+        Merged (returncode, stdout, stderr) tuple
+    """
+    if not results:
+        return (1, b"", b"No results to merge")
+
+    # Check if any command failed critically (not just "no matches found")
+    critical_failures = []
+    successful_results = []
+
+    for rc, stdout, stderr in results:
+        if rc not in (0, 1):  # 0=matches found, 1=no matches, others=errors
+            critical_failures.append((rc, stdout, stderr))
+        else:
+            successful_results.append((rc, stdout, stderr))
+
+    # If all commands failed critically, return the first failure
+    if not successful_results:
+        return critical_failures[0] if critical_failures else (1, b"", b"")
+
+    # Merge successful results
+    if count_only_mode:
+        return _merge_count_results(successful_results)
+    else:
+        return _merge_json_results(successful_results)
+
+
+def _merge_count_results(
+    results: list[tuple[int, bytes, bytes]],
+) -> tuple[int, bytes, bytes]:
+    """Merge count-only results from multiple ripgrep executions."""
+    merged_counts: dict[str, int] = {}
+    total_matches = 0
+
+    for rc, stdout, _stderr in results:
+        if rc in (0, 1):  # Success or no matches
+            file_counts = parse_rg_count_output(stdout)
+            # Remove the __total__ key and merge file counts
+            for file_path, count in file_counts.items():
+                if file_path != "__total__":
+                    merged_counts[file_path] = merged_counts.get(file_path, 0) + count
+                    total_matches += count
+
+    # Format as ripgrep count output
+    output_lines = []
+    for file_path, count in merged_counts.items():
+        output_lines.append(f"{file_path}:{count}")
+
+    merged_stdout = "\n".join(output_lines).encode("utf-8")
+
+    # Return code 0 if we have matches, 1 if no matches
+    return_code = 0 if total_matches > 0 else 1
+    return (return_code, merged_stdout, b"")
+
+
+def _merge_json_results(
+    results: list[tuple[int, bytes, bytes]],
+) -> tuple[int, bytes, bytes]:
+    """Merge JSON results from multiple ripgrep executions."""
+    merged_lines = []
+    has_matches = False
+
+    for rc, stdout, _stderr in results:
+        if rc in (0, 1):  # Success or no matches
+            if stdout.strip():
+                merged_lines.extend(stdout.splitlines())
+                if rc == 0:  # Has matches
+                    has_matches = True
+
+    merged_stdout = b"\n".join(merged_lines)
+    return_code = 0 if has_matches else 1
+    return (return_code, merged_stdout, b"")
+
+
+def split_roots_for_parallel_processing(
+    roots: list[str], max_chunks: int = 4
+) -> list[list[str]]:
+    """
+    Split roots into chunks for parallel processing.
+
+    Args:
+        roots: List of root directories
+        max_chunks: Maximum number of chunks to create
+
+    Returns:
+        List of root chunks for parallel processing
+    """
+    if not roots:
+        return []
+
+    if len(roots) <= max_chunks:
+        # Each root gets its own chunk
+        return [[root] for root in roots]
+
+    # Distribute roots across chunks
+    chunk_size = len(roots) // max_chunks
+    remainder = len(roots) % max_chunks
+
+    chunks = []
+    start = 0
+
+    for i in range(max_chunks):
+        # Add one extra item to first 'remainder' chunks
+        current_chunk_size = chunk_size + (1 if i < remainder else 0)
+        end = start + current_chunk_size
+
+        if start < len(roots):
+            chunks.append(roots[start:end])
+
+        start = end
+
+    return [chunk for chunk in chunks if chunk]  # Remove empty chunks