tree-sitter-analyzer 1.8.4__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tree-sitter-analyzer might be problematic.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +4 -4
  3. tree_sitter_analyzer/cli/argument_validator.py +29 -17
  4. tree_sitter_analyzer/cli/commands/advanced_command.py +7 -5
  5. tree_sitter_analyzer/cli/commands/structure_command.py +7 -5
  6. tree_sitter_analyzer/cli/commands/summary_command.py +10 -6
  7. tree_sitter_analyzer/cli/commands/table_command.py +8 -7
  8. tree_sitter_analyzer/cli/info_commands.py +1 -1
  9. tree_sitter_analyzer/cli_main.py +3 -2
  10. tree_sitter_analyzer/core/analysis_engine.py +5 -5
  11. tree_sitter_analyzer/core/cache_service.py +3 -1
  12. tree_sitter_analyzer/core/query.py +17 -5
  13. tree_sitter_analyzer/core/query_service.py +1 -1
  14. tree_sitter_analyzer/encoding_utils.py +3 -3
  15. tree_sitter_analyzer/exceptions.py +61 -50
  16. tree_sitter_analyzer/file_handler.py +3 -0
  17. tree_sitter_analyzer/formatters/base_formatter.py +10 -5
  18. tree_sitter_analyzer/formatters/formatter_registry.py +83 -68
  19. tree_sitter_analyzer/formatters/html_formatter.py +90 -64
  20. tree_sitter_analyzer/formatters/javascript_formatter.py +21 -16
  21. tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -6
  22. tree_sitter_analyzer/formatters/markdown_formatter.py +247 -124
  23. tree_sitter_analyzer/formatters/python_formatter.py +61 -38
  24. tree_sitter_analyzer/formatters/typescript_formatter.py +113 -45
  25. tree_sitter_analyzer/interfaces/mcp_server.py +2 -2
  26. tree_sitter_analyzer/language_detector.py +6 -6
  27. tree_sitter_analyzer/language_loader.py +3 -1
  28. tree_sitter_analyzer/languages/css_plugin.py +120 -61
  29. tree_sitter_analyzer/languages/html_plugin.py +159 -62
  30. tree_sitter_analyzer/languages/java_plugin.py +42 -34
  31. tree_sitter_analyzer/languages/javascript_plugin.py +59 -30
  32. tree_sitter_analyzer/languages/markdown_plugin.py +402 -368
  33. tree_sitter_analyzer/languages/python_plugin.py +111 -64
  34. tree_sitter_analyzer/languages/typescript_plugin.py +241 -132
  35. tree_sitter_analyzer/mcp/server.py +22 -18
  36. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +13 -8
  37. tree_sitter_analyzer/mcp/tools/base_tool.py +2 -2
  38. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +232 -26
  39. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +31 -23
  40. tree_sitter_analyzer/mcp/tools/list_files_tool.py +21 -19
  41. tree_sitter_analyzer/mcp/tools/query_tool.py +17 -18
  42. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +30 -31
  43. tree_sitter_analyzer/mcp/tools/search_content_tool.py +131 -77
  44. tree_sitter_analyzer/mcp/tools/table_format_tool.py +29 -16
  45. tree_sitter_analyzer/mcp/utils/file_output_factory.py +64 -51
  46. tree_sitter_analyzer/mcp/utils/file_output_manager.py +34 -24
  47. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +8 -4
  48. tree_sitter_analyzer/models.py +7 -5
  49. tree_sitter_analyzer/plugins/base.py +9 -7
  50. tree_sitter_analyzer/plugins/manager.py +1 -0
  51. tree_sitter_analyzer/queries/css.py +2 -21
  52. tree_sitter_analyzer/queries/html.py +2 -15
  53. tree_sitter_analyzer/queries/markdown.py +30 -41
  54. tree_sitter_analyzer/queries/python.py +20 -5
  55. tree_sitter_analyzer/query_loader.py +5 -5
  56. tree_sitter_analyzer/security/validator.py +114 -86
  57. tree_sitter_analyzer/utils/__init__.py +58 -28
  58. tree_sitter_analyzer/utils/tree_sitter_compat.py +72 -65
  59. tree_sitter_analyzer/utils.py +26 -15
  60. {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.1.dist-info}/METADATA +23 -6
  61. tree_sitter_analyzer-1.9.1.dist-info/RECORD +109 -0
  62. tree_sitter_analyzer-1.8.4.dist-info/RECORD +0 -109
  63. {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.1.dist-info}/WHEEL +0 -0
  64. {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.1.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/mcp/server.py

@@ -29,7 +29,7 @@ except ImportError:
         pass
 
     class InitializationOptions:
-        def __init__(self, **kwargs):
+        def __init__(self, **kwargs: Any) -> None:
             pass
 
     class Tool:
@@ -41,7 +41,7 @@ except ImportError:
     class TextContent:
         pass
 
-    def stdio_server():
+    def stdio_server() -> None:
        pass
 
 
@@ -71,7 +71,7 @@ from .tools.table_format_tool import TableFormatTool
 try:
     from .tools.universal_analyze_tool import UniversalAnalyzeTool
 except ImportError:
-    UniversalAnalyzeTool = None
+    UniversalAnalyzeTool: type[Any] | None = None
 
 # Set up logging
 logger = setup_logger(__name__)
@@ -85,7 +85,7 @@ class TreeSitterAnalyzerMCPServer:
     integrating with existing analyzer components.
     """
 
-    def __init__(self, project_root: str = None) -> None:
+    def __init__(self, project_root: str | None = None) -> None:
         """Initialize the MCP server with analyzer components."""
         self.server: Server | None = None
         self._initialization_complete = False
@@ -116,9 +116,9 @@ class TreeSitterAnalyzerMCPServer:
             try:
                 self.universal_analyze_tool = UniversalAnalyzeTool(project_root)
             except Exception:
-                self.universal_analyze_tool = None
+                self.universal_analyze_tool: Any = None
         else:
-            self.universal_analyze_tool = None
+            self.universal_analyze_tool: Any = None
 
         # Initialize MCP resources
         self.code_file_resource = CodeFileResource()
@@ -132,7 +132,9 @@ class TreeSitterAnalyzerMCPServer:
 
         self._initialization_complete = True
         try:
-            logger.info(f"MCP server initialization complete: {self.name} v{self.version}")
+            logger.info(
+                f"MCP server initialization complete: {self.name} v{self.version}"
+            )
         except Exception:
             # Gracefully handle logging failures during initialization
             pass
@@ -215,7 +217,9 @@ class TreeSitterAnalyzerMCPServer:
 
         if analysis_result is None or not analysis_result.success:
             error_msg = (
-                analysis_result.error_message if analysis_result else "Unknown error"
+                analysis_result.error_message or "Unknown error"
+                if analysis_result
+                else "Unknown error"
             )
             raise RuntimeError(f"Failed to analyze file: {file_path} - {error_msg}")
 
@@ -293,7 +297,7 @@ class TreeSitterAnalyzerMCPServer:
                 if hasattr(elem, "__dict__"):
                     detailed_elements.append(elem.__dict__)
                 else:
-                    detailed_elements.append(str(elem))
+                    detailed_elements.append({"element": str(elem)})
             result["detailed_elements"] = detailed_elements
 
         return result
@@ -301,24 +305,24 @@ class TreeSitterAnalyzerMCPServer:
     async def _read_resource(self, uri: str) -> dict[str, Any]:
         """
         Read a resource by URI.
-
+
         Args:
             uri: Resource URI to read
-
+
         Returns:
             Resource content
-
+
         Raises:
             ValueError: If URI is invalid or resource not found
         """
         if uri.startswith("code://file/"):
             # Extract file path from URI
-            file_path = uri.replace("code://file/", "")
-            return await self.code_file_resource.read_resource(uri)
+            result = await self.code_file_resource.read_resource(uri)
+            return {"content": result}
         elif uri.startswith("code://stats/"):
             # Extract stats type from URI
-            stats_type = uri.replace("code://stats/", "")
-            return await self.project_stats_resource.read_resource(uri)
+            result = await self.project_stats_resource.read_resource(uri)
+            return {"content": result}
         else:
             raise ValueError(f"Unknown resource URI: {uri}")
 
@@ -727,7 +731,7 @@ class TreeSitterAnalyzerMCPServer:
             pass  # Silently ignore logging errors during shutdown
 
 
-def parse_mcp_args(args=None) -> argparse.Namespace:
+def parse_mcp_args(args: list[str] | None = None) -> argparse.Namespace:
     """Parse command line arguments for MCP server."""
     parser = argparse.ArgumentParser(
         description="Tree-sitter Analyzer MCP Server",
@@ -798,7 +802,7 @@ async def main() -> None:
 
         server = TreeSitterAnalyzerMCPServer(project_root)
         await server.run()
-
+
         # Exit successfully after server run completes
         sys.exit(0)
     except KeyboardInterrupt:
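
The _read_resource hunk above also changes the method's return contract: the resource payload is now wrapped in a dictionary instead of being returned as-is. A minimal sketch of the new shape (not part of the diff; it assumes the package and the optional MCP dependencies are installed, and it calls the private method directly purely for illustration):

import asyncio

from tree_sitter_analyzer.mcp.server import TreeSitterAnalyzerMCPServer


async def demo() -> None:
    # project_root follows the new str | None signature; "." is an arbitrary example.
    server = TreeSitterAnalyzerMCPServer(project_root=".")
    payload = await server._read_resource("code://file/tree_sitter_analyzer/models.py")
    # 1.9.1 returns {"content": <resource body>} rather than the raw value from 1.8.4.
    print(sorted(payload.keys()))


asyncio.run(demo())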
tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py

@@ -36,7 +36,7 @@ class AnalyzeScaleTool(BaseMCPTool):
     for LLM workflow efficiency.
     """
 
-    def __init__(self, project_root: str = None) -> None:
+    def __init__(self, project_root: str | None = None) -> None:
         """Initialize the analyze scale tool."""
         # Use unified analysis engine instead of deprecated AdvancedAnalyzer
         super().__init__(project_root)
@@ -464,7 +464,7 @@ class AnalyzeScaleTool(BaseMCPTool):
         universal_result = await self.analysis_engine.analyze(request)
         if not universal_result or not universal_result.success:
             error_msg = (
-                universal_result.error_message
+                universal_result.error_message or "Unknown error"
                 if universal_result
                 else "Unknown error"
             )
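
This hunk and the matching one in mcp/server.py fix the same error-message fallback: an analysis result whose error_message is None or empty now also reports "Unknown error" instead of an empty string. A small self-contained illustration of the pattern (FakeResult and describe are hypothetical names, not from the package):

from dataclasses import dataclass


@dataclass
class FakeResult:  # hypothetical stand-in for the analysis result object
    error_message: str | None = None


def describe(result: FakeResult | None) -> str:
    # Same conditional expression as the hunk above; the added or-fallback
    # covers a present-but-empty error_message as well as a missing result.
    return result.error_message or "Unknown error" if result else "Unknown error"


assert describe(None) == "Unknown error"
assert describe(FakeResult()) == "Unknown error"
assert describe(FakeResult("parse failed")) == "parse failed"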
@@ -708,12 +708,12 @@ class AnalyzeScaleTool(BaseMCPTool):
     ) -> dict[str, Any]:
         """
         Create analysis result for JSON files.
-
+
         Args:
             file_path: Path to the JSON file
             file_metrics: Basic file metrics
             include_guidance: Whether to include guidance
-
+
         Returns:
             Analysis result for JSON file
         """
@@ -723,7 +723,8 @@ class AnalyzeScaleTool(BaseMCPTool):
             "language": "json",
             "file_size_bytes": file_metrics["file_size_bytes"],
             "total_lines": file_metrics["total_lines"],
-            "non_empty_lines": file_metrics["total_lines"] - file_metrics["blank_lines"],
+            "non_empty_lines": file_metrics["total_lines"]
+            - file_metrics["blank_lines"],
             "estimated_tokens": file_metrics["estimated_tokens"],
             "complexity_metrics": {
                 "total_elements": 0,
@@ -735,14 +736,18 @@ class AnalyzeScaleTool(BaseMCPTool):
                 "methods": [],
                 "fields": [],
             },
-            "scale_category": "small" if file_metrics["total_lines"] < 100 else "medium" if file_metrics["total_lines"] < 1000 else "large",
+            "scale_category": "small"
+            if file_metrics["total_lines"] < 100
+            else "medium"
+            if file_metrics["total_lines"] < 1000
+            else "large",
             "analysis_recommendations": {
                 "suitable_for_full_analysis": file_metrics["total_lines"] < 1000,
                 "recommended_approach": "JSON files are configuration/data files - structural analysis not applicable",
                 "token_efficiency_notes": "JSON files can be read directly without tree-sitter parsing",
             },
         }
-
+
         if include_guidance:
             result["llm_analysis_guidance"] = {
                 "file_characteristics": "JSON configuration/data file",
@@ -750,7 +755,7 @@ class AnalyzeScaleTool(BaseMCPTool):
                 "token_optimization": "Use simple file reading tools for JSON content",
                 "analysis_focus": "Data structure and configuration values",
             }
-
+
         return result
 
     def get_tool_definition(self) -> dict[str, Any]:
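
The reflowed scale_category expression keeps the same thresholds: fewer than 100 lines is "small", fewer than 1000 is "medium", everything else is "large", and suitable_for_full_analysis uses the same 1000-line cut-off. A standalone restatement of that logic with a few worked values (the helper name here is hypothetical):

def scale_category(total_lines: int) -> str:
    # Mirrors the chained conditional expression in the hunk above.
    return "small" if total_lines < 100 else "medium" if total_lines < 1000 else "large"


assert scale_category(80) == "small"
assert scale_category(250) == "medium"
assert scale_category(5000) == "large"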
tree_sitter_analyzer/mcp/tools/base_tool.py

@@ -121,7 +121,7 @@ class MCPTool(BaseMCPTool):
         Returns:
             Dictionary containing execution results
         """
-        ...
+        raise NotImplementedError("Subclasses must implement execute method")
 
     def validate_arguments(self, arguments: dict[str, Any]) -> bool:
         """
@@ -136,4 +136,4 @@ class MCPTool(BaseMCPTool):
         Raises:
             ValueError: If arguments are invalid
         """
-        ...
+        raise NotImplementedError("Subclasses must implement validate_arguments method")
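
Replacing the bare ... bodies with NotImplementedError turns a silently missing override into an immediate, descriptive failure. A hypothetical subclass sketch (EchoTool is illustrative only and not shipped with the package) showing the kind of override a concrete tool must now provide:

from typing import Any


class EchoTool(MCPTool):  # hypothetical example built on the base class above
    def validate_arguments(self, arguments: dict[str, Any]) -> bool:
        # Overriding means the NotImplementedError in the base class never fires.
        if "message" not in arguments:
            raise ValueError("'message' argument is required")
        return True

    # execute(...) must be overridden in the same way; its full signature is not
    # shown in this hunk, so it is omitted here.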
tree_sitter_analyzer/mcp/tools/fd_rg_utils.py

@@ -84,7 +84,7 @@ async def run_command_capture(
     if cmd and not check_external_command(cmd[0]):
         error_msg = f"Command '{cmd[0]}' not found in PATH. Please install {cmd[0]} to use this functionality."
         return 127, b"", error_msg.encode()
-
+
     try:
         # Create process
         proc = await asyncio.create_subprocess_exec(
@@ -106,7 +106,7 @@ async def run_command_capture(
         stdout, stderr = await asyncio.wait_for(
             proc.communicate(input=input_data), timeout=timeout_s
         )
-        return proc.returncode, stdout, stderr
+        return proc.returncode or 0, stdout, stderr
     except asyncio.TimeoutError:
         try:
             proc.kill()
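
run_command_capture returns a (returncode, stdout, stderr) tuple and short-circuits with exit code 127 when the binary is missing; the new proc.returncode or 0 satisfies the declared int return type, since asyncio exposes returncode as an optional int. A small usage sketch (assuming the helper is importable from this module, as the file list above suggests):

import asyncio

from tree_sitter_analyzer.mcp.tools.fd_rg_utils import run_command_capture


async def ripgrep_available() -> bool:
    rc, _out, err = await run_command_capture(["rg", "--version"], timeout_ms=None)
    if rc == 127:
        # The "not found in PATH" path from the hunk above.
        print(err.decode())
        return False
    return rc == 0


print(asyncio.run(ripgrep_available()))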
@@ -222,7 +222,7 @@ def build_rg_command(
     """Build ripgrep command with JSON output and options."""
     if count_only_matches:
         # Use --count-matches for count-only mode (no JSON output)
-        cmd: list[str] = [
+        cmd = [
             "rg",
             "--count-matches",
             "--no-heading",
@@ -231,7 +231,7 @@ def build_rg_command(
         ]
     else:
         # Use --json for full match details
-        cmd: list[str] = [
+        cmd = [
             "rg",
             "--json",
             "--no-heading",
@@ -286,11 +286,15 @@ def build_rg_command(
     # Normalize filesize
     cmd += ["--max-filesize", normalize_max_filesize(max_filesize)]
 
-    # Only add timeout if supported (check if timeout_ms is provided and > 0)
-    # Note: --timeout flag may not be available in all ripgrep versions
-    # For now, we'll skip the timeout flag to ensure compatibility
-    # effective_timeout = clamp_int(timeout_ms, DEFAULT_RG_TIMEOUT_MS, RG_TIMEOUT_HARD_CAP_MS)
-    # cmd += ["--timeout", str(effective_timeout)]
+    # Add timeout if provided and > 0 (enable timeout for performance optimization)
+    if timeout_ms is not None and timeout_ms > 0:
+        # effective_timeout = clamp_int(
+        #     timeout_ms, DEFAULT_RG_TIMEOUT_MS, RG_TIMEOUT_HARD_CAP_MS
+        # )  # Commented out as not used yet
+        # Use timeout in milliseconds for better control
+        # Note: We'll handle timeout at the process level instead of ripgrep flag
+        # to ensure compatibility across ripgrep versions
+        pass
 
     # Query must be last before roots/files
     cmd.append(query)
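
Apart from dropping the repeated cmd: list[str] annotation (annotating the same variable in both branches is redundant), the behaviour is unchanged: the new timeout branch still adds no --timeout flag, and timeouts remain enforced at the process level for compatibility across ripgrep versions. The two modes therefore still produce argument lists shaped roughly like this (illustrative values only; the options inserted between the fixed prefix and the query are omitted):

# Count-only mode: per-file match counts, no JSON events.
count_cmd = ["rg", "--count-matches", "--no-heading", "TODO", "src", "tests"]

# JSON mode: one JSON event per line, consumed by parse_rg_json_lines_to_matches below.
json_cmd = ["rg", "--json", "--no-heading", "TODO", "src", "tests"]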
@@ -307,39 +311,63 @@ def build_rg_command(
 def parse_rg_json_lines_to_matches(stdout_bytes: bytes) -> list[dict[str, Any]]:
     """Parse ripgrep JSON event stream and keep only match events."""
     results: list[dict[str, Any]] = []
-    for raw_line in stdout_bytes.splitlines():
+    lines = stdout_bytes.splitlines()
+
+    # Batch process lines for better performance
+    for raw_line in lines:
         if not raw_line.strip():
             continue
         try:
-            evt = json.loads(raw_line.decode("utf-8", errors="replace"))
+            # Decode once and parse JSON
+            line_str = raw_line.decode("utf-8", errors="replace")
+            evt = json.loads(line_str)
         except (json.JSONDecodeError, UnicodeDecodeError):  # nosec B112
             continue
+
+        # Quick type check to skip non-match events
         if evt.get("type") != "match":
             continue
+
         data = evt.get("data", {})
-        path_text = (data.get("path", {}) or {}).get("text")
+        if not data:
+            continue
+
+        # Extract data with safe defaults
+        path_data = data.get("path", {})
+        path_text = path_data.get("text") if path_data else None
+        if not path_text:
+            continue
+
         line_number = data.get("line_number")
-        line_text = (data.get("lines", {}) or {}).get("text")
-        submatches_raw = data.get("submatches", []) or []
-        # Normalize line content to reduce token usage
+        lines_data = data.get("lines", {})
+        line_text = lines_data.get("text") if lines_data else ""
+
+        # Normalize line content to reduce token usage (optimized)
         normalized_line = " ".join(line_text.split()) if line_text else ""
 
-        # Simplify submatches - remove redundant match text, keep only positions
+        # Simplify submatches - keep only essential position data
+        submatches_raw = data.get("submatches", [])
         simplified_matches = []
-        for sm in submatches_raw:
-            start = sm.get("start")
-            end = sm.get("end")
-            if start is not None and end is not None:
-                simplified_matches.append([start, end])
+        if submatches_raw:
+            for sm in submatches_raw:
+                start = sm.get("start")
+                end = sm.get("end")
+                if start is not None and end is not None:
+                    simplified_matches.append([start, end])
 
         results.append(
             {
                 "file": path_text,
-                "line": line_number,  # Shortened field name
-                "text": normalized_line,  # Normalized content
-                "matches": simplified_matches,  # Simplified match positions
+                "line": line_number,
+                "text": normalized_line,
+                "matches": simplified_matches,
             }
         )
+
+        # Early exit if we have too many results to prevent memory issues
+        if len(results) >= MAX_RESULTS_HARD_CAP:
+            break
+
     return results
 
 
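For reference, this is roughly the ripgrep --json match event the function consumes and the trimmed record it now emits (the file path and line content are invented; the event layout follows ripgrep's JSON Lines output):

import json

from tree_sitter_analyzer.mcp.tools.fd_rg_utils import parse_rg_json_lines_to_matches

event = {
    "type": "match",
    "data": {
        "path": {"text": "src/app.py"},
        "line_number": 42,
        "lines": {"text": "x = 1  # TODO remove\n"},
        "submatches": [{"match": {"text": "TODO"}, "start": 9, "end": 13}],
    },
}

stdout = json.dumps(event).encode("utf-8") + b"\n"
print(parse_rg_json_lines_to_matches(stdout))
# [{'file': 'src/app.py', 'line': 42, 'text': 'x = 1 # TODO remove', 'matches': [[9, 13]]}]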
@@ -572,7 +600,9 @@ class TempFileList:
     def __enter__(self) -> TempFileList:
         return self
 
-    def __exit__(self, exc_type, exc, tb) -> None:
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc: BaseException | None, tb: Any
+    ) -> None:
         with contextlib.suppress(Exception):
             Path(self.path).unlink(missing_ok=True)
 
@@ -585,7 +615,12 @@ class contextlib: # minimal shim for suppress without importing globally
         def __enter__(self) -> None:  # noqa: D401
             return None
 
-        def __exit__(self, exc_type, exc, tb) -> bool:
+        def __exit__(
+            self,
+            exc_type: type[BaseException] | None,
+            exc: BaseException | None,
+            tb: Any,
+        ) -> bool:
             return exc_type is not None and issubclass(exc_type, self.exceptions)
 
 
@@ -595,3 +630,174 @@ def write_files_to_temp(files: list[str]) -> TempFileList:
     content = "\n".join(files)
     Path(temp_path).write_text(content, encoding="utf-8")
     return TempFileList(path=temp_path)
+
+
+async def run_parallel_rg_searches(
+    commands: list[list[str]],
+    timeout_ms: int | None = None,
+    max_concurrent: int = 4,
+) -> list[tuple[int, bytes, bytes]]:
+    """
+    Run multiple ripgrep commands in parallel with concurrency control.
+
+    Args:
+        commands: List of ripgrep command lists to execute
+        timeout_ms: Timeout in milliseconds for each command
+        max_concurrent: Maximum number of concurrent processes (default: 4)
+
+    Returns:
+        List of (returncode, stdout, stderr) tuples in the same order as commands
+    """
+    if not commands:
+        return []
+
+    # Create semaphore to limit concurrent processes
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def run_single_command(cmd: list[str]) -> tuple[int, bytes, bytes]:
+        async with semaphore:
+            return await run_command_capture(cmd, timeout_ms=timeout_ms)
+
+    # Execute all commands concurrently
+    tasks = [run_single_command(cmd) for cmd in commands]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    # Handle exceptions and convert to proper format
+    processed_results: list[tuple[int, bytes, bytes]] = []
+    for _i, result in enumerate(results):
+        if isinstance(result, Exception):
+            # Convert exception to error result
+            error_msg = f"Command failed: {str(result)}"
+            processed_results.append((1, b"", error_msg.encode()))
+        elif isinstance(result, tuple) and len(result) == 3:
+            processed_results.append(result)
+        else:
+            # Fallback for unexpected result types
+            processed_results.append((1, b"", b"Unexpected result type"))
+
+    return processed_results
+
+
+def merge_rg_results(
+    results: list[tuple[int, bytes, bytes]],
+    count_only_mode: bool = False,
+) -> tuple[int, bytes, bytes]:
+    """
+    Merge results from multiple ripgrep executions.
+
+    Args:
+        results: List of (returncode, stdout, stderr) tuples
+        count_only_mode: Whether the results are from count-only mode
+
+    Returns:
+        Merged (returncode, stdout, stderr) tuple
+    """
+    if not results:
+        return (1, b"", b"No results to merge")
+
+    # Check if any command failed critically (not just "no matches found")
+    critical_failures = []
+    successful_results = []
+
+    for rc, stdout, stderr in results:
+        if rc not in (0, 1):  # 0=matches found, 1=no matches, others=errors
+            critical_failures.append((rc, stdout, stderr))
+        else:
+            successful_results.append((rc, stdout, stderr))
+
+    # If all commands failed critically, return the first failure
+    if not successful_results:
+        return critical_failures[0] if critical_failures else (1, b"", b"")
+
+    # Merge successful results
+    if count_only_mode:
+        return _merge_count_results(successful_results)
+    else:
+        return _merge_json_results(successful_results)
+
+
+def _merge_count_results(
+    results: list[tuple[int, bytes, bytes]],
+) -> tuple[int, bytes, bytes]:
+    """Merge count-only results from multiple ripgrep executions."""
+    merged_counts: dict[str, int] = {}
+    total_matches = 0
+
+    for rc, stdout, _stderr in results:
+        if rc in (0, 1):  # Success or no matches
+            file_counts = parse_rg_count_output(stdout)
+            # Remove the __total__ key and merge file counts
+            for file_path, count in file_counts.items():
+                if file_path != "__total__":
+                    merged_counts[file_path] = merged_counts.get(file_path, 0) + count
+                    total_matches += count
+
+    # Format as ripgrep count output
+    output_lines = []
+    for file_path, count in merged_counts.items():
+        output_lines.append(f"{file_path}:{count}")
+
+    merged_stdout = "\n".join(output_lines).encode("utf-8")
+
+    # Return code 0 if we have matches, 1 if no matches
+    return_code = 0 if total_matches > 0 else 1
+    return (return_code, merged_stdout, b"")
+
+
+def _merge_json_results(
+    results: list[tuple[int, bytes, bytes]],
+) -> tuple[int, bytes, bytes]:
+    """Merge JSON results from multiple ripgrep executions."""
+    merged_lines = []
+    has_matches = False
+
+    for rc, stdout, _stderr in results:
+        if rc in (0, 1):  # Success or no matches
+            if stdout.strip():
+                merged_lines.extend(stdout.splitlines())
+            if rc == 0:  # Has matches
+                has_matches = True
+
+    merged_stdout = b"\n".join(merged_lines)
+    return_code = 0 if has_matches else 1
+    return (return_code, merged_stdout, b"")
+
+
+def split_roots_for_parallel_processing(
+    roots: list[str], max_chunks: int = 4
+) -> list[list[str]]:
+    """
+    Split roots into chunks for parallel processing.
+
+    Args:
+        roots: List of root directories
+        max_chunks: Maximum number of chunks to create
+
+    Returns:
+        List of root chunks for parallel processing
+    """
+    if not roots:
+        return []
+
+    if len(roots) <= max_chunks:
+        # Each root gets its own chunk
+        return [[root] for root in roots]
+
+    # Distribute roots across chunks
+    chunk_size = len(roots) // max_chunks
+    remainder = len(roots) % max_chunks
+
+    chunks = []
+    start = 0
+
+    for i in range(max_chunks):
+        # Add one extra item to first 'remainder' chunks
+        current_chunk_size = chunk_size + (1 if i < remainder else 0)
+        end = start + current_chunk_size
+
+        if start < len(roots):
+            chunks.append(roots[start:end])
+
+        start = end
+
+    return [chunk for chunk in chunks if chunk]  # Remove empty chunks
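
Taken together, the new helpers let the search tools fan one query out across several root directories and fold the outputs back into a single ripgrep-shaped result. A rough end-to-end sketch (not part of the package; the hand-built argument lists stand in for whatever build_rg_command produces, and the import path follows the file list above):

import asyncio

from tree_sitter_analyzer.mcp.tools.fd_rg_utils import (
    merge_rg_results,
    parse_rg_json_lines_to_matches,
    run_parallel_rg_searches,
    split_roots_for_parallel_processing,
)


async def search_todos(roots: list[str]) -> list[dict]:
    chunks = split_roots_for_parallel_processing(roots, max_chunks=4)
    # One JSON-mode ripgrep command per chunk of roots.
    commands = [["rg", "--json", "--no-heading", "TODO", *chunk] for chunk in chunks]
    results = await run_parallel_rg_searches(commands, timeout_ms=5000, max_concurrent=4)
    rc, stdout, stderr = merge_rg_results(results, count_only_mode=False)
    if rc not in (0, 1):
        raise RuntimeError(stderr.decode())
    return parse_rg_json_lines_to_matches(stdout)


print(len(asyncio.run(search_todos(["src", "tests"]))))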