tree_sitter_analyzer-1.9.17.1-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0

tree_sitter_analyzer/mcp/tools/fd_rg_utils.py

@@ -0,0 +1,816 @@
#!/usr/bin/env python3
"""
Shared utilities for fd/ripgrep based MCP tools.

This module centralizes subprocess execution, command building, result caps,
and JSON line parsing for ripgrep.
"""

from __future__ import annotations

import asyncio
import json
import os
import shutil
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any

# Safety caps (hard limits)
MAX_RESULTS_HARD_CAP = 10000
DEFAULT_RESULTS_LIMIT = 2000

DEFAULT_RG_MAX_FILESIZE = "10M"
RG_MAX_FILESIZE_HARD_CAP_BYTES = 200 * 1024 * 1024  # 200M

DEFAULT_RG_TIMEOUT_MS = 4000
RG_TIMEOUT_HARD_CAP_MS = 30000


def check_external_command(command: str) -> bool:
    """Check if an external command is available in the system PATH."""
    return shutil.which(command) is not None


def get_missing_commands() -> list[str]:
    """Get list of missing external commands required by fd/rg tools."""
    missing = []
    if not check_external_command("fd"):
        missing.append("fd")
    if not check_external_command("rg"):
        missing.append("rg")
    return missing


def clamp_int(value: int | None, default_value: int, hard_cap: int) -> int:
    if value is None:
        return default_value
    try:
        v = int(value)
    except (TypeError, ValueError):
        return default_value
    return max(0, min(v, hard_cap))
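
For illustration, a minimal sketch (not part of the package) of how clamp_int combines the caps above to sanitize a caller-supplied result limit; the import path follows the wheel layout in the file list.

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import (
        DEFAULT_RESULTS_LIMIT,
        MAX_RESULTS_HARD_CAP,
        clamp_int,
    )

    assert clamp_int(None, DEFAULT_RESULTS_LIMIT, MAX_RESULTS_HARD_CAP) == 2000    # default
    assert clamp_int(50000, DEFAULT_RESULTS_LIMIT, MAX_RESULTS_HARD_CAP) == 10000  # capped at the hard limit
    assert clamp_int(-5, DEFAULT_RESULTS_LIMIT, MAX_RESULTS_HARD_CAP) == 0         # floored at zero
    assert clamp_int("oops", DEFAULT_RESULTS_LIMIT, MAX_RESULTS_HARD_CAP) == 2000  # unparsable -> default
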

def parse_size_to_bytes(size_str: str) -> int | None:
    """Parse ripgrep --max-filesize strings like '10M', '200K' to bytes."""
    if not size_str:
        return None
    s = size_str.strip().upper()
    try:
        if s.endswith("K"):
            return int(float(s[:-1]) * 1024)
        if s.endswith("M"):
            return int(float(s[:-1]) * 1024 * 1024)
        if s.endswith("G"):
            return int(float(s[:-1]) * 1024 * 1024 * 1024)
        return int(s)
    except ValueError:
        return None
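
A quick illustrative check (not part of the package) of the size strings parse_size_to_bytes accepts:

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import parse_size_to_bytes

    assert parse_size_to_bytes("200K") == 200 * 1024
    assert parse_size_to_bytes("10M") == 10 * 1024 * 1024
    assert parse_size_to_bytes("1024") == 1024   # plain byte counts pass through
    assert parse_size_to_bytes("oops") is None   # unparsable values yield None
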

async def run_command_capture(
    cmd: list[str],
    input_data: bytes | None = None,
    timeout_ms: int | None = None,
) -> tuple[int, bytes, bytes]:
    """Run a subprocess and capture output.

    Returns (returncode, stdout, stderr). On timeout, kills process and returns 124.
    Separated into a util for easy monkeypatching in tests.
    """
    # Check if command exists before attempting to run
    if cmd and not check_external_command(cmd[0]):
        error_msg = f"Command '{cmd[0]}' not found in PATH. Please install {cmd[0]} to use this functionality."
        return 127, b"", error_msg.encode()

    try:
        # Create process
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE if input_data is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as e:
        error_msg = f"Command '{cmd[0]}' not found: {e}"
        return 127, b"", error_msg.encode()

    # Compute timeout seconds
    timeout_s: float | None = None
    if timeout_ms and timeout_ms > 0:
        timeout_s = timeout_ms / 1000.0

    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=input_data), timeout=timeout_s
        )
        return proc.returncode or 0, stdout, stderr
    except asyncio.TimeoutError:
        try:
            proc.kill()
        finally:
            with contextlib.suppress(Exception):
                await proc.wait()
        return 124, b"", f"Timeout after {timeout_ms} ms".encode()
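
A hedged usage sketch (not part of the package): run_command_capture is awaited with a full argv list. The example assumes ripgrep is installed; if it is not, the helper returns exit code 127 instead of raising.

    import asyncio

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import run_command_capture


    async def main() -> None:
        rc, out, err = await run_command_capture(["rg", "--version"], timeout_ms=2000)
        # With rg on PATH: rc == 0 and out starts with b"ripgrep"; otherwise rc == 127.
        print(rc, out.splitlines()[:1], err.decode())


    asyncio.run(main())
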

def build_fd_command(
    *,
    pattern: str | None,
    glob: bool,
    types: list[str] | None,
    extensions: list[str] | None,
    exclude: list[str] | None,
    depth: int | None,
    follow_symlinks: bool,
    hidden: bool,
    no_ignore: bool,
    size: list[str] | None,
    changed_within: str | None,
    changed_before: str | None,
    full_path_match: bool,
    absolute: bool,
    limit: int | None,
    roots: list[str],
) -> list[str]:
    """Build an fd command with appropriate flags."""
    cmd: list[str] = ["fd", "--color", "never"]
    if glob:
        cmd.append("--glob")
    if full_path_match:
        cmd.append("-p")
    if absolute:
        cmd.append("-a")
    if follow_symlinks:
        cmd.append("-L")
    if hidden:
        cmd.append("-H")
    if no_ignore:
        cmd.append("-I")
    if depth is not None:
        cmd += ["-d", str(depth)]
    if types:
        for t in types:
            cmd += ["-t", str(t)]
    if extensions:
        for ext in extensions:
            if ext.startswith("."):
                ext = ext[1:]
            cmd += ["-e", ext]
    if exclude:
        for ex in exclude:
            cmd += ["-E", ex]
    if size:
        for s in size:
            cmd += ["-S", s]
    if changed_within:
        cmd += ["--changed-within", str(changed_within)]
    if changed_before:
        cmd += ["--changed-before", str(changed_before)]
    if limit is not None:
        cmd += ["--max-results", str(limit)]

    # Pattern goes before roots if present
    # If no pattern is specified, use '.' to match all files (required to prevent roots being interpreted as pattern)
    if pattern:
        cmd.append(pattern)
    else:
        cmd.append(".")

    # Append roots - these are search directories, not patterns
    if roots:
        cmd += roots

    return cmd
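
Every parameter of build_fd_command is keyword-only with no default, so callers spell out each option. A sketch with hypothetical values follows (only the command list is built; nothing is executed):

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import build_fd_command

    cmd = build_fd_command(
        pattern=None,             # no pattern -> '.' is inserted so roots are not misread
        glob=False,
        types=["f"],              # files only
        extensions=[".py"],       # leading dot is stripped
        exclude=["node_modules"],
        depth=3,
        follow_symlinks=False,
        hidden=False,
        no_ignore=False,
        size=None,
        changed_within=None,
        changed_before=None,
        full_path_match=False,
        absolute=False,
        limit=100,
        roots=["src"],
    )
    # -> ['fd', '--color', 'never', '-d', '3', '-t', 'f', '-e', 'py',
    #     '-E', 'node_modules', '--max-results', '100', '.', 'src']
    print(cmd)
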

def normalize_max_filesize(user_value: str | None) -> str:
    if not user_value:
        return DEFAULT_RG_MAX_FILESIZE
    bytes_val = parse_size_to_bytes(user_value)
    if bytes_val is None:
        return DEFAULT_RG_MAX_FILESIZE
    if bytes_val > RG_MAX_FILESIZE_HARD_CAP_BYTES:
        return "200M"
    return user_value
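
Illustrative behavior of normalize_max_filesize (not part of the package): missing or unparsable values fall back to the 10M default, and anything above the 200M hard cap is clamped.

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import normalize_max_filesize

    assert normalize_max_filesize(None) == "10M"    # default
    assert normalize_max_filesize("50M") == "50M"   # within the 200M hard cap
    assert normalize_max_filesize("5G") == "200M"   # clamped to the hard cap
    assert normalize_max_filesize("huge") == "10M"  # unparsable -> default
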

def build_rg_command(
    *,
    query: str,
    case: str | None,
    fixed_strings: bool,
    word: bool,
    multiline: bool,
    include_globs: list[str] | None,
    exclude_globs: list[str] | None,
    follow_symlinks: bool,
    hidden: bool,
    no_ignore: bool,
    max_filesize: str | None,
    context_before: int | None,
    context_after: int | None,
    encoding: str | None,
    max_count: int | None,
    timeout_ms: int | None,
    roots: list[str] | None,
    files_from: str | None,
    count_only_matches: bool = False,
) -> list[str]:
    """Build ripgrep command with JSON output and options."""
    if count_only_matches:
        # Use --count-matches for count-only mode (no JSON output)
        cmd = [
            "rg",
            "--count-matches",
            "--no-heading",
            "--color",
            "never",
        ]
    else:
        # Use --json for full match details
        cmd = [
            "rg",
            "--json",
            "--no-heading",
            "--color",
            "never",
        ]

    # Case sensitivity
    if case == "smart":
        cmd.append("-S")
    elif case == "insensitive":
        cmd.append("-i")
    elif case == "sensitive":
        cmd.append("-s")

    if fixed_strings:
        cmd.append("-F")
    if word:
        cmd.append("-w")
    if multiline:
        # Prefer --multiline (does not imply binary)
        cmd.append("--multiline")

    if follow_symlinks:
        cmd.append("-L")
    if hidden:
        cmd.append("-H")
    if no_ignore:
        # Use -u (respect ignore but include hidden); do not escalate to -uu automatically
        cmd.append("-u")

    if include_globs:
        for g in include_globs:
            cmd += ["-g", g]
    if exclude_globs:
        for g in exclude_globs:
            # ripgrep exclusion via !pattern
            if not g.startswith("!"):
                cmd += ["-g", f"!{g}"]
            else:
                cmd += ["-g", g]

    if context_before is not None:
        cmd += ["-B", str(context_before)]
    if context_after is not None:
        cmd += ["-A", str(context_after)]
    if encoding:
        cmd += ["--encoding", encoding]
    if max_count is not None:
        cmd += ["-m", str(max_count)]

    # Normalize filesize
    cmd += ["--max-filesize", normalize_max_filesize(max_filesize)]

    # Add timeout if provided and > 0 (enable timeout for performance optimization)
    if timeout_ms is not None and timeout_ms > 0:
        # effective_timeout = clamp_int(
        #     timeout_ms, DEFAULT_RG_TIMEOUT_MS, RG_TIMEOUT_HARD_CAP_MS
        # )  # Commented out as not used yet
        # Use timeout in milliseconds for better control
        # Note: We'll handle timeout at the process level instead of ripgrep flag
        # to ensure compatibility across ripgrep versions
        pass

    # Query must be last before roots/files
    cmd.append(query)

    # Skip --files-from flag as it's not supported in this ripgrep version
    # Use roots instead for compatibility
    if roots:
        cmd += roots
    # Note: files_from functionality is disabled for compatibility

    return cmd
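
As with the fd builder, a sketch with hypothetical values (not part of the package); passing count_only_matches=True would swap --json for --count-matches:

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import build_rg_command

    cmd = build_rg_command(
        query="TODO",
        case="smart",
        fixed_strings=False,
        word=False,
        multiline=False,
        include_globs=["*.py"],
        exclude_globs=["tests/*"],
        follow_symlinks=False,
        hidden=False,
        no_ignore=False,
        max_filesize=None,        # falls back to the 10M default
        context_before=None,
        context_after=None,
        encoding=None,
        max_count=None,
        timeout_ms=None,
        roots=["src"],
        files_from=None,
    )
    # -> ['rg', '--json', '--no-heading', '--color', 'never', '-S', '-g', '*.py',
    #     '-g', '!tests/*', '--max-filesize', '10M', 'TODO', 'src']
    print(cmd)
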

def parse_rg_json_lines_to_matches(stdout_bytes: bytes) -> list[dict[str, Any]]:
    """Parse ripgrep JSON event stream and keep only match events."""
    results: list[dict[str, Any]] = []
    lines = stdout_bytes.splitlines()

    # Batch process lines for better performance
    for raw_line in lines:
        if not raw_line.strip():
            continue
        try:
            # Decode once and parse JSON
            line_str = raw_line.decode("utf-8", errors="replace")
            evt = json.loads(line_str)
        except (json.JSONDecodeError, UnicodeDecodeError):  # nosec B112
            continue

        # Quick type check to skip non-match events
        if evt.get("type") != "match":
            continue

        data = evt.get("data", {})
        if not data:
            continue

        # Extract data with safe defaults
        path_data = data.get("path", {})
        path_text = path_data.get("text") if path_data else None
        if not path_text:
            continue

        line_number = data.get("line_number")
        lines_data = data.get("lines", {})
        line_text = lines_data.get("text") if lines_data else ""

        # Normalize line content to reduce token usage (optimized)
        normalized_line = " ".join(line_text.split()) if line_text else ""

        # Simplify submatches - keep only essential position data
        submatches_raw = data.get("submatches", [])
        simplified_matches = []
        if submatches_raw:
            for sm in submatches_raw:
                start = sm.get("start")
                end = sm.get("end")
                if start is not None and end is not None:
                    simplified_matches.append([start, end])

        results.append(
            {
                "file": path_text,
                "line": line_number,
                "text": normalized_line,
                "matches": simplified_matches,
            }
        )

        # Early exit if we have too many results to prevent memory issues
        if len(results) >= MAX_RESULTS_HARD_CAP:
            break

    return results
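
A small sketch (not part of the package) feeding one synthetic event in ripgrep's --json format: only "match" events survive, with whitespace-normalized text and [start, end] offsets.

    import json

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import parse_rg_json_lines_to_matches

    event = {
        "type": "match",
        "data": {
            "path": {"text": "src/app.py"},
            "line_number": 12,
            "lines": {"text": "    # TODO: fix encoding handling\n"},
            "submatches": [{"match": {"text": "TODO"}, "start": 6, "end": 10}],
        },
    }
    stdout = (json.dumps(event) + "\n").encode("utf-8")

    print(parse_rg_json_lines_to_matches(stdout))
    # [{'file': 'src/app.py', 'line': 12, 'text': '# TODO: fix encoding handling',
    #   'matches': [[6, 10]]}]
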

def group_matches_by_file(matches: list[dict[str, Any]]) -> dict[str, Any]:
    """Group matches by file to eliminate file path duplication."""
    if not matches:
        return {"success": True, "count": 0, "files": []}

    # Group matches by file
    file_groups: dict[str, list[dict[str, Any]]] = {}
    total_matches = 0

    for match in matches:
        file_path = match.get("file", "unknown")
        if file_path not in file_groups:
            file_groups[file_path] = []

        # Create match entry without file path
        match_entry = {
            "line": match.get("line", match.get("line_number", "?")),
            "text": match.get("text", match.get("line", "")),
            "positions": match.get("matches", match.get("submatches", [])),
        }
        file_groups[file_path].append(match_entry)
        total_matches += 1

    # Convert to grouped structure
    files = []
    for file_path, file_matches in file_groups.items():
        files.append(
            {
                "file": file_path,
                "matches": file_matches,
                "match_count": len(file_matches),
            }
        )

    return {"success": True, "count": total_matches, "files": files}
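
An illustrative sketch (not part of the package) that regroups match dicts of the shape produced by the parser above:

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import group_matches_by_file

    matches = [
        {"file": "src/app.py", "line": 12, "text": "# TODO: fix", "matches": [[2, 6]]},
        {"file": "src/app.py", "line": 40, "text": "# TODO: test", "matches": [[2, 6]]},
        {"file": "src/cli.py", "line": 7, "text": "TODO later", "matches": [[0, 4]]},
    ]

    grouped = group_matches_by_file(matches)
    print(grouped["count"])                    # 3
    print(grouped["files"][0]["match_count"])  # 2 (both src/app.py hits share one file entry)
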

def optimize_match_paths(matches: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Optimize file paths in match results to reduce token consumption."""
    if not matches:
        return matches

    # Find common prefix among all file paths
    file_paths = [match.get("file", "") for match in matches if match.get("file")]
    common_prefix = ""
    if len(file_paths) > 1:
        import os

        try:
            common_prefix = os.path.commonpath(file_paths)
        except (ValueError, TypeError):
            common_prefix = ""

    # Optimize each match
    optimized_matches = []
    for match in matches:
        optimized_match = match.copy()
        file_path = match.get("file")
        if file_path:
            optimized_match["file"] = _optimize_file_path(file_path, common_prefix)
        optimized_matches.append(optimized_match)

    return optimized_matches


def _optimize_file_path(file_path: str, common_prefix: str = "") -> str:
    """Optimize file path for token efficiency by removing common prefixes and shortening."""
    if not file_path:
        return file_path

    # Remove common prefix if provided
    if common_prefix and file_path.startswith(common_prefix):
        optimized = file_path[len(common_prefix) :].lstrip("/\\")
        if optimized:
            return optimized

    # For very long paths, show only the last few components
    from pathlib import Path

    path_obj = Path(file_path)
    parts = path_obj.parts

    if len(parts) > 4:
        # Show first part + ... + last 3 parts
        return str(Path(parts[0]) / "..." / Path(*parts[-3:]))

    return file_path
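
A sketch of the common-prefix stripping (not part of the package; the paths are made up):

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import optimize_match_paths

    matches = [
        {"file": "/home/dev/project/src/app.py", "line": 1, "text": "x", "matches": []},
        {"file": "/home/dev/project/src/util/io.py", "line": 2, "text": "y", "matches": []},
    ]

    print([m["file"] for m in optimize_match_paths(matches)])
    # ['app.py', 'util/io.py'] -- the shared '/home/dev/project/src' prefix is dropped
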

def summarize_search_results(
    matches: list[dict[str, Any]], max_files: int = 10, max_total_lines: int = 50
) -> dict[str, Any]:
    """Summarize search results to reduce context size while preserving key information."""
    if not matches:
        return {
            "total_matches": 0,
            "total_files": 0,
            "summary": "No matches found",
            "top_files": [],
        }

    # Group matches by file and find common prefix for optimization
    file_groups: dict[str, list[dict[str, Any]]] = {}
    all_file_paths = []
    for match in matches:
        file_path = match.get("file", "unknown")
        all_file_paths.append(file_path)
        if file_path not in file_groups:
            file_groups[file_path] = []
        file_groups[file_path].append(match)

    # Find common prefix to optimize paths
    common_prefix = ""
    if len(all_file_paths) > 1:
        import os

        common_prefix = os.path.commonpath(all_file_paths) if all_file_paths else ""

    # Sort files by match count (descending)
    sorted_files = sorted(file_groups.items(), key=lambda x: len(x[1]), reverse=True)

    # Create summary
    total_matches = len(matches)
    total_files = len(file_groups)

    # Top files with match counts
    top_files = []
    remaining_lines = max_total_lines

    for file_path, file_matches in sorted_files[:max_files]:
        match_count = len(file_matches)

        # Include a few sample lines from this file
        sample_lines = []
        lines_to_include = min(3, remaining_lines, len(file_matches))

        for _i, match in enumerate(file_matches[:lines_to_include]):
            line_num = match.get(
                "line", match.get("line_number", "?")
            )  # Support both old and new format
            line_text = match.get(
                "text", match.get("line", "")
            ).strip()  # Support both old and new format
            if line_text:
                # Truncate long lines and remove extra whitespace to save tokens
                truncated_line = " ".join(line_text.split())[:60]
                if len(line_text) > 60:
                    truncated_line += "..."
                sample_lines.append(f"L{line_num}: {truncated_line}")
                remaining_lines -= 1

        # Ensure we have at least some sample lines if matches exist
        if not sample_lines and file_matches:
            # Fallback: create a simple summary line
            sample_lines.append(f"Found {len(file_matches)} matches")

        # Optimize file path for token efficiency
        optimized_path = _optimize_file_path(file_path, common_prefix)

        top_files.append(
            {
                "file": optimized_path,
                "match_count": match_count,
                "sample_lines": sample_lines,
            }
        )

        if remaining_lines <= 0:
            break

    # Create summary text
    if total_files <= max_files:
        summary = f"Found {total_matches} matches in {total_files} files"
    else:
        summary = f"Found {total_matches} matches in {total_files} files (showing top {len(top_files)})"

    return {
        "total_matches": total_matches,
        "total_files": total_files,
        "summary": summary,
        "top_files": top_files,
        "truncated": total_files > max_files,
    }
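
A sketch (not part of the package) of the condensed summary this produces for a small, made-up result set:

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import summarize_search_results

    matches = [
        {"file": "src/app.py", "line": 12, "text": "# TODO: fix encoding", "matches": [[2, 6]]},
        {"file": "src/app.py", "line": 40, "text": "# TODO: add tests", "matches": [[2, 6]]},
        {"file": "src/cli.py", "line": 7, "text": "TODO later", "matches": [[0, 4]]},
    ]

    summary = summarize_search_results(matches, max_files=10, max_total_lines=50)
    print(summary["summary"])       # 'Found 3 matches in 2 files'
    print(summary["top_files"][0])  # 'app.py' (common 'src' prefix stripped), 2 matches,
                                    # two 'L<line>: ...' sample lines
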

def parse_rg_count_output(stdout_bytes: bytes) -> dict[str, int]:
    """Parse ripgrep --count-matches output and return file->count mapping."""
    results: dict[str, int] = {}
    total_matches = 0

    for line in stdout_bytes.decode("utf-8", errors="replace").splitlines():
        line = line.strip()
        if not line:
            continue

        # Format: "file_path:count"
        if ":" in line:
            file_path, count_str = line.rsplit(":", 1)
            try:
                count = int(count_str)
                results[file_path] = count
                total_matches += count
            except ValueError:
                # Skip lines that don't have valid count format
                continue

    # Add total count as special key
    results["__total__"] = total_matches
    return results


def extract_file_list_from_count_data(count_data: dict[str, int]) -> list[str]:
    """Extract file list from count data, excluding the special __total__ key."""
    return [file_path for file_path in count_data.keys() if file_path != "__total__"]


def create_file_summary_from_count_data(count_data: dict[str, int]) -> dict[str, Any]:
    """Create a file summary structure from count data."""
    file_list = extract_file_list_from_count_data(count_data)
    total_matches = count_data.get("__total__", 0)

    return {
        "success": True,
        "total_matches": total_matches,
        "file_count": len(file_list),
        "files": [
            {"file": file_path, "match_count": count_data[file_path]}
            for file_path in file_list
        ],
        "derived_from_count": True,  # indicates this was derived from count data
    }
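
An illustrative round trip (not part of the package) from raw --count-matches output to the derived file summary:

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import (
        create_file_summary_from_count_data,
        parse_rg_count_output,
    )

    counts = parse_rg_count_output(b"src/app.py:3\nsrc/cli.py:1\n")
    print(counts)  # {'src/app.py': 3, 'src/cli.py': 1, '__total__': 4}

    summary = create_file_summary_from_count_data(counts)
    print(summary["total_matches"], summary["file_count"])  # 4 2
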

@dataclass
class TempFileList:
    path: str

    def __enter__(self) -> TempFileList:
        return self

    def __exit__(
        self, exc_type: type[BaseException] | None, exc: BaseException | None, tb: Any
    ) -> None:
        with contextlib.suppress(Exception):
            Path(self.path).unlink(missing_ok=True)


class contextlib:  # minimal shim for suppress without importing globally
    class suppress:
        def __init__(self, *exceptions: type[BaseException]) -> None:
            self.exceptions = exceptions

        def __enter__(self) -> None:  # noqa: D401
            return None

        def __exit__(
            self,
            exc_type: type[BaseException] | None,
            exc: BaseException | None,
            tb: Any,
        ) -> bool:
            return exc_type is not None and issubclass(exc_type, self.exceptions)


def write_files_to_temp(files: list[str]) -> TempFileList:
    fd, temp_path = tempfile.mkstemp(prefix="rg-files-", suffix=".lst")
    os.close(fd)
    content = "\n".join(files)
    from ...encoding_utils import write_file_safe

    write_file_safe(temp_path, content)
    return TempFileList(path=temp_path)
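
A usage sketch (not part of the package): write_files_to_temp defers to the package's encoding_utils.write_file_safe helper, so this assumes tree_sitter_analyzer is installed; the TempFileList context manager removes the list file on exit.

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import write_files_to_temp

    with write_files_to_temp(["src/app.py", "src/cli.py"]) as file_list:
        print(file_list.path)  # e.g. a temporary 'rg-files-*.lst' path, one file per line
    # On exit the temporary list file is deleted (errors are suppressed).
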

async def run_parallel_rg_searches(
    commands: list[list[str]],
    timeout_ms: int | None = None,
    max_concurrent: int = 4,
) -> list[tuple[int, bytes, bytes]]:
    """
    Run multiple ripgrep commands in parallel with concurrency control.

    Args:
        commands: List of ripgrep command lists to execute
        timeout_ms: Timeout in milliseconds for each command
        max_concurrent: Maximum number of concurrent processes (default: 4)

    Returns:
        List of (returncode, stdout, stderr) tuples in the same order as commands
    """
    if not commands:
        return []

    # Create semaphore to limit concurrent processes
    semaphore = asyncio.Semaphore(max_concurrent)

    async def run_single_command(cmd: list[str]) -> tuple[int, bytes, bytes]:
        async with semaphore:
            return await run_command_capture(cmd, timeout_ms=timeout_ms)

    # Execute all commands concurrently
    tasks = [run_single_command(cmd) for cmd in commands]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    # Handle exceptions and convert to proper format
    processed_results: list[tuple[int, bytes, bytes]] = []
    for _i, result in enumerate(results):
        if isinstance(result, Exception):
            # Convert exception to error result
            error_msg = f"Command failed: {str(result)}"
            processed_results.append((1, b"", error_msg.encode()))
        elif isinstance(result, tuple) and len(result) == 3:
            processed_results.append(result)
        else:
            # Fallback for unexpected result types
            processed_results.append((1, b"", b"Unexpected result type"))

    return processed_results

def merge_rg_results(
    results: list[tuple[int, bytes, bytes]],
    count_only_mode: bool = False,
) -> tuple[int, bytes, bytes]:
    """
    Merge results from multiple ripgrep executions.

    Args:
        results: List of (returncode, stdout, stderr) tuples
        count_only_mode: Whether the results are from count-only mode

    Returns:
        Merged (returncode, stdout, stderr) tuple
    """
    if not results:
        return (1, b"", b"No results to merge")

    # Check if any command failed critically (not just "no matches found")
    critical_failures = []
    successful_results = []

    for rc, stdout, stderr in results:
        if rc not in (0, 1):  # 0=matches found, 1=no matches, others=errors
            critical_failures.append((rc, stdout, stderr))
        else:
            successful_results.append((rc, stdout, stderr))

    # If all commands failed critically, return the first failure
    if not successful_results:
        return critical_failures[0] if critical_failures else (1, b"", b"")

    # Merge successful results
    if count_only_mode:
        return _merge_count_results(successful_results)
    else:
        return _merge_json_results(successful_results)
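
A sketch (not part of the package) merging synthetic (returncode, stdout, stderr) tuples in count-only mode, so no ripgrep run is needed:

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import merge_rg_results

    chunk_results = [
        (0, b"src/app.py:3\nsrc/cli.py:1", b""),  # matches found in the first chunk
        (1, b"", b""),                            # second chunk had no matches
    ]

    rc, stdout, stderr = merge_rg_results(chunk_results, count_only_mode=True)
    print(rc)               # 0 (at least one chunk matched)
    print(stdout.decode())  # 'src/app.py:3' and 'src/cli.py:1', one per line
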

def _merge_count_results(
    results: list[tuple[int, bytes, bytes]],
) -> tuple[int, bytes, bytes]:
    """Merge count-only results from multiple ripgrep executions."""
    merged_counts: dict[str, int] = {}
    total_matches = 0

    for rc, stdout, _stderr in results:
        if rc in (0, 1):  # Success or no matches
            file_counts = parse_rg_count_output(stdout)
            # Remove the __total__ key and merge file counts
            for file_path, count in file_counts.items():
                if file_path != "__total__":
                    merged_counts[file_path] = merged_counts.get(file_path, 0) + count
                    total_matches += count

    # Format as ripgrep count output
    output_lines = []
    for file_path, count in merged_counts.items():
        output_lines.append(f"{file_path}:{count}")

    merged_stdout = "\n".join(output_lines).encode("utf-8")

    # Return code 0 if we have matches, 1 if no matches
    return_code = 0 if total_matches > 0 else 1
    return (return_code, merged_stdout, b"")


def _merge_json_results(
    results: list[tuple[int, bytes, bytes]],
) -> tuple[int, bytes, bytes]:
    """Merge JSON results from multiple ripgrep executions."""
    merged_lines = []
    has_matches = False

    for rc, stdout, _stderr in results:
        if rc in (0, 1):  # Success or no matches
            if stdout.strip():
                merged_lines.extend(stdout.splitlines())
                if rc == 0:  # Has matches
                    has_matches = True

    merged_stdout = b"\n".join(merged_lines)
    return_code = 0 if has_matches else 1
    return (return_code, merged_stdout, b"")

def split_roots_for_parallel_processing(
    roots: list[str], max_chunks: int = 4
) -> list[list[str]]:
    """
    Split roots into chunks for parallel processing.

    Args:
        roots: List of root directories
        max_chunks: Maximum number of chunks to create

    Returns:
        List of root chunks for parallel processing
    """
    if not roots:
        return []

    if len(roots) <= max_chunks:
        # Each root gets its own chunk
        return [[root] for root in roots]

    # Distribute roots across chunks
    chunk_size = len(roots) // max_chunks
    remainder = len(roots) % max_chunks

    chunks = []
    start = 0

    for i in range(max_chunks):
        # Add one extra item to first 'remainder' chunks
        current_chunk_size = chunk_size + (1 if i < remainder else 0)
        end = start + current_chunk_size

        if start < len(roots):
            chunks.append(roots[start:end])

        start = end

    return [chunk for chunk in chunks if chunk]  # Remove empty chunks
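
Finally, a hedged end-to-end sketch (not part of the package) that chains split_roots_for_parallel_processing, build_rg_command, run_parallel_rg_searches, and merge_rg_results into the count-only pipeline these helpers suggest. It assumes ripgrep is installed and the listed root directories exist; both are placeholders.

    import asyncio

    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import (
        build_rg_command,
        merge_rg_results,
        run_parallel_rg_searches,
        split_roots_for_parallel_processing,
    )


    async def count_todos(roots: list[str]) -> None:
        # Each chunk of roots becomes one count-only ripgrep invocation.
        chunks = split_roots_for_parallel_processing(roots, max_chunks=4)
        commands = [
            build_rg_command(
                query="TODO", case="smart", fixed_strings=False, word=False,
                multiline=False, include_globs=None, exclude_globs=None,
                follow_symlinks=False, hidden=False, no_ignore=False,
                max_filesize=None, context_before=None, context_after=None,
                encoding=None, max_count=None, timeout_ms=None,
                roots=chunk, files_from=None, count_only_matches=True,
            )
            for chunk in chunks
        ]
        results = await run_parallel_rg_searches(commands, timeout_ms=4000)
        rc, stdout, _ = merge_rg_results(results, count_only_mode=True)
        print(rc, stdout.decode())


    # Hypothetical directories; any list of existing roots works the same way.
    asyncio.run(count_todos(["src", "tests", "docs"]))
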