tunacode-cli 0.0.17__py3-none-any.whl → 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tunacode-cli might be problematic.
- tunacode/cli/commands.py +39 -41
- tunacode/cli/main.py +29 -26
- tunacode/cli/repl.py +35 -10
- tunacode/cli/textual_app.py +69 -66
- tunacode/cli/textual_bridge.py +33 -32
- tunacode/configuration/settings.py +2 -9
- tunacode/constants.py +2 -4
- tunacode/context.py +1 -1
- tunacode/core/agents/main.py +88 -62
- tunacode/core/setup/config_setup.py +79 -44
- tunacode/core/setup/coordinator.py +20 -13
- tunacode/core/setup/git_safety_setup.py +35 -49
- tunacode/core/state.py +2 -9
- tunacode/exceptions.py +0 -2
- tunacode/tools/__init__.py +10 -1
- tunacode/tools/base.py +1 -1
- tunacode/tools/bash.py +5 -5
- tunacode/tools/grep.py +210 -250
- tunacode/tools/read_file.py +2 -8
- tunacode/tools/run_command.py +4 -11
- tunacode/tools/update_file.py +2 -6
- tunacode/ui/completers.py +32 -31
- tunacode/ui/console.py +3 -3
- tunacode/ui/input.py +8 -5
- tunacode/ui/keybindings.py +1 -3
- tunacode/ui/lexers.py +16 -16
- tunacode/ui/output.py +2 -2
- tunacode/ui/panels.py +8 -8
- tunacode/ui/prompt_manager.py +19 -7
- tunacode/utils/import_cache.py +11 -0
- tunacode/utils/user_configuration.py +24 -2
- {tunacode_cli-0.0.17.dist-info → tunacode_cli-0.0.18.dist-info}/METADATA +43 -2
- tunacode_cli-0.0.18.dist-info/RECORD +68 -0
- tunacode_cli-0.0.17.dist-info/RECORD +0 -67
- {tunacode_cli-0.0.17.dist-info → tunacode_cli-0.0.18.dist-info}/WHEEL +0 -0
- {tunacode_cli-0.0.17.dist-info → tunacode_cli-0.0.18.dist-info}/entry_points.txt +0 -0
- {tunacode_cli-0.0.17.dist-info → tunacode_cli-0.0.18.dist-info}/licenses/LICENSE +0 -0
- {tunacode_cli-0.0.17.dist-info → tunacode_cli-0.0.18.dist-info}/top_level.txt +0 -0
tunacode/tools/grep.py
CHANGED
@@ -9,22 +9,23 @@ This tool provides sophisticated grep-like functionality with:
 """
 
 import asyncio
-import re
-import subprocess
 import fnmatch
 import os
+import re
+import subprocess
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Union
-from dataclasses import dataclass
-from concurrent.futures import ThreadPoolExecutor
 
-from tunacode.tools.base import BaseTool
 from tunacode.exceptions import ToolExecutionError
+from tunacode.tools.base import BaseTool
 
 
 @dataclass
 class SearchResult:
     """Represents a single search match with context."""
+
     file_path: str
     line_number: int
     line_content: str
@@ -38,6 +39,7 @@ class SearchResult:
 @dataclass
 class SearchConfig:
     """Configuration for search operations."""
+
     case_sensitive: bool = False
     use_regex: bool = False
     max_results: int = 50
@@ -49,45 +51,56 @@ class SearchConfig:
 
 
 # Fast-Glob Prefilter Configuration
-MAX_GLOB = 5_000
-GLOB_BATCH = 500
-EXCLUDE_DIRS = {
-
-
-
+MAX_GLOB = 5_000  # Hard cap - protects memory & tokens
+GLOB_BATCH = 500  # Streaming batch size
+EXCLUDE_DIRS = {  # Common directories to skip
+    "node_modules",
+    ".git",
+    "__pycache__",
+    ".venv",
+    "venv",
+    "dist",
+    "build",
+    ".pytest_cache",
+    ".mypy_cache",
+    ".tox",
+    "target",
+    "node_modules",
 }
 
 
 def fast_glob(root: Path, include: str, exclude: str = None) -> List[Path]:
     """
     Lightning-fast filename filtering using os.scandir.
-
+
     Args:
         root: Directory to search
         include: Include pattern (e.g., "*.py", "*.{js,ts}")
         exclude: Exclude pattern (optional)
-
+
     Returns:
         List of matching file paths (bounded by MAX_GLOB)
     """
     matches, stack = [], [root]
-
+
     # Handle multiple extensions in include pattern like "*.{py,js,ts}"
-    if "{" in include and "}" in include:
+    if "{" in include and "}" in include:
         # Convert *.{py,js,ts} to multiple patterns
-        base, ext_part = include.split("{", 1)
-        ext_part = ext_part.split("}", 1)[0]
-        extensions = ext_part.split(",")
+        base, ext_part = include.split("{", 1)
+        ext_part = ext_part.split("}", 1)[0]
+        extensions = ext_part.split(",")
         include_patterns = [base + ext.strip() for ext in extensions]
-        include_regexes = [re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns]
+        include_regexes = [
+            re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns
+        ]
     else:
         include_regexes = [re.compile(fnmatch.translate(include), re.IGNORECASE)]
-
+
     exclude_rx = re.compile(fnmatch.translate(exclude), re.IGNORECASE) if exclude else None
-
+
     while stack and len(matches) < MAX_GLOB:
         current_dir = stack.pop()
-
+
         try:
             with os.scandir(current_dir) as entries:
                 for entry in entries:
@@ -95,33 +108,33 @@ def fast_glob(root: Path, include: str, exclude: str = None) -> List[Path]:
                     if entry.is_dir(follow_symlinks=False):
                         if entry.name not in EXCLUDE_DIRS:
                             stack.append(Path(entry.path))
-
+
                     # Check file matches
                     elif entry.is_file(follow_symlinks=False):
                         # Check against any include pattern
                         matches_include = any(regex.match(entry.name) for regex in include_regexes)
-
+
                         if matches_include:
                             if not exclude_rx or not exclude_rx.match(entry.name):
                                 matches.append(Path(entry.path))
-
+
         except (PermissionError, OSError):
             continue  # Skip inaccessible directories
-
+
     return matches[:MAX_GLOB]
 
 
 class ParallelGrep(BaseTool):
     """Advanced parallel grep tool with multiple search strategies."""
-
+
     def __init__(self, ui_logger=None):
         super().__init__(ui_logger)
         self._executor = ThreadPoolExecutor(max_workers=8)
-
+
     @property
     def tool_name(self) -> str:
         return "grep"
-
+
     async def _execute(
         self,
         pattern: str,
@@ -132,11 +145,11 @@ class ParallelGrep(BaseTool):
         exclude_files: Optional[str] = None,
         max_results: int = 50,
         context_lines: int = 2,
-        search_type: str = "smart"  # smart, ripgrep, python, hybrid
+        search_type: str = "smart",  # smart, ripgrep, python, hybrid
     ) -> str:
         """
         Execute parallel grep search with fast-glob prefiltering and multiple strategies.
-
+
         Args:
             pattern: Search pattern (literal text or regex)
             directory: Directory to search (default: current)
@@ -147,7 +160,7 @@ class ParallelGrep(BaseTool):
             max_results: Maximum number of results to return
             context_lines: Number of context lines before/after matches
             search_type: Search strategy to use
-
+
         Returns:
             Formatted search results
         """
@@ -155,18 +168,14 @@ class ParallelGrep(BaseTool):
             # 1️⃣ Fast-glob prefilter to find candidate files
             include_pattern = include_files or "*"
             exclude_pattern = exclude_files
-
+
             candidates = await asyncio.get_event_loop().run_in_executor(
-                self._executor,
-                fast_glob,
-                Path(directory),
-                include_pattern,
-                exclude_pattern
+                self._executor, fast_glob, Path(directory), include_pattern, exclude_pattern
             )
-
+
             if not candidates:
                 return f"No files found matching pattern: {include_pattern}"
-
+
             # 2️⃣ Smart strategy selection based on candidate count
             original_search_type = search_type
             if search_type == "smart":
@@ -175,11 +184,11 @@ class ParallelGrep(BaseTool):
                 search_type = "python"
             elif len(candidates) <= 1000:
                 # Medium set - Ripgrep optimal for this range
-                search_type = "ripgrep"
+                search_type = "ripgrep"
             else:
                 # Large set - Hybrid for best coverage and redundancy
                 search_type = "hybrid"
-
+
             # 3️⃣ Create search configuration
             # Note: include_patterns/exclude_patterns now only used for legacy compatibility
             include_patterns = self._parse_patterns(include_files) if include_files else ["*"]
@@ -190,9 +199,9 @@ class ParallelGrep(BaseTool):
                 max_results=max_results,
                 context_lines=context_lines,
                 include_patterns=include_patterns,
-                exclude_patterns=exclude_patterns
+                exclude_patterns=exclude_patterns,
             )
-
+
             # 4️⃣ Execute chosen strategy with pre-filtered candidates
             if search_type == "ripgrep":
                 results = await self._ripgrep_search_filtered(pattern, candidates, config)
@@ -202,30 +211,29 @@ class ParallelGrep(BaseTool):
                 results = await self._hybrid_search_filtered(pattern, candidates, config)
             else:
                 raise ToolExecutionError(f"Unknown search type: {search_type}")
-
+
             # 5️⃣ Format and return results with strategy info
             strategy_info = f"Strategy: {search_type} (was {original_search_type}), Files: {len(candidates)}/{MAX_GLOB}"
             formatted_results = self._format_results(results, pattern, config)
-
+
             # Add strategy info to results
             if formatted_results.startswith("Found"):
-                lines = formatted_results.split("\n")
-                lines[1] = f"Strategy: {search_type} | Candidates: {len(candidates)} files | " + lines[1]
-                return "\n".join(lines)
+                lines = formatted_results.split("\n")
+                lines[1] = (
+                    f"Strategy: {search_type} | Candidates: {len(candidates)} files | " + lines[1]
+                )
+                return "\n".join(lines)
             else:
                 return f"{formatted_results}\n\n{strategy_info}"
-
+
         except Exception as e:
             raise ToolExecutionError(f"Grep search failed: {str(e)}")
-
+
     async def _smart_search(
-        self,
-        pattern: str,
-        directory: str,
-        config: SearchConfig
+        self, pattern: str, directory: str, config: SearchConfig
     ) -> List[SearchResult]:
         """Smart search that chooses optimal strategy based on context."""
-
+
         # Try ripgrep first (fastest for large codebases)
         try:
             results = await self._ripgrep_search(pattern, directory, config)
@@ -233,21 +241,18 @@ class ParallelGrep(BaseTool):
                 return results
         except:
             pass
-
+
         # Fallback to Python implementation
         return await self._python_search(pattern, directory, config)
-
+
     async def _ripgrep_search(
-        self,
-        pattern: str,
-        directory: str,
-        config: SearchConfig
+        self, pattern: str, directory: str, config: SearchConfig
     ) -> List[SearchResult]:
         """Use ripgrep for high-performance searching."""
-
+
         def run_ripgrep():
             cmd = ["rg", "--json"]
-
+
             # Add options based on config
             if not config.case_sensitive:
                 cmd.append("--ignore-case")
@@ -255,100 +260,87 @@ class ParallelGrep(BaseTool):
                 cmd.extend(["--context", str(config.context_lines)])
             if config.max_results:
                 cmd.extend(["--max-count", str(config.max_results)])
-
+
             # Add include/exclude patterns
             for pattern_str in config.include_patterns:
                 if pattern_str != "*":
                     cmd.extend(["--glob", pattern_str])
             for pattern_str in config.exclude_patterns:
                 cmd.extend(["--glob", f"!{pattern_str}"])
-
+
             # Add pattern and directory
             cmd.extend([pattern, directory])
-
+
             try:
                 result = subprocess.run(
-                    cmd,
-                    capture_output=True,
-                    text=True,
-                    timeout=config.timeout_seconds
+                    cmd, capture_output=True, text=True, timeout=config.timeout_seconds
                 )
                 return result.stdout if result.returncode == 0 else None
             except (subprocess.TimeoutExpired, FileNotFoundError):
                 return None
-
+
         # Run ripgrep in thread pool
-        output = await asyncio.get_event_loop().run_in_executor(
-            self._executor, run_ripgrep
-        )
-
+        output = await asyncio.get_event_loop().run_in_executor(self._executor, run_ripgrep)
+
         if not output:
             return []
-
+
         # Parse ripgrep JSON output
         return self._parse_ripgrep_output(output)
-
+
     async def _python_search(
-        self,
-        pattern: str,
-        directory: str,
-        config: SearchConfig
+        self, pattern: str, directory: str, config: SearchConfig
     ) -> List[SearchResult]:
         """Pure Python parallel search implementation."""
-
+
         # Find all files to search
         files = await self._find_files(directory, config)
-
+
         # Prepare search pattern
         if config.use_regex:
             flags = 0 if config.case_sensitive else re.IGNORECASE
             regex_pattern = re.compile(pattern, flags)
         else:
             regex_pattern = None
-
+
         # Create search tasks for parallel execution
         search_tasks = []
         for file_path in files:
-            task = self._search_file(
-                file_path, pattern, regex_pattern, config
-            )
+            task = self._search_file(file_path, pattern, regex_pattern, config)
             search_tasks.append(task)
-
+
         # Execute searches in parallel
         all_results = await asyncio.gather(*search_tasks, return_exceptions=True)
-
+
         # Flatten results and filter out exceptions
         results = []
         for file_results in all_results:
             if isinstance(file_results, list):
                 results.extend(file_results)
-
+
         # Sort by relevance and limit results
         results.sort(key=lambda r: r.relevance_score, reverse=True)
-        return results[:config.max_results]
-
+        return results[: config.max_results]
+
     async def _hybrid_search(
-        self,
-        pattern: str,
-        directory: str,
-        config: SearchConfig
+        self, pattern: str, directory: str, config: SearchConfig
     ) -> List[SearchResult]:
         """Hybrid approach using multiple search methods concurrently."""
-
+
         # Run multiple search strategies in parallel
         tasks = [
             self._ripgrep_search(pattern, directory, config),
-            self._python_search(pattern, directory, config)
+            self._python_search(pattern, directory, config),
         ]
-
+
         results_list = await asyncio.gather(*tasks, return_exceptions=True)
-
+
         # Merge and deduplicate results
         all_results = []
         for results in results_list:
             if isinstance(results, list):
                 all_results.extend(results)
-
+
         # Deduplicate by file path and line number
         seen = set()
         unique_results = []
@@ -357,25 +349,23 @@ class ParallelGrep(BaseTool):
             if key not in seen:
                 seen.add(key)
                 unique_results.append(result)
-
+
         # Sort and limit
         unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
-        return unique_results[:config.max_results]
-
+        return unique_results[: config.max_results]
+
     # ====== NEW FILTERED SEARCH METHODS ======
-
+
     async def _ripgrep_search_filtered(
-        self,
-        pattern: str,
-        candidates: List[Path],
-        config: SearchConfig
+        self, pattern: str, candidates: List[Path], config: SearchConfig
     ) -> List[SearchResult]:
         """
         Run ripgrep on pre-filtered file list.
         """
+
         def run_ripgrep_filtered():
             cmd = ["rg", "--json"]
-
+
             # Add configuration flags
             if not config.case_sensitive:
                 cmd.append("--ignore-case")
@@ -383,34 +373,28 @@ class ParallelGrep(BaseTool):
                 cmd.extend(["--context", str(config.context_lines)])
             if config.max_results:
                 cmd.extend(["--max-count", str(config.max_results)])
-
+
             # Add pattern and explicit file list
             cmd.append(pattern)
             cmd.extend(str(f) for f in candidates)
-
+
             try:
                 result = subprocess.run(
-                    cmd,
-                    capture_output=True,
-                    text=True,
-                    timeout=config.timeout_seconds
+                    cmd, capture_output=True, text=True, timeout=config.timeout_seconds
                 )
                 return result.stdout if result.returncode == 0 else None
             except (subprocess.TimeoutExpired, FileNotFoundError):
                 return None
-
+
         # Run ripgrep in thread pool
         output = await asyncio.get_event_loop().run_in_executor(
             self._executor, run_ripgrep_filtered
         )
-
+
         return self._parse_ripgrep_output(output) if output else []
-
+
     async def _python_search_filtered(
-        self,
-        pattern: str,
-        candidates: List[Path],
-        config: SearchConfig
+        self, pattern: str, candidates: List[Path], config: SearchConfig
     ) -> List[SearchResult]:
         """
         Run Python parallel search on pre-filtered candidates.
@@ -421,52 +405,47 @@ class ParallelGrep(BaseTool):
             regex_pattern = re.compile(pattern, flags)
         else:
             regex_pattern = None
-
+
         # Create search tasks for candidates only
         search_tasks = []
         for file_path in candidates:
-            task = self._search_file(
-                file_path, pattern, regex_pattern, config
-            )
+            task = self._search_file(file_path, pattern, regex_pattern, config)
             search_tasks.append(task)
-
+
         # Execute searches in parallel
         all_results = await asyncio.gather(*search_tasks, return_exceptions=True)
-
+
         # Flatten results and filter out exceptions
         results = []
         for file_results in all_results:
             if isinstance(file_results, list):
                 results.extend(file_results)
-
+
         # Sort by relevance and limit results
         results.sort(key=lambda r: r.relevance_score, reverse=True)
-        return results[:config.max_results]
-
+        return results[: config.max_results]
+
     async def _hybrid_search_filtered(
-        self,
-        pattern: str,
-        candidates: List[Path],
-        config: SearchConfig
+        self, pattern: str, candidates: List[Path], config: SearchConfig
    ) -> List[SearchResult]:
         """
         Hybrid approach using multiple search methods concurrently on pre-filtered candidates.
         """
-
+
         # Run multiple search strategies in parallel
         tasks = [
             self._ripgrep_search_filtered(pattern, candidates, config),
-            self._python_search_filtered(pattern, candidates, config)
+            self._python_search_filtered(pattern, candidates, config),
         ]
-
+
         results_list = await asyncio.gather(*tasks, return_exceptions=True)
-
+
         # Merge and deduplicate results
         all_results = []
         for results in results_list:
             if isinstance(results, list):
                 all_results.extend(results)
-
+
         # Deduplicate by file path and line number
         seen = set()
         unique_results = []
@@ -475,69 +454,65 @@ class ParallelGrep(BaseTool):
             if key not in seen:
                 seen.add(key)
                 unique_results.append(result)
-
+
         # Sort and limit
         unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
-        return unique_results[:config.max_results]
-
-    async def _find_files(
-        self,
-        directory: str,
-        config: SearchConfig
-    ) -> List[Path]:
+        return unique_results[: config.max_results]
+
+    async def _find_files(self, directory: str, config: SearchConfig) -> List[Path]:
         """Find all files matching include/exclude patterns."""
-
+
         def find_files_sync():
             files = []
             dir_path = Path(directory)
-
+
             for file_path in dir_path.rglob("*"):
                 if not file_path.is_file():
                     continue
-
+
                 # Check file size
                 try:
                     if file_path.stat().st_size > config.max_file_size:
                         continue
                 except OSError:
                     continue
-
+
                 # Check include patterns
-                if not any(
-
+                if not any(
+                    fnmatch.fnmatch(str(file_path), pattern) for pattern in config.include_patterns
+                ):
                     continue
-
+
                 # Check exclude patterns
-                if any(
-
+                if any(
+                    fnmatch.fnmatch(str(file_path), pattern) for pattern in config.exclude_patterns
+                ):
                     continue
-
+
                 files.append(file_path)
-
+
             return files
-
-        return await asyncio.get_event_loop().run_in_executor(
-            self._executor, find_files_sync
-        )
-
+
+        return await asyncio.get_event_loop().run_in_executor(self._executor, find_files_sync)
+
     async def _search_file(
         self,
         file_path: Path,
         pattern: str,
         regex_pattern: Optional[re.Pattern],
-        config: SearchConfig
+        config: SearchConfig,
     ) -> List[SearchResult]:
         """Search a single file for the pattern."""
-
+
         def search_file_sync():
             try:
-                with file_path.open("r", encoding="utf-8", errors="ignore") as f:
+                with file_path.open("r", encoding="utf-8", errors="ignore") as f:
                     lines = f.readlines()
-
+
                 results = []
                 for i, line in enumerate(lines):
-                    line = line.rstrip("\n\r")
-
+                    line = line.rstrip("\n\r")
+
                     # Search for pattern
                     if regex_pattern:
                         matches = list(regex_pattern.finditer(line))
@@ -545,170 +520,155 @@ class ParallelGrep(BaseTool):
                         # Simple string search
                         search_line = line if config.case_sensitive else line.lower()
                         search_pattern = pattern if config.case_sensitive else pattern.lower()
-
+
                         matches = []
                         start = 0
                         while True:
                             pos = search_line.find(search_pattern, start)
                             if pos == -1:
                                 break
+
                             # Create a simple match object
                             class SimpleMatch:
                                 def __init__(self, start_pos, end_pos):
                                     self._start = start_pos
                                     self._end = end_pos
+
                                 def start(self):
                                     return self._start
+
                                 def end(self):
                                     return self._end
+
                             match = SimpleMatch(pos, pos + len(search_pattern))
                             matches.append(match)
                             start = pos + 1
-
+
                     # Create results for each match
                     for match in matches:
                         # Get context lines
                         context_start = max(0, i - config.context_lines)
                         context_end = min(len(lines), i + config.context_lines + 1)
-
-                        context_before = [
-                            lines[j].rstrip('\n\r')
-                            for j in range(context_start, i)
-                        ]
-                        context_after = [
-                            lines[j].rstrip('\n\r')
-                            for j in range(i + 1, context_end)
-                        ]
-
+
+                        context_before = [lines[j].rstrip("\n\r") for j in range(context_start, i)]
+                        context_after = [lines[j].rstrip("\n\r") for j in range(i + 1, context_end)]
+
                         # Calculate relevance score
-                        relevance = self._calculate_relevance(
-                            str(file_path), line, pattern, match
-                        )
-
+                        relevance = self._calculate_relevance(str(file_path), line, pattern, match)
+
                         result = SearchResult(
                             file_path=str(file_path),
                             line_number=i + 1,
                             line_content=line,
-                            match_start=match.start() if hasattr(match, "start") else match.start(),
-                            match_end=match.end() if hasattr(match, "end") else match.end(),
+                            match_start=match.start() if hasattr(match, "start") else match.start(),
+                            match_end=match.end() if hasattr(match, "end") else match.end(),
                             context_before=context_before,
                             context_after=context_after,
-                            relevance_score=relevance
+                            relevance_score=relevance,
                         )
                         results.append(result)
-
+
                 return results
-
+
         except Exception:
             return []
-
-        return await asyncio.get_event_loop().run_in_executor(
-            self._executor, search_file_sync
-        )
-
-    def _calculate_relevance(
-        self,
-        file_path: str,
-        line: str,
-        pattern: str,
-        match
-    ) -> float:
+
+        return await asyncio.get_event_loop().run_in_executor(self._executor, search_file_sync)
+
+    def _calculate_relevance(self, file_path: str, line: str, pattern: str, match) -> float:
         """Calculate relevance score for a search result."""
         score = 0.0
-
+
         # Base score
         score += 1.0
-
+
         # Boost for exact matches
         if pattern.lower() in line.lower():
             score += 0.5
-
+
         # Boost for matches at word boundaries
         if match.start() == 0 or not line[match.start() - 1].isalnum():
             score += 0.3
-
+
         # Boost for certain file types
-        if file_path.endswith((".py", ".js", ".ts", ".java", ".cpp", ".c")):
+        if file_path.endswith((".py", ".js", ".ts", ".java", ".cpp", ".c")):
             score += 0.2
-
+
         # Boost for matches in comments or docstrings
         stripped_line = line.strip()
-        if stripped_line.startswith(("#", "//", "/*", '"""', "'''")):
+        if stripped_line.startswith(("#", "//", "/*", '"""', "'''")):
             score += 0.1
-
+
         return score
-
+
     def _parse_ripgrep_output(self, output: str) -> List[SearchResult]:
         """Parse ripgrep JSON output into SearchResult objects."""
         import json
-
+
         results = []
-        for line in output.strip().split("\n"):
+        for line in output.strip().split("\n"):
             if not line:
                 continue
-
+
             try:
                 data = json.loads(line)
-                if data.get("type") != "match":
+                if data.get("type") != "match":
                     continue
-
-                match_data = data["data"]
+
+                match_data = data["data"]
                 result = SearchResult(
-                    file_path=match_data["path"]["text"],
-                    line_number=match_data["line_number"],
-                    line_content=match_data["lines"]["text"].rstrip("\n\r"),
-                    match_start=match_data["submatches"][0]["start"],
-                    match_end=match_data["submatches"][0]["end"],
+                    file_path=match_data["path"]["text"],
+                    line_number=match_data["line_number"],
+                    line_content=match_data["lines"]["text"].rstrip("\n\r"),
+                    match_start=match_data["submatches"][0]["start"],
+                    match_end=match_data["submatches"][0]["end"],
                     context_before=[],  # Ripgrep context handling would go here
                     context_after=[],
-                    relevance_score=1.0
+                    relevance_score=1.0,
                 )
                 results.append(result)
             except (json.JSONDecodeError, KeyError):
                 continue
-
+
         return results
-
+
     def _parse_patterns(self, patterns: str) -> List[str]:
         """Parse comma-separated file patterns."""
-        return [p.strip() for p in patterns.split(",") if p.strip()]
-
+        return [p.strip() for p in patterns.split(",") if p.strip()]
+
     def _format_results(
-        self,
-        results: List[SearchResult],
-        pattern: str,
-        config: SearchConfig
+        self, results: List[SearchResult], pattern: str, config: SearchConfig
     ) -> str:
         """Format search results for display."""
         if not results:
             return f"No matches found for pattern: {pattern}"
-
+
         output = []
         output.append(f"Found {len(results)} matches for pattern: {pattern}")
         output.append("=" * 60)
-
+
         for result in results:
             # File header
             output.append(f"\n📁 {result.file_path}:{result.line_number}")
-
+
             # Context before
             for i, context_line in enumerate(result.context_before):
                 line_num = result.line_number - len(result.context_before) + i
                 output.append(f"  {line_num:4d}│ {context_line}")
-
+
             # Main match line with highlighting
             line_content = result.line_content
-            before_match = line_content[:result.match_start]
-            match_text = line_content[result.match_start:result.match_end]
-            after_match = line_content[result.match_end:]
-
+            before_match = line_content[: result.match_start]
+            match_text = line_content[result.match_start : result.match_end]
+            after_match = line_content[result.match_end :]
+
             output.append(f"▶ {result.line_number:4d}│ {before_match}⟨{match_text}⟩{after_match}")
-
+
             # Context after
             for i, context_line in enumerate(result.context_after):
                 line_num = result.line_number + i + 1
                 output.append(f"  {line_num:4d}│ {context_line}")
-
+
         return "\n".join(output)
 
 
@@ -722,11 +682,11 @@ async def grep(
     exclude_files: Optional[str] = None,
     max_results: int = 50,
     context_lines: int = 2,
-    search_type: str = "smart"
+    search_type: str = "smart",
 ) -> str:
     """
     Advanced parallel grep search with multiple strategies.
-
+
     Args:
         pattern: Search pattern (literal text or regex)
         directory: Directory to search (default: current directory)
@@ -737,10 +697,10 @@ async def grep(
         max_results: Maximum number of results to return (default: 50)
         context_lines: Number of context lines before/after matches (default: 2)
         search_type: Search strategy - "smart", "ripgrep", "python", or "hybrid" (default: "smart")
-
+
     Returns:
         Formatted search results with file paths, line numbers, and context
-
+
     Examples:
         grep("TODO", ".", max_results=20)
         grep("function.*export", "src/", use_regex=True, include_files="*.js,*.ts")
@@ -756,5 +716,5 @@ async def grep(
         exclude_files=exclude_files,
         max_results=max_results,
         context_lines=context_lines,
-        search_type=search_type
-    )
+        search_type=search_type,
+    )
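For orientation, the public entry point after this change is still the module-level async grep() helper, now with a trailing-comma signature. Below is a minimal usage sketch, assuming the tunacode.tools.grep import path from the file header above; it is an illustration based on the signature and docstring examples in this diff, not an example shipped with the release.

import asyncio

from tunacode.tools.grep import grep  # import path assumed from this diff's file header


async def main() -> None:
    # search_type="smart" lets _execute() pick the python, ripgrep, or hybrid
    # strategy from the fast-glob candidate count, per the hunks above.
    report = await grep(
        "TODO",
        ".",
        include_files="*.py,*.md",  # comma-separated, split by _parse_patterns()
        max_results=20,
        context_lines=2,
        search_type="smart",
    )
    print(report)


asyncio.run(main())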