tunacode-cli 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tunacode-cli might be problematic.
- tunacode/cli/commands.py +205 -14
- tunacode/cli/repl.py +41 -2
- tunacode/configuration/defaults.py +1 -0
- tunacode/constants.py +1 -1
- tunacode/core/agents/main.py +220 -2
- tunacode/core/state.py +10 -2
- tunacode/prompts/system.txt +22 -0
- tunacode/tools/__init__.py +1 -0
- tunacode/tools/bash.py +252 -0
- tunacode/tools/grep.py +760 -0
- tunacode/tools/read_file.py +15 -10
- tunacode/tools/run_command.py +13 -7
- tunacode/tools/update_file.py +9 -10
- tunacode/tools/write_file.py +8 -9
- {tunacode_cli-0.0.12.dist-info → tunacode_cli-0.0.14.dist-info}/METADATA +50 -14
- {tunacode_cli-0.0.12.dist-info → tunacode_cli-0.0.14.dist-info}/RECORD +20 -18
- {tunacode_cli-0.0.12.dist-info → tunacode_cli-0.0.14.dist-info}/WHEEL +0 -0
- {tunacode_cli-0.0.12.dist-info → tunacode_cli-0.0.14.dist-info}/entry_points.txt +0 -0
- {tunacode_cli-0.0.12.dist-info → tunacode_cli-0.0.14.dist-info}/licenses/LICENSE +0 -0
- {tunacode_cli-0.0.12.dist-info → tunacode_cli-0.0.14.dist-info}/top_level.txt +0 -0
tunacode/tools/grep.py
ADDED
@@ -0,0 +1,760 @@

"""
Parallel grep tool for TunaCode - Enhanced content search with parallel processing.

This tool provides sophisticated grep-like functionality with:
- Parallel file searching across multiple directories
- Multiple search strategies (literal, regex, fuzzy)
- Smart result ranking and deduplication
- Context-aware output formatting
"""

import asyncio
import fnmatch
import json
import os
import re
import subprocess
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional

from tunacode.tools.base import BaseTool
from tunacode.exceptions import ToolExecutionError


@dataclass
class SearchResult:
    """Represents a single search match with context."""
    file_path: str
    line_number: int
    line_content: str
    match_start: int
    match_end: int
    context_before: List[str]
    context_after: List[str]
    relevance_score: float = 0.0


@dataclass
class SearchConfig:
    """Configuration for search operations."""
    case_sensitive: bool = False
    use_regex: bool = False
    max_results: int = 50
    context_lines: int = 2
    include_patterns: Optional[List[str]] = None
    exclude_patterns: Optional[List[str]] = None
    max_file_size: int = 1024 * 1024  # 1 MB
    timeout_seconds: int = 30


# Fast-glob prefilter configuration
MAX_GLOB = 5_000   # Hard cap - protects memory & tokens
GLOB_BATCH = 500   # Streaming batch size
EXCLUDE_DIRS = {   # Common directories to skip
    'node_modules', '.git', '__pycache__',
    '.venv', 'venv', 'dist', 'build', '.pytest_cache',
    '.mypy_cache', '.tox', 'target',
}


def fast_glob(root: Path, include: str, exclude: Optional[str] = None) -> List[Path]:
    """
    Lightning-fast filename filtering using os.scandir.

    Args:
        root: Directory to search
        include: Include pattern (e.g., "*.py", "*.{js,ts}")
        exclude: Exclude pattern (optional)

    Returns:
        List of matching file paths (bounded by MAX_GLOB)
    """
    matches, stack = [], [root]

    # Handle multiple extensions in the include pattern, e.g. "*.{py,js,ts}"
    if '{' in include and '}' in include:
        # Expand *.{py,js,ts} into one pattern per extension
        base, ext_part = include.split('{', 1)
        ext_part = ext_part.split('}', 1)[0]
        extensions = ext_part.split(',')
        include_patterns = [base + ext.strip() for ext in extensions]
        include_regexes = [re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns]
    else:
        include_regexes = [re.compile(fnmatch.translate(include), re.IGNORECASE)]

    exclude_rx = re.compile(fnmatch.translate(exclude), re.IGNORECASE) if exclude else None

    while stack and len(matches) < MAX_GLOB:
        current_dir = stack.pop()

        try:
            with os.scandir(current_dir) as entries:
                for entry in entries:
                    # Recurse into directories, skipping common irrelevant ones
                    if entry.is_dir(follow_symlinks=False):
                        if entry.name not in EXCLUDE_DIRS:
                            stack.append(Path(entry.path))

                    # Check files against the include/exclude patterns
                    elif entry.is_file(follow_symlinks=False):
                        matches_include = any(regex.match(entry.name) for regex in include_regexes)

                        if matches_include:
                            if not exclude_rx or not exclude_rx.match(entry.name):
                                matches.append(Path(entry.path))

        except (PermissionError, OSError):
            continue  # Skip inaccessible directories

    return matches[:MAX_GLOB]


class ParallelGrep(BaseTool):
    """Advanced parallel grep tool with multiple search strategies."""

    def __init__(self, ui_logger=None):
        super().__init__(ui_logger)
        self._executor = ThreadPoolExecutor(max_workers=8)

    @property
    def tool_name(self) -> str:
        return "grep"

    async def _execute(
        self,
        pattern: str,
        directory: str = ".",
        case_sensitive: bool = False,
        use_regex: bool = False,
        include_files: Optional[str] = None,
        exclude_files: Optional[str] = None,
        max_results: int = 50,
        context_lines: int = 2,
        search_type: str = "smart",  # smart, ripgrep, python, hybrid
    ) -> str:
        """
        Execute parallel grep search with fast-glob prefiltering and multiple strategies.

        Args:
            pattern: Search pattern (literal text or regex)
            directory: Directory to search (default: current)
            case_sensitive: Whether the search is case sensitive
            use_regex: Whether the pattern is a regular expression
            include_files: File patterns to include (e.g., "*.py", "*.{js,ts}")
            exclude_files: File patterns to exclude (e.g., "*.pyc", "node_modules/*")
            max_results: Maximum number of results to return
            context_lines: Number of context lines before/after matches
            search_type: Search strategy to use

        Returns:
            Formatted search results
        """
        try:
            # 1. Fast-glob prefilter to find candidate files
            include_pattern = include_files or "*"
            exclude_pattern = exclude_files

            candidates = await asyncio.get_event_loop().run_in_executor(
                self._executor,
                fast_glob,
                Path(directory),
                include_pattern,
                exclude_pattern,
            )

            if not candidates:
                return f"No files found matching pattern: {include_pattern}"

            # 2. Smart strategy selection based on candidate count
            original_search_type = search_type
            if search_type == "smart":
                if len(candidates) <= 50:
                    # Small set - Python strategy is more efficient (low startup cost)
                    search_type = "python"
                elif len(candidates) <= 1000:
                    # Medium set - ripgrep is optimal for this range
                    search_type = "ripgrep"
                else:
                    # Large set - hybrid for best coverage and redundancy
                    search_type = "hybrid"

            # 3. Create the search configuration
            # Note: include_patterns/exclude_patterns are now only used for legacy compatibility
            include_patterns = self._parse_patterns(include_files) if include_files else ["*"]
            exclude_patterns = self._parse_patterns(exclude_files) if exclude_files else []
            config = SearchConfig(
                case_sensitive=case_sensitive,
                use_regex=use_regex,
                max_results=max_results,
                context_lines=context_lines,
                include_patterns=include_patterns,
                exclude_patterns=exclude_patterns,
            )

            # 4. Execute the chosen strategy on the pre-filtered candidates
            if search_type == "ripgrep":
                results = await self._ripgrep_search_filtered(pattern, candidates, config)
            elif search_type == "python":
                results = await self._python_search_filtered(pattern, candidates, config)
            elif search_type == "hybrid":
                results = await self._hybrid_search_filtered(pattern, candidates, config)
            else:
                raise ToolExecutionError(f"Unknown search type: {search_type}")

            # 5. Format and return the results with strategy info
            strategy_info = f"Strategy: {search_type} (was {original_search_type}), Files: {len(candidates)}/{MAX_GLOB}"
            formatted_results = self._format_results(results, pattern, config)

            # Prepend the strategy info to the results
            if formatted_results.startswith("Found"):
                lines = formatted_results.split('\n')
                lines[1] = f"Strategy: {search_type} | Candidates: {len(candidates)} files | " + lines[1]
                return '\n'.join(lines)
            else:
                return f"{formatted_results}\n\n{strategy_info}"

        except Exception as e:
            raise ToolExecutionError(f"Grep search failed: {str(e)}")

    async def _smart_search(
        self,
        pattern: str,
        directory: str,
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Smart search that chooses the optimal strategy based on context."""

        # Try ripgrep first (fastest for large codebases)
        try:
            results = await self._ripgrep_search(pattern, directory, config)
            if results:
                return results
        except Exception:
            pass

        # Fall back to the Python implementation
        return await self._python_search(pattern, directory, config)

    async def _ripgrep_search(
        self,
        pattern: str,
        directory: str,
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Use ripgrep for high-performance searching."""

        def run_ripgrep():
            cmd = ["rg", "--json"]

            # Add options based on config
            if not config.case_sensitive:
                cmd.append("--ignore-case")
            if config.context_lines > 0:
                cmd.extend(["--context", str(config.context_lines)])
            if config.max_results:
                cmd.extend(["--max-count", str(config.max_results)])

            # Add include/exclude patterns
            for pattern_str in config.include_patterns:
                if pattern_str != "*":
                    cmd.extend(["--glob", pattern_str])
            for pattern_str in config.exclude_patterns:
                cmd.extend(["--glob", f"!{pattern_str}"])

            # Add the pattern and directory
            cmd.extend([pattern, directory])

            try:
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=config.timeout_seconds,
                )
                return result.stdout if result.returncode == 0 else None
            except (subprocess.TimeoutExpired, FileNotFoundError):
                return None

        # Run ripgrep in the thread pool
        output = await asyncio.get_event_loop().run_in_executor(
            self._executor, run_ripgrep
        )

        if not output:
            return []

        # Parse ripgrep's JSON output
        return self._parse_ripgrep_output(output)

    async def _python_search(
        self,
        pattern: str,
        directory: str,
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Pure-Python parallel search implementation."""

        # Find all files to search
        files = await self._find_files(directory, config)

        # Prepare the search pattern
        if config.use_regex:
            flags = 0 if config.case_sensitive else re.IGNORECASE
            regex_pattern = re.compile(pattern, flags)
        else:
            regex_pattern = None

        # Create search tasks for parallel execution
        search_tasks = []
        for file_path in files:
            task = self._search_file(
                file_path, pattern, regex_pattern, config
            )
            search_tasks.append(task)

        # Execute the searches in parallel
        all_results = await asyncio.gather(*search_tasks, return_exceptions=True)

        # Flatten the results and filter out exceptions
        results = []
        for file_results in all_results:
            if isinstance(file_results, list):
                results.extend(file_results)

        # Sort by relevance and limit results
        results.sort(key=lambda r: r.relevance_score, reverse=True)
        return results[:config.max_results]

    async def _hybrid_search(
        self,
        pattern: str,
        directory: str,
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Hybrid approach using multiple search methods concurrently."""

        # Run multiple search strategies in parallel
        tasks = [
            self._ripgrep_search(pattern, directory, config),
            self._python_search(pattern, directory, config),
        ]

        results_list = await asyncio.gather(*tasks, return_exceptions=True)

        # Merge the results
        all_results = []
        for results in results_list:
            if isinstance(results, list):
                all_results.extend(results)

        # Deduplicate by file path and line number
        seen = set()
        unique_results = []
        for result in all_results:
            key = (result.file_path, result.line_number)
            if key not in seen:
                seen.add(key)
                unique_results.append(result)

        # Sort and limit
        unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
        return unique_results[:config.max_results]

    # ====== New filtered search methods (operate on pre-filtered candidates) ======

    async def _ripgrep_search_filtered(
        self,
        pattern: str,
        candidates: List[Path],
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Run ripgrep on a pre-filtered file list."""

        def run_ripgrep_filtered():
            cmd = ["rg", "--json"]

            # Add configuration flags
            if not config.case_sensitive:
                cmd.append("--ignore-case")
            if config.context_lines > 0:
                cmd.extend(["--context", str(config.context_lines)])
            if config.max_results:
                cmd.extend(["--max-count", str(config.max_results)])

            # Add the pattern and the explicit file list
            cmd.append(pattern)
            cmd.extend(str(f) for f in candidates)

            try:
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=config.timeout_seconds,
                )
                return result.stdout if result.returncode == 0 else None
            except (subprocess.TimeoutExpired, FileNotFoundError):
                return None

        # Run ripgrep in the thread pool
        output = await asyncio.get_event_loop().run_in_executor(
            self._executor, run_ripgrep_filtered
        )

        return self._parse_ripgrep_output(output) if output else []

    async def _python_search_filtered(
        self,
        pattern: str,
        candidates: List[Path],
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Run the Python parallel search on pre-filtered candidates."""

        # Prepare the search pattern
        if config.use_regex:
            flags = 0 if config.case_sensitive else re.IGNORECASE
            regex_pattern = re.compile(pattern, flags)
        else:
            regex_pattern = None

        # Create search tasks for the candidates only
        search_tasks = []
        for file_path in candidates:
            task = self._search_file(
                file_path, pattern, regex_pattern, config
            )
            search_tasks.append(task)

        # Execute the searches in parallel
        all_results = await asyncio.gather(*search_tasks, return_exceptions=True)

        # Flatten the results and filter out exceptions
        results = []
        for file_results in all_results:
            if isinstance(file_results, list):
                results.extend(file_results)

        # Sort by relevance and limit results
        results.sort(key=lambda r: r.relevance_score, reverse=True)
        return results[:config.max_results]

    async def _hybrid_search_filtered(
        self,
        pattern: str,
        candidates: List[Path],
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Hybrid approach running multiple search methods concurrently on pre-filtered candidates."""

        # Run multiple search strategies in parallel
        tasks = [
            self._ripgrep_search_filtered(pattern, candidates, config),
            self._python_search_filtered(pattern, candidates, config),
        ]

        results_list = await asyncio.gather(*tasks, return_exceptions=True)

        # Merge the results
        all_results = []
        for results in results_list:
            if isinstance(results, list):
                all_results.extend(results)

        # Deduplicate by file path and line number
        seen = set()
        unique_results = []
        for result in all_results:
            key = (result.file_path, result.line_number)
            if key not in seen:
                seen.add(key)
                unique_results.append(result)

        # Sort and limit
        unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
        return unique_results[:config.max_results]

    async def _find_files(
        self,
        directory: str,
        config: SearchConfig,
    ) -> List[Path]:
        """Find all files matching the include/exclude patterns."""

        def find_files_sync():
            files = []
            dir_path = Path(directory)

            for file_path in dir_path.rglob("*"):
                if not file_path.is_file():
                    continue

                # Check the file size
                try:
                    if file_path.stat().st_size > config.max_file_size:
                        continue
                except OSError:
                    continue

                # Check the include patterns
                if not any(fnmatch.fnmatch(str(file_path), pattern)
                           for pattern in config.include_patterns):
                    continue

                # Check the exclude patterns
                if any(fnmatch.fnmatch(str(file_path), pattern)
                       for pattern in config.exclude_patterns):
                    continue

                files.append(file_path)

            return files

        return await asyncio.get_event_loop().run_in_executor(
            self._executor, find_files_sync
        )

    async def _search_file(
        self,
        file_path: Path,
        pattern: str,
        regex_pattern: Optional[re.Pattern],
        config: SearchConfig,
    ) -> List[SearchResult]:
        """Search a single file for the pattern."""

        class SimpleMatch:
            """Minimal stand-in for re.Match when doing plain string searches."""

            def __init__(self, start_pos, end_pos):
                self._start = start_pos
                self._end = end_pos

            def start(self):
                return self._start

            def end(self):
                return self._end

        def search_file_sync():
            try:
                with file_path.open('r', encoding='utf-8', errors='ignore') as f:
                    lines = f.readlines()

                results = []
                for i, line in enumerate(lines):
                    line = line.rstrip('\n\r')

                    # Search for the pattern
                    if regex_pattern:
                        matches = list(regex_pattern.finditer(line))
                    else:
                        # Simple (optionally case-folded) substring search
                        search_line = line if config.case_sensitive else line.lower()
                        search_pattern = pattern if config.case_sensitive else pattern.lower()

                        matches = []
                        start = 0
                        while True:
                            pos = search_line.find(search_pattern, start)
                            if pos == -1:
                                break
                            matches.append(SimpleMatch(pos, pos + len(search_pattern)))
                            start = pos + 1

                    # Create a result for each match
                    for match in matches:
                        # Collect the surrounding context lines
                        context_start = max(0, i - config.context_lines)
                        context_end = min(len(lines), i + config.context_lines + 1)

                        context_before = [
                            lines[j].rstrip('\n\r')
                            for j in range(context_start, i)
                        ]
                        context_after = [
                            lines[j].rstrip('\n\r')
                            for j in range(i + 1, context_end)
                        ]

                        # Calculate the relevance score
                        relevance = self._calculate_relevance(
                            str(file_path), line, pattern, match
                        )

                        result = SearchResult(
                            file_path=str(file_path),
                            line_number=i + 1,
                            line_content=line,
                            match_start=match.start(),
                            match_end=match.end(),
                            context_before=context_before,
                            context_after=context_after,
                            relevance_score=relevance,
                        )
                        results.append(result)

                return results

            except Exception:
                return []

        return await asyncio.get_event_loop().run_in_executor(
            self._executor, search_file_sync
        )

    def _calculate_relevance(
        self,
        file_path: str,
        line: str,
        pattern: str,
        match,
    ) -> float:
        """Calculate a relevance score for a search result."""
        score = 0.0

        # Base score
        score += 1.0

        # Boost exact (case-insensitive) substring matches
        if pattern.lower() in line.lower():
            score += 0.5

        # Boost matches at word boundaries
        if match.start() == 0 or not line[match.start() - 1].isalnum():
            score += 0.3

        # Boost matches in common source file types
        if file_path.endswith(('.py', '.js', '.ts', '.java', '.cpp', '.c')):
            score += 0.2

        # Boost matches in comments or docstrings
        stripped_line = line.strip()
        if stripped_line.startswith(('#', '//', '/*', '"""', "'''")):
            score += 0.1

        return score

    def _parse_ripgrep_output(self, output: str) -> List[SearchResult]:
        """Parse ripgrep's JSON output into SearchResult objects."""
        results = []
        for line in output.strip().split('\n'):
            if not line:
                continue

            try:
                data = json.loads(line)
                if data.get('type') != 'match':
                    continue

                match_data = data['data']
                result = SearchResult(
                    file_path=match_data['path']['text'],
                    line_number=match_data['line_number'],
                    line_content=match_data['lines']['text'].rstrip('\n\r'),
                    match_start=match_data['submatches'][0]['start'],
                    match_end=match_data['submatches'][0]['end'],
                    context_before=[],  # Ripgrep context handling would go here
                    context_after=[],
                    relevance_score=1.0,
                )
                results.append(result)
            except (json.JSONDecodeError, KeyError):
                continue

        return results

    def _parse_patterns(self, patterns: str) -> List[str]:
        """Parse comma-separated file patterns."""
        return [p.strip() for p in patterns.split(',') if p.strip()]

    def _format_results(
        self,
        results: List[SearchResult],
        pattern: str,
        config: SearchConfig,
    ) -> str:
        """Format search results for display."""
        if not results:
            return f"No matches found for pattern: {pattern}"

        output = []
        output.append(f"Found {len(results)} matches for pattern: {pattern}")
        output.append("=" * 60)

        for result in results:
            # File header
            output.append(f"\n📁 {result.file_path}:{result.line_number}")

            # Context before the match
            for i, context_line in enumerate(result.context_before):
                line_num = result.line_number - len(result.context_before) + i
                output.append(f"  {line_num:4d}│ {context_line}")

            # The matching line, with the match highlighted
            line_content = result.line_content
            before_match = line_content[:result.match_start]
            match_text = line_content[result.match_start:result.match_end]
            after_match = line_content[result.match_end:]

            output.append(f"▶ {result.line_number:4d}│ {before_match}⟨{match_text}⟩{after_match}")

            # Context after the match
            for i, context_line in enumerate(result.context_after):
                line_num = result.line_number + i + 1
                output.append(f"  {line_num:4d}│ {context_line}")

        return "\n".join(output)


# Create a tool instance for pydantic-ai
async def grep(
    pattern: str,
    directory: str = ".",
    case_sensitive: bool = False,
    use_regex: bool = False,
    include_files: Optional[str] = None,
    exclude_files: Optional[str] = None,
    max_results: int = 50,
    context_lines: int = 2,
    search_type: str = "smart",
) -> str:
    """
    Advanced parallel grep search with multiple strategies.

    Args:
        pattern: Search pattern (literal text or regex)
        directory: Directory to search (default: current directory)
        case_sensitive: Whether the search is case sensitive (default: False)
        use_regex: Whether the pattern is a regular expression (default: False)
        include_files: File patterns to include, comma-separated (e.g., "*.py,*.js")
        exclude_files: File patterns to exclude, comma-separated (e.g., "*.pyc,node_modules/*")
        max_results: Maximum number of results to return (default: 50)
        context_lines: Number of context lines before/after matches (default: 2)
        search_type: Search strategy - "smart", "ripgrep", "python", or "hybrid" (default: "smart")

    Returns:
        Formatted search results with file paths, line numbers, and context

    Examples:
        grep("TODO", ".", max_results=20)
        grep("function.*export", "src/", use_regex=True, include_files="*.js,*.ts")
        grep("import.*pandas", ".", include_files="*.py", search_type="hybrid")
    """
    tool = ParallelGrep()
    return await tool._execute(
        pattern=pattern,
        directory=directory,
        case_sensitive=case_sensitive,
        use_regex=use_regex,
        include_files=include_files,
        exclude_files=exclude_files,
        max_results=max_results,
        context_lines=context_lines,
        search_type=search_type,
    )