tunacode-cli 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tunacode-cli might be problematic; consult the registry's advisory page for more details.

tunacode/tools/grep.py ADDED
@@ -0,0 +1,760 @@
1
+ """
2
+ Parallel grep tool for TunaCode - Enhanced content search with parallel processing.
3
+
4
+ This tool provides sophisticated grep-like functionality with:
5
+ - Parallel file searching across multiple directories
6
+ - Multiple search strategies (literal, regex, fuzzy)
7
+ - Smart result ranking and deduplication
8
+ - Context-aware output formatting
9
+ """
10
+
11
+ import asyncio
12
+ import re
13
+ import subprocess
14
+ import fnmatch
15
+ import os
16
+ from pathlib import Path
17
+ from typing import Dict, List, Optional, Tuple, Union
18
+ from dataclasses import dataclass
19
+ from concurrent.futures import ThreadPoolExecutor
20
+
21
+ from tunacode.tools.base import BaseTool
22
+ from tunacode.exceptions import ToolExecutionError
23
+
24
+
25
@dataclass
class SearchResult:
    """Represents a single search match with context."""
    file_path: str  # path of the file containing the match (as given by the scanner)
    line_number: int  # 1-based line number of the matched line
    line_content: str  # matched line with trailing newline/CR stripped
    match_start: int  # start column of the match within line_content
    match_end: int  # end column (exclusive) of the match within line_content
    context_before: List[str]  # up to context_lines lines preceding the match
    context_after: List[str]  # up to context_lines lines following the match
    relevance_score: float = 0.0  # ranking score; higher sorts first in results
36
+
37
+
38
@dataclass
class SearchConfig:
    """Configuration for search operations."""
    case_sensitive: bool = False  # False folds case for both literal and regex search
    use_regex: bool = False  # treat the pattern as a regular expression
    max_results: int = 50  # cap on results returned to the caller
    context_lines: int = 2  # lines of context captured around each match
    # None means "not supplied"; _execute always fills these with concrete
    # lists before the config is consumed by the search strategies.
    include_patterns: Optional[List[str]] = None
    exclude_patterns: Optional[List[str]] = None
    max_file_size: int = 1024 * 1024  # 1MB - larger files are skipped by _find_files
    timeout_seconds: int = 30  # subprocess timeout for ripgrep invocations
49
+
50
+
51
# Fast-Glob Prefilter Configuration
MAX_GLOB = 5_000  # Hard cap on candidate files - protects memory & tokens
GLOB_BATCH = 500  # Streaming batch size
# Directory names skipped entirely during traversal.
# (Original listed 'node_modules' twice; duplicate removed - sets dedupe anyway.)
EXCLUDE_DIRS = {
    'node_modules', '.git', '__pycache__',
    '.venv', 'venv', 'dist', 'build', '.pytest_cache',
    '.mypy_cache', '.tox', 'target',
}
59
+
60
+
61
def fast_glob(root: Path, include: str, exclude: str = None) -> List[Path]:
    """
    Lightning-fast filename filtering using os.scandir.

    Traverses *root* iteratively (explicit stack, no recursion), skipping
    EXCLUDE_DIRS and symlinked entries, collecting at most MAX_GLOB files.
    Matching is case-insensitive and applied to the file *name* only.

    Args:
        root: Directory to search
        include: Include pattern (e.g., "*.py", "*.{js,ts}")
        exclude: Exclude pattern (optional), matched against file names

    Returns:
        List of matching file paths (bounded by MAX_GLOB)
    """
    matches: List[Path] = []
    stack = [root]

    # Expand brace alternation like "*.{py,js,ts}" into one fnmatch pattern
    # per extension; otherwise use the single pattern as-is.
    if '{' in include and '}' in include:
        base, ext_part = include.split('{', 1)
        ext_part = ext_part.split('}', 1)[0]
        include_patterns = [base + ext.strip() for ext in ext_part.split(',')]
    else:
        include_patterns = [include]
    include_regexes = [
        re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns
    ]

    exclude_rx = re.compile(fnmatch.translate(exclude), re.IGNORECASE) if exclude else None

    while stack and len(matches) < MAX_GLOB:
        current_dir = stack.pop()

        try:
            with os.scandir(current_dir) as entries:
                for entry in entries:
                    if entry.is_dir(follow_symlinks=False):
                        # Queue subdirectories, skipping well-known junk dirs.
                        if entry.name not in EXCLUDE_DIRS:
                            stack.append(Path(entry.path))
                    elif entry.is_file(follow_symlinks=False):
                        if any(rx.match(entry.name) for rx in include_regexes):
                            if not exclude_rx or not exclude_rx.match(entry.name):
                                matches.append(Path(entry.path))
                                # Stop scanning this directory once the cap is
                                # hit; the final slice enforces the same bound,
                                # so results are unchanged - just less work.
                                if len(matches) >= MAX_GLOB:
                                    break
        except (PermissionError, OSError):
            continue  # Skip inaccessible directories

    return matches[:MAX_GLOB]
112
+
113
+
114
class ParallelGrep(BaseTool):
    """Advanced parallel grep tool with multiple search strategies."""

    def __init__(self, ui_logger=None):
        # ui_logger is forwarded to BaseTool for UI-facing logging.
        super().__init__(ui_logger)
        # Shared pool used for file scanning and subprocess invocations.
        # NOTE(review): never shut down explicitly - relies on interpreter
        # exit to reap worker threads; confirm this is acceptable for the
        # tool's lifecycle.
        self._executor = ThreadPoolExecutor(max_workers=8)

    @property
    def tool_name(self) -> str:
        # Name under which this tool is registered and reported.
        return "grep"
124
+
125
+ async def _execute(
126
+ self,
127
+ pattern: str,
128
+ directory: str = ".",
129
+ case_sensitive: bool = False,
130
+ use_regex: bool = False,
131
+ include_files: Optional[str] = None,
132
+ exclude_files: Optional[str] = None,
133
+ max_results: int = 50,
134
+ context_lines: int = 2,
135
+ search_type: str = "smart" # smart, ripgrep, python, hybrid
136
+ ) -> str:
137
+ """
138
+ Execute parallel grep search with fast-glob prefiltering and multiple strategies.
139
+
140
+ Args:
141
+ pattern: Search pattern (literal text or regex)
142
+ directory: Directory to search (default: current)
143
+ case_sensitive: Whether search is case sensitive
144
+ use_regex: Whether pattern is a regular expression
145
+ include_files: File patterns to include (e.g., "*.py", "*.{js,ts}")
146
+ exclude_files: File patterns to exclude (e.g., "*.pyc", "node_modules/*")
147
+ max_results: Maximum number of results to return
148
+ context_lines: Number of context lines before/after matches
149
+ search_type: Search strategy to use
150
+
151
+ Returns:
152
+ Formatted search results
153
+ """
154
+ try:
155
+ # 1️⃣ Fast-glob prefilter to find candidate files
156
+ include_pattern = include_files or "*"
157
+ exclude_pattern = exclude_files
158
+
159
+ candidates = await asyncio.get_event_loop().run_in_executor(
160
+ self._executor,
161
+ fast_glob,
162
+ Path(directory),
163
+ include_pattern,
164
+ exclude_pattern
165
+ )
166
+
167
+ if not candidates:
168
+ return f"No files found matching pattern: {include_pattern}"
169
+
170
+ # 2️⃣ Smart strategy selection based on candidate count
171
+ original_search_type = search_type
172
+ if search_type == "smart":
173
+ if len(candidates) <= 50:
174
+ # Small set - Python strategy more efficient (low startup cost)
175
+ search_type = "python"
176
+ elif len(candidates) <= 1000:
177
+ # Medium set - Ripgrep optimal for this range
178
+ search_type = "ripgrep"
179
+ else:
180
+ # Large set - Hybrid for best coverage and redundancy
181
+ search_type = "hybrid"
182
+
183
+ # 3️⃣ Create search configuration
184
+ # Note: include_patterns/exclude_patterns now only used for legacy compatibility
185
+ include_patterns = self._parse_patterns(include_files) if include_files else ["*"]
186
+ exclude_patterns = self._parse_patterns(exclude_files) if exclude_files else []
187
+ config = SearchConfig(
188
+ case_sensitive=case_sensitive,
189
+ use_regex=use_regex,
190
+ max_results=max_results,
191
+ context_lines=context_lines,
192
+ include_patterns=include_patterns,
193
+ exclude_patterns=exclude_patterns
194
+ )
195
+
196
+ # 4️⃣ Execute chosen strategy with pre-filtered candidates
197
+ if search_type == "ripgrep":
198
+ results = await self._ripgrep_search_filtered(pattern, candidates, config)
199
+ elif search_type == "python":
200
+ results = await self._python_search_filtered(pattern, candidates, config)
201
+ elif search_type == "hybrid":
202
+ results = await self._hybrid_search_filtered(pattern, candidates, config)
203
+ else:
204
+ raise ToolExecutionError(f"Unknown search type: {search_type}")
205
+
206
+ # 5️⃣ Format and return results with strategy info
207
+ strategy_info = f"Strategy: {search_type} (was {original_search_type}), Files: {len(candidates)}/{MAX_GLOB}"
208
+ formatted_results = self._format_results(results, pattern, config)
209
+
210
+ # Add strategy info to results
211
+ if formatted_results.startswith("Found"):
212
+ lines = formatted_results.split('\n')
213
+ lines[1] = f"Strategy: {search_type} | Candidates: {len(candidates)} files | " + lines[1]
214
+ return '\n'.join(lines)
215
+ else:
216
+ return f"{formatted_results}\n\n{strategy_info}"
217
+
218
+ except Exception as e:
219
+ raise ToolExecutionError(f"Grep search failed: {str(e)}")
220
+
221
+ async def _smart_search(
222
+ self,
223
+ pattern: str,
224
+ directory: str,
225
+ config: SearchConfig
226
+ ) -> List[SearchResult]:
227
+ """Smart search that chooses optimal strategy based on context."""
228
+
229
+ # Try ripgrep first (fastest for large codebases)
230
+ try:
231
+ results = await self._ripgrep_search(pattern, directory, config)
232
+ if results:
233
+ return results
234
+ except:
235
+ pass
236
+
237
+ # Fallback to Python implementation
238
+ return await self._python_search(pattern, directory, config)
239
+
240
    async def _ripgrep_search(
        self,
        pattern: str,
        directory: str,
        config: SearchConfig
    ) -> List[SearchResult]:
        """Use ripgrep for high-performance searching."""

        def run_ripgrep():
            # --json emits one JSON object per line; parsed by
            # _parse_ripgrep_output below.
            cmd = ["rg", "--json"]

            # Add options based on config
            if not config.case_sensitive:
                cmd.append("--ignore-case")
            if config.context_lines > 0:
                cmd.extend(["--context", str(config.context_lines)])
            if config.max_results:
                # NOTE(review): --max-count caps matches *per file*, not the
                # overall total; the global cap is applied later by callers.
                cmd.extend(["--max-count", str(config.max_results)])

            # Add include/exclude patterns ("!" prefix negates a glob in rg)
            for pattern_str in config.include_patterns:
                if pattern_str != "*":
                    cmd.extend(["--glob", pattern_str])
            for pattern_str in config.exclude_patterns:
                cmd.extend(["--glob", f"!{pattern_str}"])

            # Add pattern and directory
            cmd.extend([pattern, directory])

            try:
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=config.timeout_seconds
                )
                # rg exits 0 on match; non-zero (no match / error) yields None.
                return result.stdout if result.returncode == 0 else None
            except (subprocess.TimeoutExpired, FileNotFoundError):
                # rg not installed, or timed out - caller treats as no output.
                return None

        # Run ripgrep in thread pool so the event loop stays responsive.
        output = await asyncio.get_event_loop().run_in_executor(
            self._executor, run_ripgrep
        )

        if not output:
            return []

        # Parse ripgrep JSON output
        return self._parse_ripgrep_output(output)
290
+
291
+ async def _python_search(
292
+ self,
293
+ pattern: str,
294
+ directory: str,
295
+ config: SearchConfig
296
+ ) -> List[SearchResult]:
297
+ """Pure Python parallel search implementation."""
298
+
299
+ # Find all files to search
300
+ files = await self._find_files(directory, config)
301
+
302
+ # Prepare search pattern
303
+ if config.use_regex:
304
+ flags = 0 if config.case_sensitive else re.IGNORECASE
305
+ regex_pattern = re.compile(pattern, flags)
306
+ else:
307
+ regex_pattern = None
308
+
309
+ # Create search tasks for parallel execution
310
+ search_tasks = []
311
+ for file_path in files:
312
+ task = self._search_file(
313
+ file_path, pattern, regex_pattern, config
314
+ )
315
+ search_tasks.append(task)
316
+
317
+ # Execute searches in parallel
318
+ all_results = await asyncio.gather(*search_tasks, return_exceptions=True)
319
+
320
+ # Flatten results and filter out exceptions
321
+ results = []
322
+ for file_results in all_results:
323
+ if isinstance(file_results, list):
324
+ results.extend(file_results)
325
+
326
+ # Sort by relevance and limit results
327
+ results.sort(key=lambda r: r.relevance_score, reverse=True)
328
+ return results[:config.max_results]
329
+
330
+ async def _hybrid_search(
331
+ self,
332
+ pattern: str,
333
+ directory: str,
334
+ config: SearchConfig
335
+ ) -> List[SearchResult]:
336
+ """Hybrid approach using multiple search methods concurrently."""
337
+
338
+ # Run multiple search strategies in parallel
339
+ tasks = [
340
+ self._ripgrep_search(pattern, directory, config),
341
+ self._python_search(pattern, directory, config)
342
+ ]
343
+
344
+ results_list = await asyncio.gather(*tasks, return_exceptions=True)
345
+
346
+ # Merge and deduplicate results
347
+ all_results = []
348
+ for results in results_list:
349
+ if isinstance(results, list):
350
+ all_results.extend(results)
351
+
352
+ # Deduplicate by file path and line number
353
+ seen = set()
354
+ unique_results = []
355
+ for result in all_results:
356
+ key = (result.file_path, result.line_number)
357
+ if key not in seen:
358
+ seen.add(key)
359
+ unique_results.append(result)
360
+
361
+ # Sort and limit
362
+ unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
363
+ return unique_results[:config.max_results]
364
+
365
+ # ====== NEW FILTERED SEARCH METHODS ======
366
+
367
    async def _ripgrep_search_filtered(
        self,
        pattern: str,
        candidates: List[Path],
        config: SearchConfig
    ) -> List[SearchResult]:
        """
        Run ripgrep on pre-filtered file list.

        NOTE(review): every candidate path is appended to the command line;
        with up to MAX_GLOB (5000) files this may approach OS argv length
        limits - confirm on target platforms.
        """
        def run_ripgrep_filtered():
            cmd = ["rg", "--json"]

            # Add configuration flags
            if not config.case_sensitive:
                cmd.append("--ignore-case")
            if config.context_lines > 0:
                cmd.extend(["--context", str(config.context_lines)])
            if config.max_results:
                # NOTE(review): --max-count limits matches per file, not
                # globally; the overall cap is applied after parsing.
                cmd.extend(["--max-count", str(config.max_results)])

            # Add pattern and explicit file list
            cmd.append(pattern)
            cmd.extend(str(f) for f in candidates)

            try:
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=config.timeout_seconds
                )
                # rg exits 0 on match; anything else is treated as no output.
                return result.stdout if result.returncode == 0 else None
            except (subprocess.TimeoutExpired, FileNotFoundError):
                # rg not installed or took too long - treat as no output.
                return None

        # Run ripgrep in thread pool
        output = await asyncio.get_event_loop().run_in_executor(
            self._executor, run_ripgrep_filtered
        )

        return self._parse_ripgrep_output(output) if output else []
408
+
409
+ async def _python_search_filtered(
410
+ self,
411
+ pattern: str,
412
+ candidates: List[Path],
413
+ config: SearchConfig
414
+ ) -> List[SearchResult]:
415
+ """
416
+ Run Python parallel search on pre-filtered candidates.
417
+ """
418
+ # Prepare search pattern
419
+ if config.use_regex:
420
+ flags = 0 if config.case_sensitive else re.IGNORECASE
421
+ regex_pattern = re.compile(pattern, flags)
422
+ else:
423
+ regex_pattern = None
424
+
425
+ # Create search tasks for candidates only
426
+ search_tasks = []
427
+ for file_path in candidates:
428
+ task = self._search_file(
429
+ file_path, pattern, regex_pattern, config
430
+ )
431
+ search_tasks.append(task)
432
+
433
+ # Execute searches in parallel
434
+ all_results = await asyncio.gather(*search_tasks, return_exceptions=True)
435
+
436
+ # Flatten results and filter out exceptions
437
+ results = []
438
+ for file_results in all_results:
439
+ if isinstance(file_results, list):
440
+ results.extend(file_results)
441
+
442
+ # Sort by relevance and limit results
443
+ results.sort(key=lambda r: r.relevance_score, reverse=True)
444
+ return results[:config.max_results]
445
+
446
+ async def _hybrid_search_filtered(
447
+ self,
448
+ pattern: str,
449
+ candidates: List[Path],
450
+ config: SearchConfig
451
+ ) -> List[SearchResult]:
452
+ """
453
+ Hybrid approach using multiple search methods concurrently on pre-filtered candidates.
454
+ """
455
+
456
+ # Run multiple search strategies in parallel
457
+ tasks = [
458
+ self._ripgrep_search_filtered(pattern, candidates, config),
459
+ self._python_search_filtered(pattern, candidates, config)
460
+ ]
461
+
462
+ results_list = await asyncio.gather(*tasks, return_exceptions=True)
463
+
464
+ # Merge and deduplicate results
465
+ all_results = []
466
+ for results in results_list:
467
+ if isinstance(results, list):
468
+ all_results.extend(results)
469
+
470
+ # Deduplicate by file path and line number
471
+ seen = set()
472
+ unique_results = []
473
+ for result in all_results:
474
+ key = (result.file_path, result.line_number)
475
+ if key not in seen:
476
+ seen.add(key)
477
+ unique_results.append(result)
478
+
479
+ # Sort and limit
480
+ unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
481
+ return unique_results[:config.max_results]
482
+
483
+ async def _find_files(
484
+ self,
485
+ directory: str,
486
+ config: SearchConfig
487
+ ) -> List[Path]:
488
+ """Find all files matching include/exclude patterns."""
489
+
490
+ def find_files_sync():
491
+ files = []
492
+ dir_path = Path(directory)
493
+
494
+ for file_path in dir_path.rglob("*"):
495
+ if not file_path.is_file():
496
+ continue
497
+
498
+ # Check file size
499
+ try:
500
+ if file_path.stat().st_size > config.max_file_size:
501
+ continue
502
+ except OSError:
503
+ continue
504
+
505
+ # Check include patterns
506
+ if not any(fnmatch.fnmatch(str(file_path), pattern)
507
+ for pattern in config.include_patterns):
508
+ continue
509
+
510
+ # Check exclude patterns
511
+ if any(fnmatch.fnmatch(str(file_path), pattern)
512
+ for pattern in config.exclude_patterns):
513
+ continue
514
+
515
+ files.append(file_path)
516
+
517
+ return files
518
+
519
+ return await asyncio.get_event_loop().run_in_executor(
520
+ self._executor, find_files_sync
521
+ )
522
+
523
+ async def _search_file(
524
+ self,
525
+ file_path: Path,
526
+ pattern: str,
527
+ regex_pattern: Optional[re.Pattern],
528
+ config: SearchConfig
529
+ ) -> List[SearchResult]:
530
+ """Search a single file for the pattern."""
531
+
532
+ def search_file_sync():
533
+ try:
534
+ with file_path.open('r', encoding='utf-8', errors='ignore') as f:
535
+ lines = f.readlines()
536
+
537
+ results = []
538
+ for i, line in enumerate(lines):
539
+ line = line.rstrip('\n\r')
540
+
541
+ # Search for pattern
542
+ if regex_pattern:
543
+ matches = list(regex_pattern.finditer(line))
544
+ else:
545
+ # Simple string search
546
+ search_line = line if config.case_sensitive else line.lower()
547
+ search_pattern = pattern if config.case_sensitive else pattern.lower()
548
+
549
+ matches = []
550
+ start = 0
551
+ while True:
552
+ pos = search_line.find(search_pattern, start)
553
+ if pos == -1:
554
+ break
555
+ # Create a simple match object
556
+ class SimpleMatch:
557
+ def __init__(self, start_pos, end_pos):
558
+ self._start = start_pos
559
+ self._end = end_pos
560
+ def start(self):
561
+ return self._start
562
+ def end(self):
563
+ return self._end
564
+ match = SimpleMatch(pos, pos + len(search_pattern))
565
+ matches.append(match)
566
+ start = pos + 1
567
+
568
+ # Create results for each match
569
+ for match in matches:
570
+ # Get context lines
571
+ context_start = max(0, i - config.context_lines)
572
+ context_end = min(len(lines), i + config.context_lines + 1)
573
+
574
+ context_before = [
575
+ lines[j].rstrip('\n\r')
576
+ for j in range(context_start, i)
577
+ ]
578
+ context_after = [
579
+ lines[j].rstrip('\n\r')
580
+ for j in range(i + 1, context_end)
581
+ ]
582
+
583
+ # Calculate relevance score
584
+ relevance = self._calculate_relevance(
585
+ str(file_path), line, pattern, match
586
+ )
587
+
588
+ result = SearchResult(
589
+ file_path=str(file_path),
590
+ line_number=i + 1,
591
+ line_content=line,
592
+ match_start=match.start() if hasattr(match, 'start') else match.start(),
593
+ match_end=match.end() if hasattr(match, 'end') else match.end(),
594
+ context_before=context_before,
595
+ context_after=context_after,
596
+ relevance_score=relevance
597
+ )
598
+ results.append(result)
599
+
600
+ return results
601
+
602
+ except Exception:
603
+ return []
604
+
605
+ return await asyncio.get_event_loop().run_in_executor(
606
+ self._executor, search_file_sync
607
+ )
608
+
609
+ def _calculate_relevance(
610
+ self,
611
+ file_path: str,
612
+ line: str,
613
+ pattern: str,
614
+ match
615
+ ) -> float:
616
+ """Calculate relevance score for a search result."""
617
+ score = 0.0
618
+
619
+ # Base score
620
+ score += 1.0
621
+
622
+ # Boost for exact matches
623
+ if pattern.lower() in line.lower():
624
+ score += 0.5
625
+
626
+ # Boost for matches at word boundaries
627
+ if match.start() == 0 or not line[match.start() - 1].isalnum():
628
+ score += 0.3
629
+
630
+ # Boost for certain file types
631
+ if file_path.endswith(('.py', '.js', '.ts', '.java', '.cpp', '.c')):
632
+ score += 0.2
633
+
634
+ # Boost for matches in comments or docstrings
635
+ stripped_line = line.strip()
636
+ if stripped_line.startswith(('#', '//', '/*', '"""', "'''")):
637
+ score += 0.1
638
+
639
+ return score
640
+
641
+ def _parse_ripgrep_output(self, output: str) -> List[SearchResult]:
642
+ """Parse ripgrep JSON output into SearchResult objects."""
643
+ import json
644
+
645
+ results = []
646
+ for line in output.strip().split('\n'):
647
+ if not line:
648
+ continue
649
+
650
+ try:
651
+ data = json.loads(line)
652
+ if data.get('type') != 'match':
653
+ continue
654
+
655
+ match_data = data['data']
656
+ result = SearchResult(
657
+ file_path=match_data['path']['text'],
658
+ line_number=match_data['line_number'],
659
+ line_content=match_data['lines']['text'].rstrip('\n\r'),
660
+ match_start=match_data['submatches'][0]['start'],
661
+ match_end=match_data['submatches'][0]['end'],
662
+ context_before=[], # Ripgrep context handling would go here
663
+ context_after=[],
664
+ relevance_score=1.0
665
+ )
666
+ results.append(result)
667
+ except (json.JSONDecodeError, KeyError):
668
+ continue
669
+
670
+ return results
671
+
672
+ def _parse_patterns(self, patterns: str) -> List[str]:
673
+ """Parse comma-separated file patterns."""
674
+ return [p.strip() for p in patterns.split(',') if p.strip()]
675
+
676
+ def _format_results(
677
+ self,
678
+ results: List[SearchResult],
679
+ pattern: str,
680
+ config: SearchConfig
681
+ ) -> str:
682
+ """Format search results for display."""
683
+ if not results:
684
+ return f"No matches found for pattern: {pattern}"
685
+
686
+ output = []
687
+ output.append(f"Found {len(results)} matches for pattern: {pattern}")
688
+ output.append("=" * 60)
689
+
690
+ for result in results:
691
+ # File header
692
+ output.append(f"\n📁 {result.file_path}:{result.line_number}")
693
+
694
+ # Context before
695
+ for i, context_line in enumerate(result.context_before):
696
+ line_num = result.line_number - len(result.context_before) + i
697
+ output.append(f" {line_num:4d}│ {context_line}")
698
+
699
+ # Main match line with highlighting
700
+ line_content = result.line_content
701
+ before_match = line_content[:result.match_start]
702
+ match_text = line_content[result.match_start:result.match_end]
703
+ after_match = line_content[result.match_end:]
704
+
705
+ output.append(f"▶ {result.line_number:4d}│ {before_match}⟨{match_text}⟩{after_match}")
706
+
707
+ # Context after
708
+ for i, context_line in enumerate(result.context_after):
709
+ line_num = result.line_number + i + 1
710
+ output.append(f" {line_num:4d}│ {context_line}")
711
+
712
+ return "\n".join(output)
713
+
714
+
715
# Create tool instance for pydantic-ai.
# A single shared instance is created lazily so repeated grep() calls reuse
# one ThreadPoolExecutor instead of constructing (and never shutting down)
# a fresh 8-worker pool on every invocation.
_grep_tool: Optional[ParallelGrep] = None


async def grep(
    pattern: str,
    directory: str = ".",
    case_sensitive: bool = False,
    use_regex: bool = False,
    include_files: Optional[str] = None,
    exclude_files: Optional[str] = None,
    max_results: int = 50,
    context_lines: int = 2,
    search_type: str = "smart"
) -> str:
    """
    Advanced parallel grep search with multiple strategies.

    Args:
        pattern: Search pattern (literal text or regex)
        directory: Directory to search (default: current directory)
        case_sensitive: Whether search is case sensitive (default: False)
        use_regex: Whether pattern is a regular expression (default: False)
        include_files: File patterns to include, comma-separated (e.g., "*.py,*.js")
        exclude_files: File patterns to exclude, comma-separated (e.g., "*.pyc,node_modules/*")
        max_results: Maximum number of results to return (default: 50)
        context_lines: Number of context lines before/after matches (default: 2)
        search_type: Search strategy - "smart", "ripgrep", "python", or "hybrid" (default: "smart")

    Returns:
        Formatted search results with file paths, line numbers, and context

    Examples:
        grep("TODO", ".", max_results=20)
        grep("function.*export", "src/", use_regex=True, include_files="*.js,*.ts")
        grep("import.*pandas", ".", include_files="*.py", search_type="hybrid")
    """
    global _grep_tool
    if _grep_tool is None:
        _grep_tool = ParallelGrep()
    return await _grep_tool._execute(
        pattern=pattern,
        directory=directory,
        case_sensitive=case_sensitive,
        use_regex=use_regex,
        include_files=include_files,
        exclude_files=exclude_files,
        max_results=max_results,
        context_lines=context_lines,
        search_type=search_type
    )