tunacode-cli 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tunacode-cli might be problematic.
- tunacode/cli/commands/base.py +2 -2
- tunacode/cli/commands/implementations/__init__.py +7 -1
- tunacode/cli/commands/implementations/conversation.py +1 -1
- tunacode/cli/commands/implementations/debug.py +1 -1
- tunacode/cli/commands/implementations/development.py +4 -1
- tunacode/cli/commands/implementations/template.py +132 -0
- tunacode/cli/commands/registry.py +28 -1
- tunacode/cli/commands/template_shortcut.py +93 -0
- tunacode/cli/main.py +6 -0
- tunacode/cli/repl.py +29 -174
- tunacode/cli/repl_components/__init__.py +10 -0
- tunacode/cli/repl_components/command_parser.py +34 -0
- tunacode/cli/repl_components/error_recovery.py +88 -0
- tunacode/cli/repl_components/output_display.py +33 -0
- tunacode/cli/repl_components/tool_executor.py +84 -0
- tunacode/configuration/defaults.py +2 -2
- tunacode/configuration/settings.py +11 -14
- tunacode/constants.py +57 -23
- tunacode/context.py +0 -14
- tunacode/core/agents/agent_components/__init__.py +27 -0
- tunacode/core/agents/agent_components/agent_config.py +109 -0
- tunacode/core/agents/agent_components/json_tool_parser.py +109 -0
- tunacode/core/agents/agent_components/message_handler.py +100 -0
- tunacode/core/agents/agent_components/node_processor.py +480 -0
- tunacode/core/agents/agent_components/response_state.py +13 -0
- tunacode/core/agents/agent_components/result_wrapper.py +50 -0
- tunacode/core/agents/agent_components/task_completion.py +28 -0
- tunacode/core/agents/agent_components/tool_buffer.py +24 -0
- tunacode/core/agents/agent_components/tool_executor.py +49 -0
- tunacode/core/agents/main.py +421 -778
- tunacode/core/agents/utils.py +42 -2
- tunacode/core/background/manager.py +3 -3
- tunacode/core/logging/__init__.py +4 -3
- tunacode/core/logging/config.py +1 -1
- tunacode/core/logging/formatters.py +1 -1
- tunacode/core/logging/handlers.py +41 -7
- tunacode/core/setup/__init__.py +2 -0
- tunacode/core/setup/agent_setup.py +2 -2
- tunacode/core/setup/base.py +2 -2
- tunacode/core/setup/config_setup.py +10 -6
- tunacode/core/setup/git_safety_setup.py +13 -2
- tunacode/core/setup/template_setup.py +75 -0
- tunacode/core/state.py +13 -2
- tunacode/core/token_usage/api_response_parser.py +6 -2
- tunacode/core/token_usage/usage_tracker.py +37 -7
- tunacode/core/tool_handler.py +24 -1
- tunacode/prompts/system.md +289 -4
- tunacode/setup.py +2 -0
- tunacode/templates/__init__.py +9 -0
- tunacode/templates/loader.py +210 -0
- tunacode/tools/glob.py +3 -3
- tunacode/tools/grep.py +26 -276
- tunacode/tools/grep_components/__init__.py +9 -0
- tunacode/tools/grep_components/file_filter.py +93 -0
- tunacode/tools/grep_components/pattern_matcher.py +152 -0
- tunacode/tools/grep_components/result_formatter.py +45 -0
- tunacode/tools/grep_components/search_result.py +35 -0
- tunacode/tools/todo.py +27 -21
- tunacode/types.py +19 -4
- tunacode/ui/completers.py +6 -1
- tunacode/ui/decorators.py +2 -2
- tunacode/ui/keybindings.py +1 -1
- tunacode/ui/panels.py +13 -5
- tunacode/ui/prompt_manager.py +1 -1
- tunacode/ui/tool_ui.py +8 -2
- tunacode/utils/bm25.py +4 -4
- tunacode/utils/file_utils.py +2 -2
- tunacode/utils/message_utils.py +3 -1
- tunacode/utils/system.py +0 -4
- tunacode/utils/text_utils.py +1 -1
- tunacode/utils/token_counter.py +2 -2
- {tunacode_cli-0.0.51.dist-info → tunacode_cli-0.0.53.dist-info}/METADATA +146 -1
- tunacode_cli-0.0.53.dist-info/RECORD +123 -0
- {tunacode_cli-0.0.51.dist-info → tunacode_cli-0.0.53.dist-info}/top_level.txt +0 -1
- api/auth.py +0 -13
- api/users.py +0 -8
- tunacode/core/recursive/__init__.py +0 -18
- tunacode/core/recursive/aggregator.py +0 -467
- tunacode/core/recursive/budget.py +0 -414
- tunacode/core/recursive/decomposer.py +0 -398
- tunacode/core/recursive/executor.py +0 -470
- tunacode/core/recursive/hierarchy.py +0 -488
- tunacode/ui/recursive_progress.py +0 -380
- tunacode_cli-0.0.51.dist-info/RECORD +0 -107
- {tunacode_cli-0.0.51.dist-info → tunacode_cli-0.0.53.dist-info}/WHEEL +0 -0
- {tunacode_cli-0.0.51.dist-info → tunacode_cli-0.0.53.dist-info}/entry_points.txt +0 -0
- {tunacode_cli-0.0.51.dist-info → tunacode_cli-0.0.53.dist-info}/licenses/LICENSE +0 -0
tunacode/tools/grep.py
CHANGED
@@ -10,121 +10,22 @@ This tool provides sophisticated grep-like functionality with:
 """
 
 import asyncio
-import fnmatch
-import os
 import re
 import subprocess
 import time
 from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Optional, Union
 
 from tunacode.exceptions import TooBroadPatternError, ToolExecutionError
 from tunacode.tools.base import BaseTool
-
-
-@dataclass
-class SearchResult:
-
-
-    file_path: str
-    line_number: int
-    line_content: str
-    match_start: int
-    match_end: int
-    context_before: List[str]
-    context_after: List[str]
-    relevance_score: float = 0.0
-
-
-@dataclass
-class SearchConfig:
-    """Configuration for search operations."""
-
-    case_sensitive: bool = False
-    use_regex: bool = False
-    max_results: int = 50
-    context_lines: int = 2
-    include_patterns: List[str] = None
-    exclude_patterns: List[str] = None
-    max_file_size: int = 1024 * 1024  # 1MB
-    timeout_seconds: int = 30
-    first_match_deadline: float = 3.0  # Timeout for finding first match
-
-
-# Fast-Glob Prefilter Configuration
-MAX_GLOB = 5_000  # Hard cap - protects memory & tokens
-GLOB_BATCH = 500  # Streaming batch size
-EXCLUDE_DIRS = {  # Common directories to skip
-    "node_modules",
-    ".git",
-    "__pycache__",
-    ".venv",
-    "venv",
-    "dist",
-    "build",
-    ".pytest_cache",
-    ".mypy_cache",
-    ".tox",
-    "target",
-    "node_modules",
-}
-
-
-def fast_glob(root: Path, include: str, exclude: str = None) -> List[Path]:
-    """
-    Lightning-fast filename filtering using os.scandir.
-
-    Args:
-        root: Directory to search
-        include: Include pattern (e.g., "*.py", "*.{js,ts}")
-        exclude: Exclude pattern (optional)
-
-    Returns:
-        List of matching file paths (bounded by MAX_GLOB)
-    """
-    matches, stack = [], [root]
-
-    # Handle multiple extensions in include pattern like "*.{py,js,ts}"
-    if "{" in include and "}" in include:
-        # Convert *.{py,js,ts} to multiple patterns
-        base, ext_part = include.split("{", 1)
-        ext_part = ext_part.split("}", 1)[0]
-        extensions = ext_part.split(",")
-        include_patterns = [base + ext.strip() for ext in extensions]
-        include_regexes = [
-            re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns
-        ]
-    else:
-        include_regexes = [re.compile(fnmatch.translate(include), re.IGNORECASE)]
-
-    exclude_rx = re.compile(fnmatch.translate(exclude), re.IGNORECASE) if exclude else None
-
-    while stack and len(matches) < MAX_GLOB:
-        current_dir = stack.pop()
-
-        try:
-            with os.scandir(current_dir) as entries:
-                for entry in entries:
-                    # Skip common irrelevant directories
-                    if entry.is_dir(follow_symlinks=False):
-                        if entry.name not in EXCLUDE_DIRS:
-                            stack.append(Path(entry.path))
-
-                    # Check file matches
-                    elif entry.is_file(follow_symlinks=False):
-                        # Check against any include pattern
-                        matches_include = any(regex.match(entry.name) for regex in include_regexes)
-
-                        if matches_include:
-                            if not exclude_rx or not exclude_rx.match(entry.name):
-                                matches.append(Path(entry.path))
-
-        except (PermissionError, OSError):
-            continue  # Skip inaccessible directories
-
-    return matches[:MAX_GLOB]
+from tunacode.tools.grep_components import (
+    FileFilter,
+    PatternMatcher,
+    SearchConfig,
+    SearchResult,
+)
+from tunacode.tools.grep_components.result_formatter import ResultFormatter
 
 
 class ParallelGrep(BaseTool):
@@ -133,6 +34,9 @@ class ParallelGrep(BaseTool):
     def __init__(self, ui_logger=None):
        super().__init__(ui_logger)
        self._executor = ThreadPoolExecutor(max_workers=8)
+        self._file_filter = FileFilter()
+        self._pattern_matcher = PatternMatcher()
+        self._result_formatter = ResultFormatter()
 
     @property
     def tool_name(self) -> str:
@@ -150,7 +54,7 @@ class ParallelGrep(BaseTool):
         context_lines: int = 2,
         search_type: str = "smart",  # smart, ripgrep, python, hybrid
         return_format: str = "string",  # "string" (default) or "list" (legacy)
-    ) -> str:
+    ) -> Union[str, List[str]]:
         """
         Execute parallel grep search with fast-glob prefiltering and multiple strategies.
 
@@ -174,7 +78,11 @@ class ParallelGrep(BaseTool):
             exclude_pattern = exclude_files
 
         candidates = await asyncio.get_event_loop().run_in_executor(
-            self._executor,
+            self._executor,
+            self._file_filter.fast_glob,
+            Path(directory),
+            include_pattern,
+            exclude_pattern,
         )
 
         if not candidates:
@@ -197,8 +105,12 @@ class ParallelGrep(BaseTool):
 
         # 3️⃣ Create search configuration
         # Note: include_patterns/exclude_patterns now only used for legacy compatibility
-        include_patterns =
-
+        include_patterns = (
+            self._file_filter.parse_patterns(include_files) if include_files else ["*"]
+        )
+        exclude_patterns = (
+            self._file_filter.parse_patterns(exclude_files) if exclude_files else []
+        )
         config = SearchConfig(
             case_sensitive=case_sensitive,
             use_regex=use_regex,
@@ -220,8 +132,8 @@ class ParallelGrep(BaseTool):
             raise ToolExecutionError(f"Unknown search type: {search_type}")
 
         # 5️⃣ Format and return results with strategy info
-        strategy_info = f"Strategy: {search_type} (was {original_search_type}), Files: {len(candidates)}/{
-        formatted_results = self.
+        strategy_info = f"Strategy: {search_type} (was {original_search_type}), Files: {len(candidates)}/{5000}"
+        formatted_results = self._result_formatter.format_results(results, pattern, config)
 
         if return_format == "list":
             # Legacy: return list of file paths with at least one match
@@ -342,7 +254,7 @@ class ParallelGrep(BaseTool):
                 self._executor, run_ripgrep_filtered
             )
             if output:
-                parsed = self.
+                parsed = self._pattern_matcher.parse_ripgrep_output(output)
                 return parsed
             return []
         except TooBroadPatternError:
@@ -471,172 +383,10 @@ class ParallelGrep(BaseTool):
         """Search a single file for the pattern."""
 
         def search_file_sync():
-            try:
-                with file_path.open("r", encoding="utf-8", errors="ignore") as f:
-                    lines = f.readlines()
-
-                results = []
-                for i, line in enumerate(lines):
-                    line = line.rstrip("\n\r")
-
-                    # Search for pattern
-                    if regex_pattern:
-                        matches = list(regex_pattern.finditer(line))
-                    else:
-                        # Simple string search
-                        search_line = line if config.case_sensitive else line.lower()
-                        search_pattern = pattern if config.case_sensitive else pattern.lower()
-
-                        matches = []
-                        start = 0
-                        while True:
-                            pos = search_line.find(search_pattern, start)
-                            if pos == -1:
-                                break
-
-                            # Create a simple match object
-                            class SimpleMatch:
-                                def __init__(self, start_pos, end_pos):
-                                    self._start = start_pos
-                                    self._end = end_pos
-
-                                def start(self):
-                                    return self._start
-
-                                def end(self):
-                                    return self._end
-
-                            match = SimpleMatch(pos, pos + len(search_pattern))
-                            matches.append(match)
-                            start = pos + 1
-
-                    # Create results for each match
-                    for match in matches:
-                        # Get context lines
-                        context_start = max(0, i - config.context_lines)
-                        context_end = min(len(lines), i + config.context_lines + 1)
-
-                        context_before = [lines[j].rstrip("\n\r") for j in range(context_start, i)]
-                        context_after = [lines[j].rstrip("\n\r") for j in range(i + 1, context_end)]
-
-                        # Calculate relevance score
-                        relevance = self._calculate_relevance(str(file_path), line, pattern, match)
-
-                        result = SearchResult(
-                            file_path=str(file_path),
-                            line_number=i + 1,
-                            line_content=line,
-                            match_start=match.start() if hasattr(match, "start") else match.start(),
-                            match_end=match.end() if hasattr(match, "end") else match.end(),
-                            context_before=context_before,
-                            context_after=context_after,
-                            relevance_score=relevance,
-                        )
-                        results.append(result)
-
-                return results
-
-            except Exception:
-                return []
+            return self._pattern_matcher.search_file(file_path, pattern, regex_pattern, config)
 
         return await asyncio.get_event_loop().run_in_executor(self._executor, search_file_sync)
 
-    def _calculate_relevance(self, file_path: str, line: str, pattern: str, match) -> float:
-        """Calculate relevance score for a search result."""
-        score = 0.0
-
-        # Base score
-        score += 1.0
-
-        # Boost for exact matches
-        if pattern.lower() in line.lower():
-            score += 0.5
-
-        # Boost for matches at word boundaries
-        if match.start() == 0 or not line[match.start() - 1].isalnum():
-            score += 0.3
-
-        # Boost for certain file types
-        if file_path.endswith((".py", ".js", ".ts", ".java", ".cpp", ".c")):
-            score += 0.2
-
-        # Boost for matches in comments or docstrings
-        stripped_line = line.strip()
-        if stripped_line.startswith(("#", "//", "/*", '"""', "'''")):
-            score += 0.1
-
-        return score
-
-    def _parse_ripgrep_output(self, output: str) -> List[SearchResult]:
-        """Parse ripgrep JSON output into SearchResult objects."""
-        import json
-
-        results = []
-        for line in output.strip().split("\n"):
-            if not line:
-                continue
-
-            try:
-                data = json.loads(line)
-                if data.get("type") != "match":
-                    continue
-
-                match_data = data["data"]
-                result = SearchResult(
-                    file_path=match_data["path"]["text"],
-                    line_number=match_data["line_number"],
-                    line_content=match_data["lines"]["text"].rstrip("\n\r"),
-                    match_start=match_data["submatches"][0]["start"],
-                    match_end=match_data["submatches"][0]["end"],
-                    context_before=[],  # Ripgrep context handling would go here
-                    context_after=[],
-                    relevance_score=1.0,
-                )
-                results.append(result)
-            except (json.JSONDecodeError, KeyError):
-                continue
-
-        return results
-
-    def _parse_patterns(self, patterns: str) -> List[str]:
-        """Parse comma-separated file patterns."""
-        return [p.strip() for p in patterns.split(",") if p.strip()]
-
-    def _format_results(
-        self, results: List[SearchResult], pattern: str, config: SearchConfig
-    ) -> str:
-        """Format search results for display."""
-        if not results:
-            return f"No matches found for pattern: {pattern}"
-
-        output = []
-        output.append(f"Found {len(results)} matches for pattern: {pattern}")
-        output.append("=" * 60)
-
-        for result in results:
-            # File header
-            output.append(f"\n📁 {result.file_path}:{result.line_number}")
-
-            # Context before
-            for i, context_line in enumerate(result.context_before):
-                line_num = result.line_number - len(result.context_before) + i
-                output.append(f"  {line_num:4d}│ {context_line}")
-
-            # Main match line with highlighting
-            line_content = result.line_content
-            before_match = line_content[: result.match_start]
-            match_text = line_content[result.match_start : result.match_end]
-            after_match = line_content[result.match_end :]
-
-            output.append(f"▶ {result.line_number:4d}│ {before_match}⟨{match_text}⟩{after_match}")
-
-            # Context after
-            for i, context_line in enumerate(result.context_after):
-                line_num = result.line_number + i + 1
-                output.append(f"  {line_num:4d}│ {context_line}")
-
-        return "\n".join(output)
-
 
 # Create tool instance for pydantic-ai
 async def grep(
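Net effect of the grep.py change: the prefilter, per-file matcher, relevance scorer, and formatter move out of ParallelGrep into the new grep_components package, and _execute now just wires them together. A minimal sketch of that composition (illustrative only; `mini_grep` is a made-up name, and the real _execute adds strategy selection, timeouts, and result limits):

import asyncio
from pathlib import Path

from tunacode.tools.grep_components import FileFilter, PatternMatcher, SearchConfig

async def mini_grep(pattern: str, directory: str = ".") -> list:
    loop = asyncio.get_event_loop()
    # 1️⃣ Prefilter candidate files (os.scandir walk, capped at MAX_GLOB).
    candidates = await loop.run_in_executor(
        None, FileFilter.fast_glob, Path(directory), "*.py", None
    )
    # 2️⃣ Search each candidate; regex_pattern=None takes the simple-string path.
    config = SearchConfig(case_sensitive=False, context_lines=2)
    results = []
    for path in candidates:
        results.extend(PatternMatcher.search_file(path, pattern, None, config))
    return results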
tunacode/tools/grep_components/__init__.py
ADDED
@@ -0,0 +1,9 @@
+"""
+Grep tool components for modular organization.
+"""
+
+from .file_filter import FileFilter
+from .pattern_matcher import PatternMatcher
+from .search_result import SearchConfig, SearchResult
+
+__all__ = ["PatternMatcher", "FileFilter", "SearchResult", "SearchConfig"]
tunacode/tools/grep_components/file_filter.py
ADDED
@@ -0,0 +1,93 @@
+"""
+File filtering functionality for the grep tool.
+"""
+
+import fnmatch
+import os
+import re
+from pathlib import Path
+from typing import List, Optional
+
+# Fast-Glob Prefilter Configuration
+MAX_GLOB = 5_000  # Hard cap - protects memory & tokens
+GLOB_BATCH = 500  # Streaming batch size
+EXCLUDE_DIRS = {  # Common directories to skip
+    "node_modules",
+    ".git",
+    "__pycache__",
+    ".venv",
+    "venv",
+    "dist",
+    "build",
+    ".pytest_cache",
+    ".mypy_cache",
+    ".tox",
+    "target",
+}
+
+
+class FileFilter:
+    """Handles file filtering and globbing for the grep tool."""
+
+    @staticmethod
+    def fast_glob(root: Path, include: str, exclude: Optional[str] = None) -> List[Path]:
+        """
+        Lightning-fast filename filtering using os.scandir.
+
+        Args:
+            root: Directory to search
+            include: Include pattern (e.g., "*.py", "*.{js,ts}")
+            exclude: Exclude pattern (optional)
+
+        Returns:
+            List of matching file paths (bounded by MAX_GLOB)
+        """
+        matches: List[Path] = []
+        stack = [root]
+
+        # Handle multiple extensions in include pattern like "*.{py,js,ts}"
+        if "{" in include and "}" in include:
+            # Convert *.{py,js,ts} to multiple patterns
+            base, ext_part = include.split("{", 1)
+            ext_part = ext_part.split("}", 1)[0]
+            extensions = ext_part.split(",")
+            include_patterns = [base + ext.strip() for ext in extensions]
+            include_regexes = [
+                re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns
+            ]
+        else:
+            include_regexes = [re.compile(fnmatch.translate(include), re.IGNORECASE)]
+
+        exclude_rx = re.compile(fnmatch.translate(exclude), re.IGNORECASE) if exclude else None
+
+        while stack and len(matches) < MAX_GLOB:
+            current_dir = stack.pop()
+
+            try:
+                with os.scandir(current_dir) as entries:
+                    for entry in entries:
+                        # Skip common irrelevant directories
+                        if entry.is_dir(follow_symlinks=False):
+                            if entry.name not in EXCLUDE_DIRS:
+                                stack.append(Path(entry.path))
+
+                        # Check file matches
+                        elif entry.is_file(follow_symlinks=False):
+                            # Check against any include pattern
+                            matches_include = any(
+                                regex.match(entry.name) for regex in include_regexes
+                            )
+
+                            if matches_include:
+                                if not exclude_rx or not exclude_rx.match(entry.name):
+                                    matches.append(Path(entry.path))
+
+            except (PermissionError, OSError):
+                continue  # Skip inaccessible directories
+
+        return matches[:MAX_GLOB]
+
+    @staticmethod
+    def parse_patterns(patterns: str) -> List[str]:
+        """Parse comma-separated file patterns."""
+        return [p.strip() for p in patterns.split(",") if p.strip()]
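Worth noting: FileFilter.fast_glob does its own brace expansion before falling back to fnmatch translation, so one include string can cover several extensions. A small illustrative check (local names only; assumes the package is importable):

from pathlib import Path

from tunacode.tools.grep_components import FileFilter

# Walks the tree with os.scandir, skipping EXCLUDE_DIRS, capped at MAX_GLOB (5,000).
candidates = FileFilter.fast_glob(Path("."), "*.{py,md}", exclude="test_*")
print(f"{len(candidates)} candidate files")

# Comma-separated pattern strings are split and stripped.
print(FileFilter.parse_patterns(" *.py, *.md ,"))  # ['*.py', '*.md']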
tunacode/tools/grep_components/pattern_matcher.py
ADDED
@@ -0,0 +1,152 @@
+"""
+Pattern matching functionality for the grep tool.
+"""
+
+import re
+from pathlib import Path
+from typing import Any, List, Optional
+
+from .search_result import SearchConfig, SearchResult
+
+
+class SimpleMatch:
+    """Simple match object for non-regex searches."""
+
+    def __init__(self, start_pos: int, end_pos: int):
+        self._start = start_pos
+        self._end = end_pos
+
+    def start(self) -> int:
+        return self._start
+
+    def end(self) -> int:
+        return self._end
+
+
+class PatternMatcher:
+    """Handles pattern matching and relevance scoring for search results."""
+
+    @staticmethod
+    def search_file(
+        file_path: Path,
+        pattern: str,
+        regex_pattern: Optional[re.Pattern],
+        config: SearchConfig,
+    ) -> List[SearchResult]:
+        """Search a single file for the pattern."""
+        try:
+            with file_path.open("r", encoding="utf-8", errors="ignore") as f:
+                lines = f.readlines()
+
+            results = []
+            for i, line in enumerate(lines):
+                line = line.rstrip("\n\r")
+
+                # Search for pattern
+                if regex_pattern:
+                    matches = list(regex_pattern.finditer(line))
+                else:
+                    # Simple string search
+                    search_line = line if config.case_sensitive else line.lower()
+                    search_pattern = pattern if config.case_sensitive else pattern.lower()
+
+                    matches = []
+                    start = 0
+                    while True:
+                        pos = search_line.find(search_pattern, start)
+                        if pos == -1:
+                            break
+
+                        match = SimpleMatch(pos, pos + len(search_pattern))
+                        matches.append(match)
+                        start = pos + 1
+
+                # Create results for each match
+                for match in matches:
+                    # Get context lines
+                    context_start = max(0, i - config.context_lines)
+                    context_end = min(len(lines), i + config.context_lines + 1)
+
+                    context_before = [lines[j].rstrip("\n\r") for j in range(context_start, i)]
+                    context_after = [lines[j].rstrip("\n\r") for j in range(i + 1, context_end)]
+
+                    # Calculate relevance score
+                    relevance = PatternMatcher.calculate_relevance(
+                        str(file_path), line, pattern, match
+                    )
+
+                    result = SearchResult(
+                        file_path=str(file_path),
+                        line_number=i + 1,
+                        line_content=line,
+                        match_start=match.start() if hasattr(match, "start") else match.start(),
+                        match_end=match.end() if hasattr(match, "end") else match.end(),
+                        context_before=context_before,
+                        context_after=context_after,
+                        relevance_score=relevance,
+                    )
+                    results.append(result)
+
+            return results
+
+        except Exception:
+            return []
+
+    @staticmethod
+    def calculate_relevance(file_path: str, line: str, pattern: str, match: Any) -> float:
+        """Calculate relevance score for a search result."""
+        score = 0.0
+
+        # Base score
+        score += 1.0
+
+        # Boost for exact matches
+        if pattern.lower() in line.lower():
+            score += 0.5
+
+        # Boost for matches at word boundaries
+        if match.start() == 0 or not line[match.start() - 1].isalnum():
+            score += 0.3
+
+        # Boost for certain file types
+        if file_path.endswith((".py", ".js", ".ts", ".java", ".cpp", ".c")):
+            score += 0.2
+
+        # Boost for matches in comments or docstrings
+        stripped_line = line.strip()
+        if stripped_line.startswith(("#", "//", "/*", '"""', "'''")):
+            score += 0.1
+
+        return score
+
+    @staticmethod
+    def parse_ripgrep_output(output: str) -> List[SearchResult]:
+        """Parse ripgrep JSON output into SearchResult objects."""
+        import json
+
+        results = []
+        for line in output.strip().split("\n"):
+            if not line:
+                continue
+
+            try:
+                data = json.loads(line)
+                if data.get("type") != "match":
+                    continue
+
+                match_data = data["data"]
+                result = SearchResult(
+                    file_path=match_data["path"]["text"],
+                    line_number=match_data["line_number"],
+                    line_content=match_data["lines"]["text"].rstrip("\n\r"),
+                    match_start=match_data["submatches"][0]["start"],
+                    match_end=match_data["submatches"][0]["end"],
+                    context_before=[],  # Ripgrep context handling would go here
+                    context_after=[],
+                    relevance_score=1.0,
+                )
+                results.append(result)
+            except (json.JSONDecodeError, KeyError):
+                continue
+
+        return results
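PatternMatcher.parse_ripgrep_output consumes the JSON-lines events emitted by `rg --json` and keeps only events of type "match". A quick illustrative round-trip (the sample line below is hand-built to contain exactly the keys the parser reads; it is not captured ripgrep output):

from tunacode.tools.grep_components import PatternMatcher

# One JSON event of type "match", shaped like `rg --json` output.
sample = (
    '{"type":"match","data":{"path":{"text":"src/app.py"},'
    '"lines":{"text":"def handler():\\n"},"line_number":12,'
    '"submatches":[{"match":{"text":"handler"},"start":4,"end":11}]}}'
)
[hit] = PatternMatcher.parse_ripgrep_output(sample)
print(hit.file_path, hit.line_number, hit.match_start, hit.match_end)
# src/app.py 12 4 11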
tunacode/tools/grep_components/result_formatter.py
ADDED
@@ -0,0 +1,45 @@
+"""
+Result formatting functionality for the grep tool.
+"""
+
+from typing import List
+
+from .search_result import SearchConfig, SearchResult
+
+
+class ResultFormatter:
+    """Handles formatting of search results for display."""
+
+    @staticmethod
+    def format_results(results: List[SearchResult], pattern: str, config: SearchConfig) -> str:
+        """Format search results for display."""
+        if not results:
+            return f"No matches found for pattern: {pattern}"
+
+        output = []
+        output.append(f"Found {len(results)} matches for pattern: {pattern}")
+        output.append("=" * 60)
+
+        for result in results:
+            # File header
+            output.append(f"\n📁 {result.file_path}:{result.line_number}")
+
+            # Context before
+            for i, context_line in enumerate(result.context_before):
+                line_num = result.line_number - len(result.context_before) + i
+                output.append(f"  {line_num:4d}│ {context_line}")
+
+            # Main match line with highlighting
+            line_content = result.line_content
+            before_match = line_content[: result.match_start]
+            match_text = line_content[result.match_start : result.match_end]
+            after_match = line_content[result.match_end :]
+
+            output.append(f"▶ {result.line_number:4d}│ {before_match}⟨{match_text}⟩{after_match}")
+
+            # Context after
+            for i, context_line in enumerate(result.context_after):
+                line_num = result.line_number + i + 1
+                output.append(f"  {line_num:4d}│ {context_line}")
+
+        return "\n".join(output)
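For reference, the display format ResultFormatter produces can be exercised with a hand-built result (assuming SearchResult and SearchConfig keep the dataclass fields shown in the code removed from grep.py above; search_result.py itself is not rendered in this diff):

from tunacode.tools.grep_components import SearchConfig, SearchResult
from tunacode.tools.grep_components.result_formatter import ResultFormatter

hit = SearchResult(
    file_path="src/app.py",
    line_number=12,
    line_content="def handler():",
    match_start=4,
    match_end=11,
    context_before=["# routes"],
    context_after=["    return None"],
)
print(ResultFormatter.format_results([hit], "handler", SearchConfig()))

The match line renders as `▶   12│ def ⟨handler⟩():` between its numbered context lines.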
|