tunacode-cli 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tunacode-cli might be problematic; see the registry's release page for details.
- tunacode/__init__.py +0 -0
- tunacode/cli/textual_repl.tcss +283 -0
- tunacode/configuration/__init__.py +1 -0
- tunacode/configuration/defaults.py +45 -0
- tunacode/configuration/models.py +147 -0
- tunacode/configuration/models_registry.json +1 -0
- tunacode/configuration/pricing.py +74 -0
- tunacode/configuration/settings.py +35 -0
- tunacode/constants.py +227 -0
- tunacode/core/__init__.py +6 -0
- tunacode/core/agents/__init__.py +39 -0
- tunacode/core/agents/agent_components/__init__.py +48 -0
- tunacode/core/agents/agent_components/agent_config.py +441 -0
- tunacode/core/agents/agent_components/agent_helpers.py +290 -0
- tunacode/core/agents/agent_components/message_handler.py +99 -0
- tunacode/core/agents/agent_components/node_processor.py +477 -0
- tunacode/core/agents/agent_components/response_state.py +129 -0
- tunacode/core/agents/agent_components/result_wrapper.py +51 -0
- tunacode/core/agents/agent_components/state_transition.py +112 -0
- tunacode/core/agents/agent_components/streaming.py +271 -0
- tunacode/core/agents/agent_components/task_completion.py +40 -0
- tunacode/core/agents/agent_components/tool_buffer.py +44 -0
- tunacode/core/agents/agent_components/tool_executor.py +101 -0
- tunacode/core/agents/agent_components/truncation_checker.py +37 -0
- tunacode/core/agents/delegation_tools.py +109 -0
- tunacode/core/agents/main.py +545 -0
- tunacode/core/agents/prompts.py +66 -0
- tunacode/core/agents/research_agent.py +231 -0
- tunacode/core/compaction.py +218 -0
- tunacode/core/prompting/__init__.py +27 -0
- tunacode/core/prompting/loader.py +66 -0
- tunacode/core/prompting/prompting_engine.py +98 -0
- tunacode/core/prompting/sections.py +50 -0
- tunacode/core/prompting/templates.py +69 -0
- tunacode/core/state.py +409 -0
- tunacode/exceptions.py +313 -0
- tunacode/indexing/__init__.py +5 -0
- tunacode/indexing/code_index.py +432 -0
- tunacode/indexing/constants.py +86 -0
- tunacode/lsp/__init__.py +112 -0
- tunacode/lsp/client.py +351 -0
- tunacode/lsp/diagnostics.py +19 -0
- tunacode/lsp/servers.py +101 -0
- tunacode/prompts/default_prompt.md +952 -0
- tunacode/prompts/research/sections/agent_role.xml +5 -0
- tunacode/prompts/research/sections/constraints.xml +14 -0
- tunacode/prompts/research/sections/output_format.xml +57 -0
- tunacode/prompts/research/sections/tool_use.xml +23 -0
- tunacode/prompts/sections/advanced_patterns.xml +255 -0
- tunacode/prompts/sections/agent_role.xml +8 -0
- tunacode/prompts/sections/completion.xml +10 -0
- tunacode/prompts/sections/critical_rules.xml +37 -0
- tunacode/prompts/sections/examples.xml +220 -0
- tunacode/prompts/sections/output_style.xml +94 -0
- tunacode/prompts/sections/parallel_exec.xml +105 -0
- tunacode/prompts/sections/search_pattern.xml +100 -0
- tunacode/prompts/sections/system_info.xml +6 -0
- tunacode/prompts/sections/tool_use.xml +84 -0
- tunacode/prompts/sections/user_instructions.xml +3 -0
- tunacode/py.typed +0 -0
- tunacode/templates/__init__.py +5 -0
- tunacode/templates/loader.py +15 -0
- tunacode/tools/__init__.py +10 -0
- tunacode/tools/authorization/__init__.py +29 -0
- tunacode/tools/authorization/context.py +32 -0
- tunacode/tools/authorization/factory.py +20 -0
- tunacode/tools/authorization/handler.py +58 -0
- tunacode/tools/authorization/notifier.py +35 -0
- tunacode/tools/authorization/policy.py +19 -0
- tunacode/tools/authorization/requests.py +119 -0
- tunacode/tools/authorization/rules.py +72 -0
- tunacode/tools/bash.py +222 -0
- tunacode/tools/decorators.py +213 -0
- tunacode/tools/glob.py +353 -0
- tunacode/tools/grep.py +468 -0
- tunacode/tools/grep_components/__init__.py +9 -0
- tunacode/tools/grep_components/file_filter.py +93 -0
- tunacode/tools/grep_components/pattern_matcher.py +158 -0
- tunacode/tools/grep_components/result_formatter.py +87 -0
- tunacode/tools/grep_components/search_result.py +34 -0
- tunacode/tools/list_dir.py +205 -0
- tunacode/tools/prompts/bash_prompt.xml +10 -0
- tunacode/tools/prompts/glob_prompt.xml +7 -0
- tunacode/tools/prompts/grep_prompt.xml +10 -0
- tunacode/tools/prompts/list_dir_prompt.xml +7 -0
- tunacode/tools/prompts/read_file_prompt.xml +9 -0
- tunacode/tools/prompts/todoclear_prompt.xml +12 -0
- tunacode/tools/prompts/todoread_prompt.xml +16 -0
- tunacode/tools/prompts/todowrite_prompt.xml +28 -0
- tunacode/tools/prompts/update_file_prompt.xml +9 -0
- tunacode/tools/prompts/web_fetch_prompt.xml +11 -0
- tunacode/tools/prompts/write_file_prompt.xml +7 -0
- tunacode/tools/react.py +111 -0
- tunacode/tools/read_file.py +68 -0
- tunacode/tools/todo.py +222 -0
- tunacode/tools/update_file.py +62 -0
- tunacode/tools/utils/__init__.py +1 -0
- tunacode/tools/utils/ripgrep.py +311 -0
- tunacode/tools/utils/text_match.py +352 -0
- tunacode/tools/web_fetch.py +245 -0
- tunacode/tools/write_file.py +34 -0
- tunacode/tools/xml_helper.py +34 -0
- tunacode/types/__init__.py +166 -0
- tunacode/types/base.py +94 -0
- tunacode/types/callbacks.py +53 -0
- tunacode/types/dataclasses.py +121 -0
- tunacode/types/pydantic_ai.py +31 -0
- tunacode/types/state.py +122 -0
- tunacode/ui/__init__.py +6 -0
- tunacode/ui/app.py +542 -0
- tunacode/ui/commands/__init__.py +430 -0
- tunacode/ui/components/__init__.py +1 -0
- tunacode/ui/headless/__init__.py +5 -0
- tunacode/ui/headless/output.py +72 -0
- tunacode/ui/main.py +252 -0
- tunacode/ui/renderers/__init__.py +41 -0
- tunacode/ui/renderers/errors.py +197 -0
- tunacode/ui/renderers/panels.py +550 -0
- tunacode/ui/renderers/search.py +314 -0
- tunacode/ui/renderers/tools/__init__.py +21 -0
- tunacode/ui/renderers/tools/bash.py +247 -0
- tunacode/ui/renderers/tools/diagnostics.py +186 -0
- tunacode/ui/renderers/tools/glob.py +226 -0
- tunacode/ui/renderers/tools/grep.py +228 -0
- tunacode/ui/renderers/tools/list_dir.py +198 -0
- tunacode/ui/renderers/tools/read_file.py +226 -0
- tunacode/ui/renderers/tools/research.py +294 -0
- tunacode/ui/renderers/tools/update_file.py +237 -0
- tunacode/ui/renderers/tools/web_fetch.py +182 -0
- tunacode/ui/repl_support.py +226 -0
- tunacode/ui/screens/__init__.py +16 -0
- tunacode/ui/screens/model_picker.py +303 -0
- tunacode/ui/screens/session_picker.py +181 -0
- tunacode/ui/screens/setup.py +218 -0
- tunacode/ui/screens/theme_picker.py +90 -0
- tunacode/ui/screens/update_confirm.py +69 -0
- tunacode/ui/shell_runner.py +129 -0
- tunacode/ui/styles/layout.tcss +98 -0
- tunacode/ui/styles/modals.tcss +38 -0
- tunacode/ui/styles/panels.tcss +81 -0
- tunacode/ui/styles/theme-nextstep.tcss +303 -0
- tunacode/ui/styles/widgets.tcss +33 -0
- tunacode/ui/styles.py +18 -0
- tunacode/ui/widgets/__init__.py +23 -0
- tunacode/ui/widgets/command_autocomplete.py +62 -0
- tunacode/ui/widgets/editor.py +402 -0
- tunacode/ui/widgets/file_autocomplete.py +47 -0
- tunacode/ui/widgets/messages.py +46 -0
- tunacode/ui/widgets/resource_bar.py +182 -0
- tunacode/ui/widgets/status_bar.py +98 -0
- tunacode/utils/__init__.py +0 -0
- tunacode/utils/config/__init__.py +13 -0
- tunacode/utils/config/user_configuration.py +91 -0
- tunacode/utils/messaging/__init__.py +10 -0
- tunacode/utils/messaging/message_utils.py +34 -0
- tunacode/utils/messaging/token_counter.py +77 -0
- tunacode/utils/parsing/__init__.py +13 -0
- tunacode/utils/parsing/command_parser.py +55 -0
- tunacode/utils/parsing/json_utils.py +188 -0
- tunacode/utils/parsing/retry.py +146 -0
- tunacode/utils/parsing/tool_parser.py +267 -0
- tunacode/utils/security/__init__.py +15 -0
- tunacode/utils/security/command.py +106 -0
- tunacode/utils/system/__init__.py +25 -0
- tunacode/utils/system/gitignore.py +155 -0
- tunacode/utils/system/paths.py +190 -0
- tunacode/utils/ui/__init__.py +9 -0
- tunacode/utils/ui/file_filter.py +135 -0
- tunacode/utils/ui/helpers.py +24 -0
- tunacode_cli-0.1.21.dist-info/METADATA +170 -0
- tunacode_cli-0.1.21.dist-info/RECORD +174 -0
- tunacode_cli-0.1.21.dist-info/WHEEL +4 -0
- tunacode_cli-0.1.21.dist-info/entry_points.txt +2 -0
- tunacode_cli-0.1.21.dist-info/licenses/LICENSE +21 -0
tunacode/tools/grep.py
ADDED
@@ -0,0 +1,468 @@
"""
Parallel grep tool for TunaCode - Enhanced content search with parallel processing.

This tool provides sophisticated grep-like functionality with:
- Parallel file searching across multiple directories
- Multiple search strategies (literal, regex, fuzzy)
- Smart result ranking and deduplication
- Context-aware output formatting
- Timeout handling for overly broad patterns (3 second deadline for first match)

CLAUDE_ANCHOR[grep-module]: Fast parallel file search with 3-second deadline
"""

import asyncio
import re
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Any

from tunacode.configuration.defaults import DEFAULT_USER_CONFIG
from tunacode.exceptions import TooBroadPatternError, ToolExecutionError
from tunacode.tools.decorators import base_tool
from tunacode.tools.grep_components import (
    FileFilter,
    PatternMatcher,
    SearchConfig,
    SearchResult,
)
from tunacode.tools.grep_components.result_formatter import ResultFormatter
from tunacode.tools.utils.ripgrep import RipgrepExecutor
from tunacode.tools.utils.ripgrep import metrics as ripgrep_metrics


class ParallelGrep:
    """Advanced parallel grep tool with multiple search strategies."""

    def __init__(self):
        self._executor = ThreadPoolExecutor(max_workers=8)
        self._file_filter = FileFilter()
        self._pattern_matcher = PatternMatcher()
        self._result_formatter = ResultFormatter()
        self._ripgrep_executor = RipgrepExecutor()
        self._config = self._load_ripgrep_config()

    def _load_ripgrep_config(self) -> dict[str, Any]:
        """Load ripgrep configuration from defaults."""
        default_ripgrep_config = {
            "timeout": 10,
            "max_buffer_size": 1048576,
            "max_results": 100,
            "enable_metrics": False,
            "debug": False,
        }
        settings = DEFAULT_USER_CONFIG.get("settings", {})
        if not isinstance(settings, dict):
            return default_ripgrep_config

        config = settings.get("ripgrep", default_ripgrep_config)
        if not isinstance(config, dict):
            return default_ripgrep_config

        merged = default_ripgrep_config | config
        return merged

    async def execute(
        self,
        pattern: str,
        directory: str = ".",
        case_sensitive: bool = False,
        use_regex: bool = False,
        include_files: str | None = None,
        exclude_files: str | None = None,
        max_results: int = 50,
        context_lines: int = 2,
        search_type: str = "smart",  # smart, ripgrep, python, hybrid
        return_format: str = "string",  # "string" (default) or "list" (legacy)
        output_mode: str = "content",  # content, files_with_matches, count, json
    ) -> str | list[str]:
        """
        Execute parallel grep search with fast-glob prefiltering and multiple strategies.

        Args:
            pattern: Search pattern (literal text or regex)
            directory: Directory to search (default: current)
            case_sensitive: Whether search is case sensitive
            use_regex: Whether pattern is a regular expression
            include_files: File patterns to include (e.g., "*.py", "*.{js,ts}")
            exclude_files: File patterns to exclude (e.g., "*.pyc", "node_modules/*")
            max_results: Maximum number of results to return
            context_lines: Number of context lines before/after matches
            search_type: Search strategy to use

        Returns:
            Formatted search results
        """
        try:
            # 1️⃣ Fast-glob prefilter to find candidate files
            include_pattern = include_files or "*"
            exclude_pattern = exclude_files

            candidates = await asyncio.get_event_loop().run_in_executor(
                self._executor,
                self._file_filter.fast_glob,
                Path(directory),
                include_pattern,
                exclude_pattern,
            )

            if not candidates:
                if return_format == "list":
                    return []
                return f"No files found matching pattern: {include_pattern}"

            # 2️⃣ Smart strategy selection based on candidate count
            original_search_type = search_type
            if search_type == "smart":
                if len(candidates) <= 50:
                    # Small set - Python strategy more efficient (low startup cost)
                    search_type = "python"
                elif len(candidates) <= 1000:
                    # Medium set - Ripgrep optimal for this range
                    search_type = "ripgrep"
                else:
                    # Large set - Hybrid for best coverage and redundancy
                    search_type = "hybrid"

            # 3️⃣ Create search configuration
            # Note: include_patterns/exclude_patterns now only used for legacy compatibility
            include_patterns = (
                self._file_filter.parse_patterns(include_files) if include_files else ["*"]
            )
            exclude_patterns = (
                self._file_filter.parse_patterns(exclude_files) if exclude_files else []
            )
            config = SearchConfig(
                case_sensitive=case_sensitive,
                use_regex=use_regex,
                max_results=max_results,
                context_lines=context_lines,
                include_patterns=include_patterns,
                exclude_patterns=exclude_patterns,
            )

            # 4️⃣ Execute chosen strategy with pre-filtered candidates
            # Execute search with pre-filtered candidates
            if search_type == "ripgrep":
                # Try ripgrep first for performance. If ripgrep is unavailable or
                # returns no results (e.g., binary missing), gracefully fallback to
                # the Python implementation so the tool still returns matches.
                results = await self._ripgrep_search_filtered(pattern, candidates, config)
                if not results:
                    # Fallback to python search when ripgrep produced no output
                    results = await self._python_search_filtered(pattern, candidates, config)
            elif search_type == "python":
                results = await self._python_search_filtered(pattern, candidates, config)
            elif search_type == "hybrid":
                results = await self._hybrid_search_filtered(pattern, candidates, config)
            else:
                raise ToolExecutionError(
                    tool_name="grep", message=f"Unknown search type: {search_type}"
                )

            # 5️⃣ Format and return results with strategy info
            strategy_info = (
                f"Strategy: {search_type} (was {original_search_type}), "
                f"Files: {len(candidates)}/{5000}"
            )
            formatted_results = self._result_formatter.format_results(
                results, pattern, config, output_mode=output_mode
            )

            if return_format == "list":
                # Legacy: return list of file paths with at least one match
                file_set = set()
                for r in results:
                    file_set.add(r.file_path)
                return list(file_set)
            else:
                # Add strategy info to results
                if formatted_results.startswith("Found"):
                    lines = formatted_results.split("\n")
                    lines[1] = (
                        f"Strategy: {search_type} | Candidates: {len(candidates)} files | "
                        + lines[1]
                    )
                    return "\n".join(lines)
                else:
                    return f"{formatted_results}\n\n{strategy_info}"

        except TooBroadPatternError:
            # Re-raise TooBroadPatternError without wrapping it
            raise
        except Exception as err:
            raise ToolExecutionError(
                tool_name="grep", message=f"Grep search failed: {err}"
            ) from err

    # ====== SEARCH METHODS ======

    async def _ripgrep_search_filtered(
        self, pattern: str, candidates: list[Path], config: SearchConfig
    ) -> list[SearchResult]:
        """
        Run ripgrep on pre-filtered file list using the enhanced RipgrepExecutor.
        """

        def run_enhanced_ripgrep():
            """Execute ripgrep search using the new executor."""
            start_time = time.time()
            results = []

            # Configure timeout from settings
            timeout = min(self._config.get("timeout", 10), config.timeout_seconds)

            # If ripgrep executor is using fallback, skip this method entirely
            if self._ripgrep_executor._use_python_fallback:
                # Return empty to trigger Python fallback in the calling function
                return []

            try:
                # Use the enhanced executor with support for context lines
                # Note: Currently searching all files, not using candidates
                # This is a limitation that should be addressed in future enhancement
                search_results = self._ripgrep_executor.search(
                    pattern=pattern,
                    path=".",  # Search in current directory
                    timeout=timeout,
                    max_matches=config.max_results,
                    case_insensitive=not config.case_sensitive,
                    context_before=config.context_lines,
                    context_after=config.context_lines,
                )

                # Ripgrep doesn't provide timing info for first match, so we rely on
                # the overall timeout mechanism instead of first_match_deadline

                # Parse results
                for result_line in search_results:
                    # Parse ripgrep output format "file:line:content"
                    parts = result_line.split(":", 2)
                    if len(parts) >= 3:
                        # Filter to only include results from candidates
                        file_path = Path(parts[0])
                        if file_path not in candidates:
                            continue

                        try:
                            search_result = SearchResult(
                                file_path=parts[0],
                                line_number=int(parts[1]),
                                line_content=parts[2] if len(parts) > 2 else "",
                                match_start=0,
                                match_end=len(parts[2]) if len(parts) > 2 else 0,
                                context_before=[],
                                context_after=[],
                                relevance_score=1.0,
                            )
                            results.append(search_result)

                            # Stop if we have enough results
                            if config.max_results and len(results) >= config.max_results:
                                break
                        except (ValueError, IndexError):
                            continue

            except TooBroadPatternError:
                raise
            except Exception:
                # Return empty to trigger fallback
                return []

            # Record metrics if enabled
            if self._config.get("enable_metrics", False):
                total_time = time.time() - start_time
                ripgrep_metrics.record_search(
                    duration=total_time, used_fallback=self._ripgrep_executor._use_python_fallback
                )

            return results

        # Run the enhanced ripgrep search
        try:
            return await asyncio.get_event_loop().run_in_executor(
                self._executor, run_enhanced_ripgrep
            )
        except TooBroadPatternError:
            raise

    async def _python_search_filtered(
        self, pattern: str, candidates: list[Path], config: SearchConfig
    ) -> list[SearchResult]:
        """
        Run Python parallel search on pre-filtered candidates with first match deadline.
        """
        # Prepare search pattern
        if config.use_regex:
            flags = 0 if config.case_sensitive else re.IGNORECASE
            regex_pattern = re.compile(pattern, flags)
        else:
            regex_pattern = None

        # Track search progress
        first_match_event = asyncio.Event()

        async def search_with_monitoring(file_path: Path):
            """Search a file and signal when first match is found."""
            try:
                file_results = await self._search_file(file_path, pattern, regex_pattern, config)
                if file_results and not first_match_event.is_set():
                    first_match_event.set()
                return file_results
            except Exception:
                return []

        # Create search tasks for candidates only
        search_tasks = []
        for file_path in candidates:
            task = search_with_monitoring(file_path)
            search_tasks.append(task)

        # Create a deadline task
        async def check_deadline():
            """Monitor for first match deadline."""
            await asyncio.sleep(config.first_match_deadline)
            if not first_match_event.is_set():
                # Cancel all pending tasks
                for task in search_tasks:
                    if not task.done():
                        task.cancel()
                raise TooBroadPatternError(pattern, config.first_match_deadline)

        deadline_task = asyncio.create_task(check_deadline())

        try:
            # Execute searches in parallel with deadline monitoring
            all_results = await asyncio.gather(*search_tasks, return_exceptions=True)

            # Cancel deadline task if we got results
            deadline_task.cancel()

            # Flatten results and filter out exceptions
            results = []
            for file_results in all_results:
                if isinstance(file_results, list):
                    results.extend(file_results)

            # Sort by relevance and limit results
            results.sort(key=lambda r: r.relevance_score, reverse=True)
            return results[: config.max_results]

        except asyncio.CancelledError:
            # Re-raise TooBroadPatternError if that's what caused the cancellation
            if deadline_task.done():
                try:
                    await deadline_task
                except TooBroadPatternError:
                    raise
            return []

    async def _hybrid_search_filtered(
        self, pattern: str, candidates: list[Path], config: SearchConfig
    ) -> list[SearchResult]:
        """
        Hybrid approach using multiple search methods concurrently on pre-filtered candidates.
        """

        # Run multiple search strategies in parallel
        tasks = [
            self._ripgrep_search_filtered(pattern, candidates, config),
            self._python_search_filtered(pattern, candidates, config),
        ]

        results_list = await asyncio.gather(*tasks, return_exceptions=True)

        # Check if any task raised TooBroadPatternError
        too_broad_errors = [r for r in results_list if isinstance(r, TooBroadPatternError)]
        if too_broad_errors:
            # If both strategies timed out, raise the error
            valid_results = [r for r in results_list if isinstance(r, list)]
            if not valid_results:
                raise too_broad_errors[0]

        # Merge and deduplicate results
        all_results = []
        for results in results_list:
            if isinstance(results, list):
                all_results.extend(results)

        # Deduplicate by file path and line number
        seen = set()
        unique_results = []
        for result in all_results:
            key = (result.file_path, result.line_number)
            if key not in seen:
                seen.add(key)
                unique_results.append(result)

        # Sort and limit
        unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
        return unique_results[: config.max_results]

    async def _search_file(
        self,
        file_path: Path,
        pattern: str,
        regex_pattern: re.Pattern | None,
        config: SearchConfig,
    ) -> list[SearchResult]:
        """Search a single file for the pattern."""

        def search_file_sync():
            return self._pattern_matcher.search_file(file_path, pattern, regex_pattern, config)

        return await asyncio.get_event_loop().run_in_executor(self._executor, search_file_sync)


@base_tool
async def grep(
    pattern: str,
    directory: str = ".",
    path: str | None = None,
    case_sensitive: bool = False,
    use_regex: bool = False,
    include_files: str | None = None,
    exclude_files: str | None = None,
    max_results: int = 50,
    context_lines: int = 2,
    search_type: str = "smart",
    return_format: str = "string",
    output_mode: str = "content",
) -> str | list[str]:
    """Advanced parallel grep search with multiple strategies.

    Args:
        pattern: Search pattern (literal text or regex).
        directory: Directory to search (default: current directory).
        path: Alias for directory.
        case_sensitive: Whether search is case sensitive.
        use_regex: Whether pattern is a regular expression.
        include_files: File patterns to include (e.g., "*.py,*.js").
        exclude_files: File patterns to exclude.
        max_results: Maximum number of results to return.
        context_lines: Number of context lines before/after matches.
        search_type: Search strategy (smart/ripgrep/python/hybrid).
        return_format: Output format (string or list).
        output_mode: Output mode (content, files_with_matches, count, json).

    Returns:
        Formatted search results with file paths, line numbers, and context.
    """
    if path is not None:
        directory = path

    tool = ParallelGrep()
    return await tool.execute(
        pattern=pattern,
        directory=directory,
        case_sensitive=case_sensitive,
        use_regex=use_regex,
        include_files=include_files,
        exclude_files=exclude_files,
        max_results=max_results,
        context_lines=context_lines,
        search_type=search_type,
        return_format=return_format,
        output_mode=output_mode,
    )
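For orientation, here is a minimal, hypothetical way the module-level grep coroutine above could be driven from a script. It assumes the @base_tool decorator preserves the plain coroutine call signature; the call pattern is inferred from the signature shown, not taken from the package's documentation.

import asyncio

from tunacode.tools.grep import grep


async def main() -> None:
    # Hypothetical invocation: literal search for "TODO" in Python sources;
    # the default "smart" search_type lets the tool pick python/ripgrep/hybrid
    # based on how many candidate files the fast-glob prefilter finds.
    output = await grep(
        pattern="TODO",
        directory=".",
        include_files="*.py",
        max_results=20,
    )
    print(output)


asyncio.run(main())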
tunacode/tools/grep_components/__init__.py
ADDED
@@ -0,0 +1,9 @@
"""
Grep tool components for modular organization.
"""

from .file_filter import FileFilter
from .pattern_matcher import PatternMatcher
from .search_result import SearchConfig, SearchResult

__all__ = ["PatternMatcher", "FileFilter", "SearchResult", "SearchConfig"]
tunacode/tools/grep_components/file_filter.py
ADDED
@@ -0,0 +1,93 @@
"""
File filtering functionality for the grep tool.
"""

import fnmatch
import os
import re
from pathlib import Path

# Fast-Glob Prefilter Configuration
MAX_GLOB = 5_000  # Hard cap - protects memory & tokens
GLOB_BATCH = 500  # Streaming batch size
EXCLUDE_DIRS = {  # Common directories to skip
    "node_modules",
    ".git",
    "__pycache__",
    ".venv",
    "venv",
    "dist",
    "build",
    ".pytest_cache",
    ".mypy_cache",
    ".tox",
    "target",
}


class FileFilter:
    """Handles file filtering and globbing for the grep tool."""

    @staticmethod
    def fast_glob(root: Path, include: str, exclude: str | None = None) -> list[Path]:
        """
        Lightning-fast filename filtering using os.scandir.

        Args:
            root: Directory to search
            include: Include pattern (e.g., "*.py", "*.{js,ts}")
            exclude: Exclude pattern (optional)

        Returns:
            List of matching file paths (bounded by MAX_GLOB)
        """
        matches: list[Path] = []
        stack = [root]

        # Handle multiple extensions in include pattern like "*.{py,js,ts}"
        if "{" in include and "}" in include:
            # Convert *.{py,js,ts} to multiple patterns
            base, ext_part = include.split("{", 1)
            ext_part = ext_part.split("}", 1)[0]
            extensions = ext_part.split(",")
            include_patterns = [base + ext.strip() for ext in extensions]
            include_regexes = [
                re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns
            ]
        else:
            include_regexes = [re.compile(fnmatch.translate(include), re.IGNORECASE)]

        exclude_rx = re.compile(fnmatch.translate(exclude), re.IGNORECASE) if exclude else None

        while stack and len(matches) < MAX_GLOB:
            current_dir = stack.pop()

            try:
                with os.scandir(current_dir) as entries:
                    for entry in entries:
                        # Skip common irrelevant directories
                        if entry.is_dir(follow_symlinks=False):
                            if entry.name not in EXCLUDE_DIRS:
                                stack.append(Path(entry.path))

                        # Check file matches
                        elif entry.is_file(follow_symlinks=False):
                            # Check against any include pattern
                            matches_include = any(
                                regex.match(entry.name) for regex in include_regexes
                            )

                            if matches_include and (
                                not exclude_rx or not exclude_rx.match(entry.name)
                            ):
                                matches.append(Path(entry.path))

            except (PermissionError, OSError):
                continue  # Skip inaccessible directories

        return matches[:MAX_GLOB]

    @staticmethod
    def parse_patterns(patterns: str) -> list[str]:
        """Parse comma-separated file patterns."""
        return [p.strip() for p in patterns.split(",") if p.strip()]
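As a quick sketch of the prefilter in isolation (assuming FileFilter is called directly rather than through ParallelGrep.execute; the pattern arguments are illustrative):

from pathlib import Path

from tunacode.tools.grep_components.file_filter import FileFilter

# Gather candidate Python and TypeScript files under the current directory.
# Directories in EXCLUDE_DIRS are skipped and the result is capped at MAX_GLOB (5,000).
candidates = FileFilter.fast_glob(Path("."), include="*.{py,ts}", exclude="*_test.py")
print(f"{len(candidates)} candidate files")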