tunacode-cli 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tunacode-cli might be problematic. Click here for more details.

Files changed (174) hide show
  1. tunacode/__init__.py +0 -0
  2. tunacode/cli/textual_repl.tcss +283 -0
  3. tunacode/configuration/__init__.py +1 -0
  4. tunacode/configuration/defaults.py +45 -0
  5. tunacode/configuration/models.py +147 -0
  6. tunacode/configuration/models_registry.json +1 -0
  7. tunacode/configuration/pricing.py +74 -0
  8. tunacode/configuration/settings.py +35 -0
  9. tunacode/constants.py +227 -0
  10. tunacode/core/__init__.py +6 -0
  11. tunacode/core/agents/__init__.py +39 -0
  12. tunacode/core/agents/agent_components/__init__.py +48 -0
  13. tunacode/core/agents/agent_components/agent_config.py +441 -0
  14. tunacode/core/agents/agent_components/agent_helpers.py +290 -0
  15. tunacode/core/agents/agent_components/message_handler.py +99 -0
  16. tunacode/core/agents/agent_components/node_processor.py +477 -0
  17. tunacode/core/agents/agent_components/response_state.py +129 -0
  18. tunacode/core/agents/agent_components/result_wrapper.py +51 -0
  19. tunacode/core/agents/agent_components/state_transition.py +112 -0
  20. tunacode/core/agents/agent_components/streaming.py +271 -0
  21. tunacode/core/agents/agent_components/task_completion.py +40 -0
  22. tunacode/core/agents/agent_components/tool_buffer.py +44 -0
  23. tunacode/core/agents/agent_components/tool_executor.py +101 -0
  24. tunacode/core/agents/agent_components/truncation_checker.py +37 -0
  25. tunacode/core/agents/delegation_tools.py +109 -0
  26. tunacode/core/agents/main.py +545 -0
  27. tunacode/core/agents/prompts.py +66 -0
  28. tunacode/core/agents/research_agent.py +231 -0
  29. tunacode/core/compaction.py +218 -0
  30. tunacode/core/prompting/__init__.py +27 -0
  31. tunacode/core/prompting/loader.py +66 -0
  32. tunacode/core/prompting/prompting_engine.py +98 -0
  33. tunacode/core/prompting/sections.py +50 -0
  34. tunacode/core/prompting/templates.py +69 -0
  35. tunacode/core/state.py +409 -0
  36. tunacode/exceptions.py +313 -0
  37. tunacode/indexing/__init__.py +5 -0
  38. tunacode/indexing/code_index.py +432 -0
  39. tunacode/indexing/constants.py +86 -0
  40. tunacode/lsp/__init__.py +112 -0
  41. tunacode/lsp/client.py +351 -0
  42. tunacode/lsp/diagnostics.py +19 -0
  43. tunacode/lsp/servers.py +101 -0
  44. tunacode/prompts/default_prompt.md +952 -0
  45. tunacode/prompts/research/sections/agent_role.xml +5 -0
  46. tunacode/prompts/research/sections/constraints.xml +14 -0
  47. tunacode/prompts/research/sections/output_format.xml +57 -0
  48. tunacode/prompts/research/sections/tool_use.xml +23 -0
  49. tunacode/prompts/sections/advanced_patterns.xml +255 -0
  50. tunacode/prompts/sections/agent_role.xml +8 -0
  51. tunacode/prompts/sections/completion.xml +10 -0
  52. tunacode/prompts/sections/critical_rules.xml +37 -0
  53. tunacode/prompts/sections/examples.xml +220 -0
  54. tunacode/prompts/sections/output_style.xml +94 -0
  55. tunacode/prompts/sections/parallel_exec.xml +105 -0
  56. tunacode/prompts/sections/search_pattern.xml +100 -0
  57. tunacode/prompts/sections/system_info.xml +6 -0
  58. tunacode/prompts/sections/tool_use.xml +84 -0
  59. tunacode/prompts/sections/user_instructions.xml +3 -0
  60. tunacode/py.typed +0 -0
  61. tunacode/templates/__init__.py +5 -0
  62. tunacode/templates/loader.py +15 -0
  63. tunacode/tools/__init__.py +10 -0
  64. tunacode/tools/authorization/__init__.py +29 -0
  65. tunacode/tools/authorization/context.py +32 -0
  66. tunacode/tools/authorization/factory.py +20 -0
  67. tunacode/tools/authorization/handler.py +58 -0
  68. tunacode/tools/authorization/notifier.py +35 -0
  69. tunacode/tools/authorization/policy.py +19 -0
  70. tunacode/tools/authorization/requests.py +119 -0
  71. tunacode/tools/authorization/rules.py +72 -0
  72. tunacode/tools/bash.py +222 -0
  73. tunacode/tools/decorators.py +213 -0
  74. tunacode/tools/glob.py +353 -0
  75. tunacode/tools/grep.py +468 -0
  76. tunacode/tools/grep_components/__init__.py +9 -0
  77. tunacode/tools/grep_components/file_filter.py +93 -0
  78. tunacode/tools/grep_components/pattern_matcher.py +158 -0
  79. tunacode/tools/grep_components/result_formatter.py +87 -0
  80. tunacode/tools/grep_components/search_result.py +34 -0
  81. tunacode/tools/list_dir.py +205 -0
  82. tunacode/tools/prompts/bash_prompt.xml +10 -0
  83. tunacode/tools/prompts/glob_prompt.xml +7 -0
  84. tunacode/tools/prompts/grep_prompt.xml +10 -0
  85. tunacode/tools/prompts/list_dir_prompt.xml +7 -0
  86. tunacode/tools/prompts/read_file_prompt.xml +9 -0
  87. tunacode/tools/prompts/todoclear_prompt.xml +12 -0
  88. tunacode/tools/prompts/todoread_prompt.xml +16 -0
  89. tunacode/tools/prompts/todowrite_prompt.xml +28 -0
  90. tunacode/tools/prompts/update_file_prompt.xml +9 -0
  91. tunacode/tools/prompts/web_fetch_prompt.xml +11 -0
  92. tunacode/tools/prompts/write_file_prompt.xml +7 -0
  93. tunacode/tools/react.py +111 -0
  94. tunacode/tools/read_file.py +68 -0
  95. tunacode/tools/todo.py +222 -0
  96. tunacode/tools/update_file.py +62 -0
  97. tunacode/tools/utils/__init__.py +1 -0
  98. tunacode/tools/utils/ripgrep.py +311 -0
  99. tunacode/tools/utils/text_match.py +352 -0
  100. tunacode/tools/web_fetch.py +245 -0
  101. tunacode/tools/write_file.py +34 -0
  102. tunacode/tools/xml_helper.py +34 -0
  103. tunacode/types/__init__.py +166 -0
  104. tunacode/types/base.py +94 -0
  105. tunacode/types/callbacks.py +53 -0
  106. tunacode/types/dataclasses.py +121 -0
  107. tunacode/types/pydantic_ai.py +31 -0
  108. tunacode/types/state.py +122 -0
  109. tunacode/ui/__init__.py +6 -0
  110. tunacode/ui/app.py +542 -0
  111. tunacode/ui/commands/__init__.py +430 -0
  112. tunacode/ui/components/__init__.py +1 -0
  113. tunacode/ui/headless/__init__.py +5 -0
  114. tunacode/ui/headless/output.py +72 -0
  115. tunacode/ui/main.py +252 -0
  116. tunacode/ui/renderers/__init__.py +41 -0
  117. tunacode/ui/renderers/errors.py +197 -0
  118. tunacode/ui/renderers/panels.py +550 -0
  119. tunacode/ui/renderers/search.py +314 -0
  120. tunacode/ui/renderers/tools/__init__.py +21 -0
  121. tunacode/ui/renderers/tools/bash.py +247 -0
  122. tunacode/ui/renderers/tools/diagnostics.py +186 -0
  123. tunacode/ui/renderers/tools/glob.py +226 -0
  124. tunacode/ui/renderers/tools/grep.py +228 -0
  125. tunacode/ui/renderers/tools/list_dir.py +198 -0
  126. tunacode/ui/renderers/tools/read_file.py +226 -0
  127. tunacode/ui/renderers/tools/research.py +294 -0
  128. tunacode/ui/renderers/tools/update_file.py +237 -0
  129. tunacode/ui/renderers/tools/web_fetch.py +182 -0
  130. tunacode/ui/repl_support.py +226 -0
  131. tunacode/ui/screens/__init__.py +16 -0
  132. tunacode/ui/screens/model_picker.py +303 -0
  133. tunacode/ui/screens/session_picker.py +181 -0
  134. tunacode/ui/screens/setup.py +218 -0
  135. tunacode/ui/screens/theme_picker.py +90 -0
  136. tunacode/ui/screens/update_confirm.py +69 -0
  137. tunacode/ui/shell_runner.py +129 -0
  138. tunacode/ui/styles/layout.tcss +98 -0
  139. tunacode/ui/styles/modals.tcss +38 -0
  140. tunacode/ui/styles/panels.tcss +81 -0
  141. tunacode/ui/styles/theme-nextstep.tcss +303 -0
  142. tunacode/ui/styles/widgets.tcss +33 -0
  143. tunacode/ui/styles.py +18 -0
  144. tunacode/ui/widgets/__init__.py +23 -0
  145. tunacode/ui/widgets/command_autocomplete.py +62 -0
  146. tunacode/ui/widgets/editor.py +402 -0
  147. tunacode/ui/widgets/file_autocomplete.py +47 -0
  148. tunacode/ui/widgets/messages.py +46 -0
  149. tunacode/ui/widgets/resource_bar.py +182 -0
  150. tunacode/ui/widgets/status_bar.py +98 -0
  151. tunacode/utils/__init__.py +0 -0
  152. tunacode/utils/config/__init__.py +13 -0
  153. tunacode/utils/config/user_configuration.py +91 -0
  154. tunacode/utils/messaging/__init__.py +10 -0
  155. tunacode/utils/messaging/message_utils.py +34 -0
  156. tunacode/utils/messaging/token_counter.py +77 -0
  157. tunacode/utils/parsing/__init__.py +13 -0
  158. tunacode/utils/parsing/command_parser.py +55 -0
  159. tunacode/utils/parsing/json_utils.py +188 -0
  160. tunacode/utils/parsing/retry.py +146 -0
  161. tunacode/utils/parsing/tool_parser.py +267 -0
  162. tunacode/utils/security/__init__.py +15 -0
  163. tunacode/utils/security/command.py +106 -0
  164. tunacode/utils/system/__init__.py +25 -0
  165. tunacode/utils/system/gitignore.py +155 -0
  166. tunacode/utils/system/paths.py +190 -0
  167. tunacode/utils/ui/__init__.py +9 -0
  168. tunacode/utils/ui/file_filter.py +135 -0
  169. tunacode/utils/ui/helpers.py +24 -0
  170. tunacode_cli-0.1.21.dist-info/METADATA +170 -0
  171. tunacode_cli-0.1.21.dist-info/RECORD +174 -0
  172. tunacode_cli-0.1.21.dist-info/WHEEL +4 -0
  173. tunacode_cli-0.1.21.dist-info/entry_points.txt +2 -0
  174. tunacode_cli-0.1.21.dist-info/licenses/LICENSE +21 -0
tunacode/tools/grep.py ADDED
@@ -0,0 +1,468 @@
1
+ """
2
+ Parallel grep tool for TunaCode - Enhanced content search with parallel processing.
3
+
4
+ This tool provides sophisticated grep-like functionality with:
5
+ - Parallel file searching across multiple directories
6
+ - Multiple search strategies (literal, regex, fuzzy)
7
+ - Smart result ranking and deduplication
8
+ - Context-aware output formatting
9
+ - Timeout handling for overly broad patterns (3 second deadline for first match)
10
+
11
+ CLAUDE_ANCHOR[grep-module]: Fast parallel file search with 3-second deadline
12
+ """
13
+
14
+ import asyncio
15
+ import re
16
+ import time
17
+ from concurrent.futures import ThreadPoolExecutor
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ from tunacode.configuration.defaults import DEFAULT_USER_CONFIG
22
+ from tunacode.exceptions import TooBroadPatternError, ToolExecutionError
23
+ from tunacode.tools.decorators import base_tool
24
+ from tunacode.tools.grep_components import (
25
+ FileFilter,
26
+ PatternMatcher,
27
+ SearchConfig,
28
+ SearchResult,
29
+ )
30
+ from tunacode.tools.grep_components.result_formatter import ResultFormatter
31
+ from tunacode.tools.utils.ripgrep import RipgrepExecutor
32
+ from tunacode.tools.utils.ripgrep import metrics as ripgrep_metrics
33
+
34
+
35
class ParallelGrep:
    """Advanced parallel grep tool with multiple search strategies.

    A search runs in two phases: a filename prefilter (``FileFilter.fast_glob``)
    produces the candidate files, then one of three content-search strategies
    ("python", "ripgrep" or "hybrid") is run over those candidates. Blocking
    work is pushed onto a private thread pool so the event loop stays free.
    """

    # Candidate-count thresholds used when search_type == "smart".
    _PYTHON_STRATEGY_MAX_FILES = 50
    _RIPGREP_STRATEGY_MAX_FILES = 1000
    # Denominator shown in the "Files: N/5000" summary.
    # NOTE(review): presumably mirrors MAX_GLOB in grep_components/file_filter.py
    # - keep the two in sync.
    _CANDIDATE_CAP = 5000

    def __init__(self) -> None:
        self._executor = ThreadPoolExecutor(max_workers=8)
        self._file_filter = FileFilter()
        self._pattern_matcher = PatternMatcher()
        self._result_formatter = ResultFormatter()
        self._ripgrep_executor = RipgrepExecutor()
        self._config = self._load_ripgrep_config()

    def _load_ripgrep_config(self) -> dict[str, Any]:
        """Load ripgrep configuration from defaults.

        Returns:
            The built-in ripgrep settings, overlaid with the ``settings.ripgrep``
            mapping from ``DEFAULT_USER_CONFIG`` when that mapping is a dict.
        """
        default_ripgrep_config: dict[str, Any] = {
            "timeout": 10,
            "max_buffer_size": 1048576,
            "max_results": 100,
            "enable_metrics": False,
            "debug": False,
        }
        settings = DEFAULT_USER_CONFIG.get("settings", {})
        if not isinstance(settings, dict):
            return default_ripgrep_config

        config = settings.get("ripgrep", default_ripgrep_config)
        if not isinstance(config, dict):
            return default_ripgrep_config

        # Configured keys win; missing keys fall back to the built-in values.
        return default_ripgrep_config | config

    async def execute(
        self,
        pattern: str,
        directory: str = ".",
        case_sensitive: bool = False,
        use_regex: bool = False,
        include_files: str | None = None,
        exclude_files: str | None = None,
        max_results: int = 50,
        context_lines: int = 2,
        search_type: str = "smart",  # smart, ripgrep, python, hybrid
        return_format: str = "string",  # "string" (default) or "list" (legacy)
        output_mode: str = "content",  # content, files_with_matches, count, json
    ) -> str | list[str]:
        """
        Execute parallel grep search with fast-glob prefiltering and multiple strategies.

        Args:
            pattern: Search pattern (literal text or regex)
            directory: Directory to search (default: current)
            case_sensitive: Whether search is case sensitive
            use_regex: Whether pattern is a regular expression
            include_files: File patterns to include (e.g., "*.py", "*.{js,ts}")
            exclude_files: File patterns to exclude (e.g., "*.pyc", "node_modules/*")
            max_results: Maximum number of results to return
            context_lines: Number of context lines before/after matches
            search_type: Search strategy to use ("smart" picks one from the
                number of candidate files)
            return_format: "string" for formatted text, "list" for the legacy
                list of matching file paths
            output_mode: Passed through to the result formatter

        Returns:
            Formatted search results, or a list of file paths when
            ``return_format == "list"``.

        Raises:
            TooBroadPatternError: Propagated unchanged from the search strategy.
            ToolExecutionError: For an unknown ``search_type`` or any other failure.
        """
        try:
            loop = asyncio.get_running_loop()

            # 1. Fast-glob prefilter to find candidate files
            include_pattern = include_files or "*"
            candidates = await loop.run_in_executor(
                self._executor,
                self._file_filter.fast_glob,
                Path(directory),
                include_pattern,
                exclude_files,
            )

            if not candidates:
                if return_format == "list":
                    return []
                return f"No files found matching pattern: {include_pattern}"

            # 2. Smart strategy selection based on candidate count
            original_search_type = search_type
            if search_type == "smart":
                if len(candidates) <= self._PYTHON_STRATEGY_MAX_FILES:
                    # Small set - Python strategy more efficient (low startup cost)
                    search_type = "python"
                elif len(candidates) <= self._RIPGREP_STRATEGY_MAX_FILES:
                    # Medium set - Ripgrep optimal for this range
                    search_type = "ripgrep"
                else:
                    # Large set - Hybrid for best coverage and redundancy
                    search_type = "hybrid"

            # 3. Create search configuration
            # Note: include_patterns/exclude_patterns now only used for legacy compatibility
            include_patterns = (
                self._file_filter.parse_patterns(include_files) if include_files else ["*"]
            )
            exclude_patterns = (
                self._file_filter.parse_patterns(exclude_files) if exclude_files else []
            )
            config = SearchConfig(
                case_sensitive=case_sensitive,
                use_regex=use_regex,
                max_results=max_results,
                context_lines=context_lines,
                include_patterns=include_patterns,
                exclude_patterns=exclude_patterns,
            )

            # 4. Execute chosen strategy with pre-filtered candidates
            if search_type == "ripgrep":
                # Try ripgrep first for performance. If ripgrep is unavailable or
                # returns no results (e.g., binary missing), gracefully fallback to
                # the Python implementation so the tool still returns matches.
                results = await self._ripgrep_search_filtered(
                    pattern, candidates, config, directory
                )
                if not results:
                    results = await self._python_search_filtered(pattern, candidates, config)
            elif search_type == "python":
                results = await self._python_search_filtered(pattern, candidates, config)
            elif search_type == "hybrid":
                results = await self._hybrid_search_filtered(
                    pattern, candidates, config, directory
                )
            else:
                raise ToolExecutionError(
                    tool_name="grep", message=f"Unknown search type: {search_type}"
                )

            # 5. Format and return results with strategy info
            if return_format == "list":
                # Legacy: return the files with at least one match, de-duplicated
                # in first-seen order so the output is deterministic.
                return list(dict.fromkeys(r.file_path for r in results))

            formatted_results = self._result_formatter.format_results(
                results, pattern, config, output_mode=output_mode
            )

            if formatted_results.startswith("Found"):
                lines = formatted_results.split("\n")
                # Only splice into the second line when there is one; a
                # single-line summary falls through to the suffix form below.
                if len(lines) > 1:
                    lines[1] = (
                        f"Strategy: {search_type} | Candidates: {len(candidates)} files | "
                        + lines[1]
                    )
                    return "\n".join(lines)

            strategy_info = (
                f"Strategy: {search_type} (was {original_search_type}), "
                f"Files: {len(candidates)}/{self._CANDIDATE_CAP}"
            )
            return f"{formatted_results}\n\n{strategy_info}"

        except (TooBroadPatternError, ToolExecutionError):
            # Already in the shape callers expect - do not wrap a second time.
            raise
        except Exception as err:
            raise ToolExecutionError(
                tool_name="grep", message=f"Grep search failed: {err}"
            ) from err

    # ====== SEARCH METHODS ======

    async def _ripgrep_search_filtered(
        self,
        pattern: str,
        candidates: list[Path],
        config: SearchConfig,
        directory: str = ".",
    ) -> list[SearchResult]:
        """
        Run ripgrep via RipgrepExecutor and keep only hits inside ``candidates``.

        Args:
            pattern: Search pattern handed to the ripgrep executor.
            candidates: Pre-filtered files; hits in any other file are dropped.
            config: Search options (case sensitivity, context, limits, timeout).
            directory: Directory handed to ripgrep as the search root.

        Returns:
            Matching results, or an empty list when ripgrep is unavailable or
            fails (the caller treats an empty list as "fall back to Python").
        """

        def run_enhanced_ripgrep() -> list[SearchResult]:
            """Execute the blocking ripgrep search (runs in a worker thread)."""
            start_time = time.time()
            results: list[SearchResult] = []

            # Configure timeout from settings
            timeout = min(self._config.get("timeout", 10), config.timeout_seconds)

            # If ripgrep executor is using fallback, skip this method entirely
            if self._ripgrep_executor._use_python_fallback:
                # Return empty to trigger Python fallback in the calling function
                return []

            try:
                # Resolve once into a set: O(1) membership, and "./a.py",
                # "a.py" and absolute spellings of the same file compare equal.
                candidate_set = {candidate.resolve() for candidate in candidates}

                # NOTE(review): config.use_regex is not forwarded; whether the
                # executor treats the pattern as regex or literal is not visible here.
                search_results = self._ripgrep_executor.search(
                    pattern=pattern,
                    path=directory,
                    timeout=timeout,
                    max_matches=config.max_results,
                    case_insensitive=not config.case_sensitive,
                    context_before=config.context_lines,
                    context_after=config.context_lines,
                )

                # Ripgrep doesn't provide timing info for first match, so we rely on
                # the overall timeout mechanism instead of first_match_deadline

                for result_line in search_results:
                    # Parse ripgrep output format "file:line:content"
                    parts = result_line.split(":", 2)
                    if len(parts) < 3:
                        continue
                    file_name, line_text, content = parts

                    # Filter to only include results from candidates
                    if Path(file_name).resolve() not in candidate_set:
                        continue

                    try:
                        search_result = SearchResult(
                            file_path=file_name,
                            line_number=int(line_text),
                            line_content=content,
                            match_start=0,
                            match_end=len(content),
                            context_before=[],
                            context_after=[],
                            relevance_score=1.0,
                        )
                    except (ValueError, IndexError):
                        # Not a "file:line:content" match line (e.g. context output)
                        continue
                    results.append(search_result)

                    # Stop if we have enough results
                    if config.max_results and len(results) >= config.max_results:
                        break

            except TooBroadPatternError:
                raise
            except Exception:
                # Return empty to trigger fallback
                return []

            # Record metrics if enabled
            if self._config.get("enable_metrics", False):
                total_time = time.time() - start_time
                ripgrep_metrics.record_search(
                    duration=total_time,
                    used_fallback=self._ripgrep_executor._use_python_fallback,
                )

            return results

        return await asyncio.get_running_loop().run_in_executor(
            self._executor, run_enhanced_ripgrep
        )

    async def _python_search_filtered(
        self, pattern: str, candidates: list[Path], config: SearchConfig
    ) -> list[SearchResult]:
        """
        Run Python parallel search on pre-filtered candidates with first match deadline.

        Every candidate is searched concurrently. If neither a first match nor
        completion of all searches happens within ``config.first_match_deadline``
        seconds, the outstanding searches are cancelled and the pattern is
        reported as too broad.

        Raises:
            TooBroadPatternError: No match was found before the deadline while
                searches were still running.
        """
        # Prepare search pattern
        if config.use_regex:
            flags = 0 if config.case_sensitive else re.IGNORECASE
            regex_pattern = re.compile(pattern, flags)
        else:
            regex_pattern = None

        if not candidates:
            return []

        first_match_event = asyncio.Event()

        async def search_with_monitoring(file_path: Path) -> list[SearchResult]:
            """Search a file and signal when first match is found."""
            try:
                file_results = await self._search_file(file_path, pattern, regex_pattern, config)
            except Exception:
                # A single unreadable file must not abort the whole search.
                return []
            if file_results:
                first_match_event.set()
            return file_results

        # Real Tasks (not bare coroutines) so they start immediately and can be
        # cancelled when the deadline expires.
        search_tasks = [
            asyncio.ensure_future(search_with_monitoring(file_path)) for file_path in candidates
        ]
        all_searches = asyncio.gather(*search_tasks, return_exceptions=True)
        first_match = asyncio.ensure_future(first_match_event.wait())

        try:
            # Wake on whichever comes first: a match, every file finished, or
            # the deadline (signalled by an empty "done" set).
            done, _pending = await asyncio.wait(
                {all_searches, first_match},
                timeout=config.first_match_deadline,
                return_when=asyncio.FIRST_COMPLETED,
            )
            if not done:
                raise TooBroadPatternError(pattern, config.first_match_deadline)

            all_results = await all_searches
        finally:
            first_match.cancel()
            # No-op once finished; otherwise cancels every unfinished file search.
            all_searches.cancel()

        # Flatten results and filter out exceptions
        results: list[SearchResult] = []
        for file_results in all_results:
            if isinstance(file_results, list):
                results.extend(file_results)

        # Sort by relevance and limit results
        results.sort(key=lambda r: r.relevance_score, reverse=True)
        return results[: config.max_results]

    async def _hybrid_search_filtered(
        self,
        pattern: str,
        candidates: list[Path],
        config: SearchConfig,
        directory: str = ".",
    ) -> list[SearchResult]:
        """
        Hybrid approach using multiple search methods concurrently on pre-filtered candidates.

        Runs the ripgrep and Python strategies together, merges their results,
        removes duplicates and returns the top ``config.max_results`` by relevance.

        Raises:
            TooBroadPatternError: A strategy timed out and no strategy produced
                a result list.
        """
        # Run multiple search strategies in parallel
        results_list = await asyncio.gather(
            self._ripgrep_search_filtered(pattern, candidates, config, directory),
            self._python_search_filtered(pattern, candidates, config),
            return_exceptions=True,
        )

        valid_results = [r for r in results_list if isinstance(r, list)]
        too_broad_errors = [r for r in results_list if isinstance(r, TooBroadPatternError)]
        # Only surface the timeout when nothing usable came back at all.
        if too_broad_errors and not valid_results:
            raise too_broad_errors[0]

        # Merge, then deduplicate by (file path, line number). The path is
        # normalised so "./a.py" and "a.py" count as the same file.
        seen: set[tuple[str, int]] = set()
        unique_results: list[SearchResult] = []
        for results in valid_results:
            for result in results:
                key = (str(Path(result.file_path)), result.line_number)
                if key not in seen:
                    seen.add(key)
                    unique_results.append(result)

        # Sort and limit
        unique_results.sort(key=lambda r: r.relevance_score, reverse=True)
        return unique_results[: config.max_results]

    async def _search_file(
        self,
        file_path: Path,
        pattern: str,
        regex_pattern: re.Pattern | None,
        config: SearchConfig,
    ) -> list[SearchResult]:
        """Search a single file for the pattern on the worker thread pool."""

        def search_file_sync() -> list[SearchResult]:
            return self._pattern_matcher.search_file(file_path, pattern, regex_pattern, config)

        return await asyncio.get_running_loop().run_in_executor(
            self._executor, search_file_sync
        )
416
+
417
+
418
@base_tool
async def grep(
    pattern: str,
    directory: str = ".",
    path: str | None = None,
    case_sensitive: bool = False,
    use_regex: bool = False,
    include_files: str | None = None,
    exclude_files: str | None = None,
    max_results: int = 50,
    context_lines: int = 2,
    search_type: str = "smart",
    return_format: str = "string",
    output_mode: str = "content",
) -> str | list[str]:
    """Advanced parallel grep search with multiple strategies.

    Args:
        pattern: Search pattern (literal text or regex).
        directory: Directory to search (default: current directory).
        path: Alias for directory; takes precedence when given.
        case_sensitive: Whether search is case sensitive.
        use_regex: Whether pattern is a regular expression.
        include_files: File patterns to include (e.g., "*.py,*.js").
        exclude_files: File patterns to exclude.
        max_results: Maximum number of results to return.
        context_lines: Number of context lines before/after matches.
        search_type: Search strategy (smart/ripgrep/python/hybrid).
        return_format: Output format (string or list).
        output_mode: Output mode (content, files_with_matches, count, json).

    Returns:
        Formatted search results with file paths, line numbers, and context.
    """
    if path is not None:
        directory = path

    tool = ParallelGrep()
    try:
        return await tool.execute(
            pattern=pattern,
            directory=directory,
            case_sensitive=case_sensitive,
            use_regex=use_regex,
            include_files=include_files,
            exclude_files=exclude_files,
            max_results=max_results,
            context_lines=context_lines,
            search_type=search_type,
            return_format=return_format,
            output_mode=output_mode,
        )
    finally:
        # Each call builds its own thread pool; release it instead of leaving
        # idle workers for the garbage collector. wait=False keeps the event
        # loop from blocking, and cancel_futures drops work still queued after
        # a timeout or error.
        tool._executor.shutdown(wait=False, cancel_futures=True)
@@ -0,0 +1,9 @@
1
+ """
2
+ Grep tool components for modular organization.
3
+ """
4
+
5
+ from .file_filter import FileFilter
6
+ from .pattern_matcher import PatternMatcher
7
+ from .search_result import SearchConfig, SearchResult
8
+
9
+ __all__ = ["PatternMatcher", "FileFilter", "SearchResult", "SearchConfig"]
@@ -0,0 +1,93 @@
1
+ """
2
+ File filtering functionality for the grep tool.
3
+ """
4
+
5
+ import fnmatch
6
+ import os
7
+ import re
8
+ from pathlib import Path
9
+
10
+ # Fast-Glob Prefilter Configuration
11
+ MAX_GLOB = 5_000 # Hard cap - protects memory & tokens
12
+ GLOB_BATCH = 500 # Streaming batch size
13
+ EXCLUDE_DIRS = { # Common directories to skip
14
+ "node_modules",
15
+ ".git",
16
+ "__pycache__",
17
+ ".venv",
18
+ "venv",
19
+ "dist",
20
+ "build",
21
+ ".pytest_cache",
22
+ ".mypy_cache",
23
+ ".tox",
24
+ "target",
25
+ }
26
+
27
+
28
+ class FileFilter:
29
+ """Handles file filtering and globbing for the grep tool."""
30
+
31
+ @staticmethod
32
+ def fast_glob(root: Path, include: str, exclude: str | None = None) -> list[Path]:
33
+ """
34
+ Lightning-fast filename filtering using os.scandir.
35
+
36
+ Args:
37
+ root: Directory to search
38
+ include: Include pattern (e.g., "*.py", "*.{js,ts}")
39
+ exclude: Exclude pattern (optional)
40
+
41
+ Returns:
42
+ List of matching file paths (bounded by MAX_GLOB)
43
+ """
44
+ matches: list[Path] = []
45
+ stack = [root]
46
+
47
+ # Handle multiple extensions in include pattern like "*.{py,js,ts}"
48
+ if "{" in include and "}" in include:
49
+ # Convert *.{py,js,ts} to multiple patterns
50
+ base, ext_part = include.split("{", 1)
51
+ ext_part = ext_part.split("}", 1)[0]
52
+ extensions = ext_part.split(",")
53
+ include_patterns = [base + ext.strip() for ext in extensions]
54
+ include_regexes = [
55
+ re.compile(fnmatch.translate(pat), re.IGNORECASE) for pat in include_patterns
56
+ ]
57
+ else:
58
+ include_regexes = [re.compile(fnmatch.translate(include), re.IGNORECASE)]
59
+
60
+ exclude_rx = re.compile(fnmatch.translate(exclude), re.IGNORECASE) if exclude else None
61
+
62
+ while stack and len(matches) < MAX_GLOB:
63
+ current_dir = stack.pop()
64
+
65
+ try:
66
+ with os.scandir(current_dir) as entries:
67
+ for entry in entries:
68
+ # Skip common irrelevant directories
69
+ if entry.is_dir(follow_symlinks=False):
70
+ if entry.name not in EXCLUDE_DIRS:
71
+ stack.append(Path(entry.path))
72
+
73
+ # Check file matches
74
+ elif entry.is_file(follow_symlinks=False):
75
+ # Check against any include pattern
76
+ matches_include = any(
77
+ regex.match(entry.name) for regex in include_regexes
78
+ )
79
+
80
+ if matches_include and (
81
+ not exclude_rx or not exclude_rx.match(entry.name)
82
+ ):
83
+ matches.append(Path(entry.path))
84
+
85
+ except (PermissionError, OSError):
86
+ continue # Skip inaccessible directories
87
+
88
+ return matches[:MAX_GLOB]
89
+
90
+ @staticmethod
91
+ def parse_patterns(patterns: str) -> list[str]:
92
+ """Parse comma-separated file patterns."""
93
+ return [p.strip() for p in patterns.split(",") if p.strip()]