zrb 1.5.4__py3-none-any.whl → 1.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zrb/__init__.py +2 -0
- zrb/__main__.py +28 -2
- zrb/builtin/llm/history.py +73 -0
- zrb/builtin/llm/input.py +27 -0
- zrb/builtin/llm/llm_chat.py +4 -61
- zrb/builtin/llm/tool/api.py +39 -17
- zrb/builtin/llm/tool/cli.py +19 -5
- zrb/builtin/llm/tool/file.py +408 -405
- zrb/builtin/llm/tool/rag.py +18 -1
- zrb/builtin/llm/tool/web.py +31 -14
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/common/error.py +6 -8
- zrb/config.py +1 -0
- zrb/llm_config.py +81 -15
- zrb/task/llm/__init__.py +0 -0
- zrb/task/llm/agent_runner.py +53 -0
- zrb/task/llm/context_enricher.py +86 -0
- zrb/task/llm/default_context.py +44 -0
- zrb/task/llm/error.py +77 -0
- zrb/task/llm/history.py +92 -0
- zrb/task/llm/history_summarizer.py +71 -0
- zrb/task/llm/print_node.py +98 -0
- zrb/task/llm/tool_wrapper.py +88 -0
- zrb/task/llm_task.py +279 -246
- zrb/util/file.py +8 -2
- zrb/util/load.py +2 -0
- {zrb-1.5.4.dist-info → zrb-1.5.6.dist-info}/METADATA +1 -1
- {zrb-1.5.4.dist-info → zrb-1.5.6.dist-info}/RECORD +29 -18
- {zrb-1.5.4.dist-info → zrb-1.5.6.dist-info}/WHEEL +0 -0
- {zrb-1.5.4.dist-info → zrb-1.5.6.dist-info}/entry_points.txt +0 -0
zrb/builtin/llm/tool/file.py
CHANGED
@@ -1,140 +1,179 @@
|
|
1
1
|
import fnmatch
|
2
|
+
import json
|
2
3
|
import os
|
3
4
|
import re
|
4
|
-
from typing import
|
5
|
+
from typing import Any, Optional
|
5
6
|
|
6
7
|
from zrb.util.file import read_file as _read_file
|
7
8
|
from zrb.util.file import write_file as _write_file
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
# Version control
|
12
|
-
".git",
|
13
|
-
".svn",
|
14
|
-
".hg",
|
15
|
-
# Dependencies and packages
|
16
|
-
"node_modules",
|
17
|
-
"venv",
|
18
|
-
".venv",
|
19
|
-
"env",
|
20
|
-
".env",
|
21
|
-
# Build and cache
|
10
|
+
DEFAULT_EXCLUDED_PATTERNS = [
|
11
|
+
# Common Python artifacts
|
22
12
|
"__pycache__",
|
23
13
|
"*.pyc",
|
14
|
+
"*.pyo",
|
15
|
+
"*.pyd",
|
16
|
+
".Python",
|
24
17
|
"build",
|
18
|
+
"develop-eggs",
|
25
19
|
"dist",
|
26
|
-
"
|
27
|
-
|
20
|
+
"downloads",
|
21
|
+
"eggs",
|
22
|
+
".eggs",
|
23
|
+
"lib",
|
24
|
+
"lib64",
|
25
|
+
"parts",
|
26
|
+
"sdist",
|
27
|
+
"var",
|
28
|
+
"wheels",
|
29
|
+
"share/python-wheels",
|
30
|
+
"*.egg-info",
|
31
|
+
".installed.cfg",
|
32
|
+
"*.egg",
|
33
|
+
"MANIFEST",
|
34
|
+
# Virtual environments
|
35
|
+
".env",
|
36
|
+
".venv",
|
37
|
+
"env",
|
38
|
+
"venv",
|
39
|
+
"ENV",
|
40
|
+
"VENV",
|
41
|
+
# Editor/IDE specific
|
28
42
|
".idea",
|
29
43
|
".vscode",
|
30
44
|
"*.swp",
|
31
45
|
"*.swo",
|
32
|
-
|
46
|
+
"*.swn",
|
47
|
+
# OS specific
|
33
48
|
".DS_Store",
|
34
49
|
"Thumbs.db",
|
35
|
-
#
|
36
|
-
"
|
37
|
-
"
|
38
|
-
"
|
50
|
+
# Version control
|
51
|
+
".git",
|
52
|
+
".hg",
|
53
|
+
".svn",
|
54
|
+
# Node.js
|
55
|
+
"node_modules",
|
56
|
+
"npm-debug.log*",
|
57
|
+
"yarn-debug.log*",
|
58
|
+
"yarn-error.log*",
|
59
|
+
# Test/Coverage artifacts
|
60
|
+
".history",
|
61
|
+
".tox",
|
62
|
+
".nox",
|
63
|
+
".coverage",
|
64
|
+
".coverage.*",
|
65
|
+
".cache",
|
66
|
+
".pytest_cache",
|
67
|
+
".hypothesis",
|
68
|
+
"htmlcov",
|
69
|
+
# Compiled files
|
70
|
+
"*.so",
|
71
|
+
"*.dylib",
|
72
|
+
"*.dll",
|
39
73
|
]
|
40
74
|
|
41
|
-
# Maximum number of lines to read before truncating
|
42
|
-
_MAX_LINES_BEFORE_TRUNCATION = 1000
|
43
|
-
|
44
|
-
# Number of context lines to show around method definitions when truncating
|
45
|
-
_CONTEXT_LINES = 5
|
46
|
-
|
47
75
|
|
48
76
|
def list_files(
|
49
77
|
path: str = ".",
|
50
78
|
recursive: bool = True,
|
51
|
-
|
52
|
-
excluded_patterns: list[str] =
|
53
|
-
) ->
|
54
|
-
"""
|
55
|
-
List files in a directory that match specified patterns.
|
56
|
-
|
79
|
+
include_hidden: bool = False,
|
80
|
+
excluded_patterns: Optional[list[str]] = None,
|
81
|
+
) -> str:
|
82
|
+
"""List files/directories in a path, excluding specified patterns.
|
57
83
|
Args:
|
58
|
-
path:
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
None by default (all files will be included).
|
64
|
-
excluded_patterns: List of glob patterns to exclude. By default, contains sane values
|
65
|
-
to exclude common directories and files like version control, build artifacts,
|
66
|
-
and temporary files.
|
67
|
-
|
84
|
+
path (str): Path to list. Pass exactly as provided, including '~'. Defaults to ".".
|
85
|
+
recursive (bool): List recursively. Defaults to True.
|
86
|
+
include_hidden (bool): Include hidden files/dirs. Defaults to False.
|
87
|
+
excluded_patterns (Optional[List[str]]): List of glob patterns to exclude.
|
88
|
+
Defaults to a comprehensive list of common temporary/artifact patterns.
|
68
89
|
Returns:
|
69
|
-
|
90
|
+
str: JSON string: {"files": ["file1.txt", ...]} or {"error": "..."}
|
91
|
+
Raises:
|
92
|
+
Exception: If an error occurs.
|
70
93
|
"""
|
71
94
|
all_files: list[str] = []
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
if not
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
95
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
96
|
+
# Determine effective exclusion patterns
|
97
|
+
patterns_to_exclude = (
|
98
|
+
excluded_patterns
|
99
|
+
if excluded_patterns is not None
|
100
|
+
else DEFAULT_EXCLUDED_PATTERNS
|
101
|
+
)
|
102
|
+
try:
|
103
|
+
if recursive:
|
104
|
+
for root, dirs, files in os.walk(abs_path, topdown=True):
|
105
|
+
# Filter directories in-place
|
106
|
+
dirs[:] = [
|
107
|
+
d
|
108
|
+
for d in dirs
|
109
|
+
if (include_hidden or not _is_hidden(d))
|
110
|
+
and not _is_excluded(d, patterns_to_exclude)
|
111
|
+
]
|
112
|
+
# Process files
|
113
|
+
for filename in files:
|
114
|
+
if (
|
115
|
+
include_hidden or not _is_hidden(filename)
|
116
|
+
) and not _is_excluded(filename, patterns_to_exclude):
|
117
|
+
full_path = os.path.join(root, filename)
|
118
|
+
# Check rel path for patterns like '**/node_modules/*'
|
119
|
+
rel_full_path = os.path.relpath(full_path, abs_path)
|
120
|
+
is_rel_path_excluded = _is_excluded(
|
121
|
+
rel_full_path, patterns_to_exclude
|
122
|
+
)
|
123
|
+
if not is_rel_path_excluded:
|
97
124
|
all_files.append(full_path)
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
125
|
+
else:
|
126
|
+
# Non-recursive listing (top-level only)
|
127
|
+
for item in os.listdir(abs_path):
|
128
|
+
full_path = os.path.join(abs_path, item)
|
129
|
+
# Include both files and directories if not recursive
|
130
|
+
if (include_hidden or not _is_hidden(item)) and not _is_excluded(
|
131
|
+
item, patterns_to_exclude
|
132
|
+
):
|
133
|
+
all_files.append(full_path)
|
134
|
+
# Return paths relative to the original path requested
|
135
|
+
try:
|
136
|
+
rel_files = [
|
137
|
+
os.path.relpath(f, os.path.dirname(abs_path)) for f in all_files
|
138
|
+
]
|
139
|
+
return json.dumps({"files": sorted(rel_files)})
|
140
|
+
except (
|
141
|
+
ValueError
|
142
|
+
) as e: # Handle case where path is '.' and abs_path is CWD root
|
143
|
+
if "path is on mount '" in str(e) and "' which is not on mount '" in str(e):
|
144
|
+
# If paths are on different mounts, just use absolute paths
|
145
|
+
rel_files = all_files
|
146
|
+
return json.dumps({"files": sorted(rel_files)})
|
147
|
+
raise
|
148
|
+
except (OSError, IOError) as e:
|
149
|
+
raise OSError(f"Error listing files in {path}: {e}")
|
150
|
+
except Exception as e:
|
151
|
+
raise RuntimeError(f"Unexpected error listing files in {path}: {e}")
|
102
152
|
|
103
153
|
|
104
|
-
def
|
105
|
-
full_path: str, excluded_patterns: list[str] = _DEFAULT_EXCLUDES
|
106
|
-
) -> bool:
|
154
|
+
def _is_hidden(path: str) -> bool:
|
107
155
|
"""
|
108
|
-
|
109
|
-
the list of excluded_patterns. Patterns that include a path separator
|
110
|
-
are applied to the full normalized path; otherwise they are matched
|
111
|
-
against each individual component of the path.
|
112
|
-
|
156
|
+
Check if path is hidden (starts with '.').
|
113
157
|
Args:
|
114
|
-
|
115
|
-
excluded_patterns: List of patterns to exclude
|
116
|
-
|
158
|
+
path: File or directory path to check
|
117
159
|
Returns:
|
118
|
-
True if the path
|
160
|
+
True if the path is hidden, False otherwise
|
119
161
|
"""
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
162
|
+
# Extract just the basename to check if it starts with a dot
|
163
|
+
return os.path.basename(path).startswith(".")
|
164
|
+
|
165
|
+
|
166
|
+
def _is_excluded(name: str, patterns: list[str]) -> bool:
|
167
|
+
"""Check if a name/path matches any exclusion patterns."""
|
168
|
+
for pattern in patterns:
|
169
|
+
if fnmatch.fnmatch(name, pattern):
|
170
|
+
return True
|
171
|
+
# Split the path using the OS path separator.
|
172
|
+
parts = name.split(os.path.sep)
|
173
|
+
# Check each part of the path.
|
174
|
+
for part in parts:
|
175
|
+
if fnmatch.fnmatch(part, pattern):
|
127
176
|
return True
|
128
|
-
else:
|
129
|
-
# Otherwise check each part of the path
|
130
|
-
if any(fnmatch.fnmatch(part, pat) for part in path_parts):
|
131
|
-
return True
|
132
|
-
# Also check the filename against the pattern
|
133
|
-
if os.path.isfile(full_path) and fnmatch.fnmatch(
|
134
|
-
os.path.basename(full_path), pat
|
135
|
-
):
|
136
|
-
return True
|
137
|
-
|
138
177
|
return False
|
139
178
|
|
140
179
|
|
@@ -142,369 +181,333 @@ def read_from_file(
|
|
142
181
|
path: str,
|
143
182
|
start_line: Optional[int] = None,
|
144
183
|
end_line: Optional[int] = None,
|
145
|
-
auto_truncate: bool = False,
|
146
184
|
) -> str:
|
147
|
-
"""
|
148
|
-
Read the contents of a file at the specified path.
|
149
|
-
|
185
|
+
"""Read file content (or specific lines) at a path.
|
150
186
|
Args:
|
151
|
-
path:
|
152
|
-
start_line:
|
153
|
-
|
154
|
-
end_line:
|
155
|
-
|
156
|
-
auto_truncate: Whether to automatically truncate large files when start_line
|
157
|
-
and end_line are not specified. If true and the file exceeds a certain
|
158
|
-
line threshold, it will return a subset of lines with information about
|
159
|
-
the total line count and method definitions. Default is False for backward
|
160
|
-
compatibility, but setting to True is recommended for large files.
|
161
|
-
|
187
|
+
path (str): Path to read. Pass exactly as provided, including '~'.
|
188
|
+
start_line (Optional[int]): Starting line number (1-based).
|
189
|
+
Defaults to None (start of file).
|
190
|
+
end_line (Optional[int]): Ending line number (1-based, inclusive).
|
191
|
+
Defaults to None (end of file).
|
162
192
|
Returns:
|
163
|
-
|
164
|
-
|
193
|
+
str: JSON: {"path": "...", "content": "...", "start_line": N, ...} or {"error": "..."}
|
194
|
+
Raises:
|
195
|
+
Exception: If an error occurs.
|
165
196
|
"""
|
166
197
|
try:
|
167
|
-
abs_path = os.path.abspath(path)
|
168
|
-
|
169
|
-
|
198
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
199
|
+
# Check if file exists
|
200
|
+
if not os.path.exists(abs_path):
|
201
|
+
return json.dumps({"error": f"File {path} does not exist"})
|
170
202
|
content = _read_file(abs_path)
|
171
203
|
lines = content.splitlines()
|
172
204
|
total_lines = len(lines)
|
173
|
-
|
174
|
-
# Determine if we should truncate
|
175
|
-
should_truncate = (
|
176
|
-
auto_truncate
|
177
|
-
and start_line is None
|
178
|
-
and end_line is None
|
179
|
-
and total_lines > _MAX_LINES_BEFORE_TRUNCATION
|
180
|
-
)
|
181
|
-
|
182
205
|
# Adjust line indices (convert from 1-based to 0-based)
|
183
206
|
start_idx = (start_line - 1) if start_line is not None else 0
|
184
207
|
end_idx = end_line if end_line is not None else total_lines
|
185
|
-
|
186
208
|
# Validate indices
|
187
209
|
if start_idx < 0:
|
188
210
|
start_idx = 0
|
189
211
|
if end_idx > total_lines:
|
190
212
|
end_idx = total_lines
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
result_lines.append("...")
|
209
|
-
omitted_msg = (
|
210
|
-
f"[{first_chunk+1} - {total_lines-100}] Lines omitted for brevity"
|
211
|
-
)
|
212
|
-
result_lines.append(omitted_msg)
|
213
|
-
result_lines.append("...")
|
214
|
-
|
215
|
-
# Add end of file (last 100 lines)
|
216
|
-
for i in range(max(first_chunk, total_lines - 100), total_lines):
|
217
|
-
result_lines.append(f"{i+1} | {lines[i]}")
|
218
|
-
|
219
|
-
# Add method definitions summary
|
220
|
-
if method_info:
|
221
|
-
result_lines.append("")
|
222
|
-
result_lines.append("Method definitions found:")
|
223
|
-
for method in method_info:
|
224
|
-
method_line = (
|
225
|
-
f"- {method['name']} "
|
226
|
-
f"(lines {method['start_line']}-{method['end_line']})"
|
227
|
-
)
|
228
|
-
result_lines.append(method_line)
|
229
|
-
|
230
|
-
return "\n".join(result_lines)
|
231
|
-
else:
|
232
|
-
# Return the requested range with line numbers
|
233
|
-
result_lines = []
|
234
|
-
for i in range(start_idx, end_idx):
|
235
|
-
result_lines.append(f"{i+1} | {lines[i]}")
|
236
|
-
|
237
|
-
return "\n".join(result_lines)
|
238
|
-
|
213
|
+
if start_idx > end_idx:
|
214
|
+
start_idx = end_idx
|
215
|
+
# Select the lines for the result
|
216
|
+
selected_lines = lines[start_idx:end_idx]
|
217
|
+
content_result = "\n".join(selected_lines)
|
218
|
+
return json.dumps(
|
219
|
+
{
|
220
|
+
"path": path,
|
221
|
+
"content": content_result,
|
222
|
+
"start_line": start_idx + 1, # Convert back to 1-based for output
|
223
|
+
"end_line": end_idx, # end_idx is already exclusive upper bound
|
224
|
+
"total_lines": total_lines,
|
225
|
+
}
|
226
|
+
)
|
227
|
+
except (OSError, IOError) as e:
|
228
|
+
raise OSError(f"Error reading file {path}: {e}")
|
239
229
|
except Exception as e:
|
240
|
-
|
241
|
-
|
230
|
+
raise RuntimeError(f"Unexpected error reading file {path}: {e}")
|
242
231
|
|
243
|
-
def _find_method_definitions(lines: List[str]) -> List[Dict[str, Union[str, int]]]:
|
244
|
-
"""
|
245
|
-
Find method definitions in the given lines of code.
|
246
|
-
|
247
|
-
Args:
|
248
|
-
lines: List of code lines to analyze
|
249
|
-
|
250
|
-
Returns:
|
251
|
-
List of dictionaries containing method name, start line, and end line
|
252
|
-
"""
|
253
|
-
method_info = []
|
254
|
-
|
255
|
-
# Simple regex patterns for common method/function definitions
|
256
|
-
patterns = [
|
257
|
-
# Python
|
258
|
-
r"^\s*def\s+([a-zA-Z0-9_]+)\s*\(",
|
259
|
-
# JavaScript/TypeScript
|
260
|
-
r"^\s*(function\s+([a-zA-Z0-9_]+)|([a-zA-Z0-9_]+)\s*=\s*function|"
|
261
|
-
r"\s*([a-zA-Z0-9_]+)\s*\([^)]*\)\s*{)",
|
262
|
-
# Java/C#/C++
|
263
|
-
r"^\s*(?:public|private|protected|static|final|abstract|synchronized)?"
|
264
|
-
r"\s+(?:[a-zA-Z0-9_<>[\]]+\s+)+([a-zA-Z0-9_]+)\s*\(",
|
265
|
-
]
|
266
|
-
|
267
|
-
current_method = None
|
268
|
-
|
269
|
-
for i, line in enumerate(lines):
|
270
|
-
# Check if this line starts a method definition
|
271
|
-
for pattern in patterns:
|
272
|
-
match = re.search(pattern, line)
|
273
|
-
if match:
|
274
|
-
# If we were tracking a method, close it
|
275
|
-
if current_method:
|
276
|
-
current_method["end_line"] = i
|
277
|
-
method_info.append(current_method)
|
278
|
-
|
279
|
-
# Start tracking a new method
|
280
|
-
method_name = next(
|
281
|
-
group for group in match.groups() if group is not None
|
282
|
-
)
|
283
|
-
current_method = {
|
284
|
-
"name": method_name,
|
285
|
-
"start_line": i + 1, # 1-based line numbering
|
286
|
-
"end_line": None,
|
287
|
-
}
|
288
|
-
break
|
289
|
-
|
290
|
-
# Check for method end (simplistic approach)
|
291
|
-
if current_method and line.strip() == "}":
|
292
|
-
current_method["end_line"] = i + 1
|
293
|
-
method_info.append(current_method)
|
294
|
-
current_method = None
|
295
|
-
|
296
|
-
# Close any open method at the end of the file
|
297
|
-
if current_method:
|
298
|
-
current_method["end_line"] = len(lines)
|
299
|
-
method_info.append(current_method)
|
300
|
-
|
301
|
-
return method_info
|
302
|
-
|
303
|
-
|
304
|
-
def write_to_file(path: str, content: str) -> bool:
|
305
|
-
"""
|
306
|
-
Write content to a file at the specified path.
|
307
232
|
|
233
|
+
def write_to_file(
|
234
|
+
path: str,
|
235
|
+
content: str,
|
236
|
+
line_count: int,
|
237
|
+
) -> str:
|
238
|
+
"""Write full content to a file. Creates/overwrites file.
|
308
239
|
Args:
|
309
|
-
path:
|
310
|
-
content:
|
311
|
-
|
240
|
+
path (str): Path to write. Pass exactly as provided, including '~'.
|
241
|
+
content (str): Full file content.
|
242
|
+
MUST be complete, no truncation/omissions. Exclude line numbers.
|
243
|
+
line_count (int): Number of lines in the provided content.
|
312
244
|
Returns:
|
313
|
-
|
245
|
+
str: JSON: {"success": true, "path": "f.txt", "warning": "..."} or {"error": "..."}
|
246
|
+
Raises:
|
247
|
+
Exception: If an error occurs.
|
314
248
|
"""
|
249
|
+
actual_lines = len(content.splitlines())
|
250
|
+
warning = None
|
251
|
+
if actual_lines != line_count:
|
252
|
+
warning = (
|
253
|
+
f"Provided line_count ({line_count}) does not match actual "
|
254
|
+
f"content lines ({actual_lines}) for file {path}"
|
255
|
+
)
|
315
256
|
try:
|
257
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
316
258
|
# Ensure directory exists
|
317
|
-
directory = os.path.dirname(
|
259
|
+
directory = os.path.dirname(abs_path)
|
318
260
|
if directory and not os.path.exists(directory):
|
319
261
|
os.makedirs(directory, exist_ok=True)
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
262
|
+
_write_file(abs_path, content)
|
263
|
+
result_data = {"success": True, "path": path}
|
264
|
+
if warning:
|
265
|
+
result_data["warning"] = warning
|
266
|
+
return json.dumps(result_data)
|
267
|
+
except (OSError, IOError) as e:
|
268
|
+
raise OSError(f"Error writing file {path}: {e}")
|
324
269
|
except Exception as e:
|
325
|
-
|
326
|
-
return False
|
270
|
+
raise RuntimeError(f"Unexpected error writing file {path}: {e}")
|
327
271
|
|
328
272
|
|
329
273
|
def search_files(
|
330
|
-
path: str,
|
274
|
+
path: str,
|
275
|
+
regex: str,
|
276
|
+
file_pattern: Optional[str] = None,
|
277
|
+
include_hidden: bool = True,
|
331
278
|
) -> str:
|
332
|
-
"""
|
333
|
-
Search for a regex pattern across files in a specified directory.
|
334
|
-
|
279
|
+
"""Search files in a directory using regex, showing context.
|
335
280
|
Args:
|
336
|
-
path:
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
filter to specific file types (but in most cases, leaving as None is better).
|
342
|
-
context_lines: Number of context lines to show before and after each match.
|
343
|
-
Default is 2, which provides good context without overwhelming output.
|
344
|
-
|
281
|
+
path (str): Path to search. Pass exactly as provided, including '~'.
|
282
|
+
regex (str): Python regex pattern to search for.
|
283
|
+
file_pattern (Optional[str]): Glob pattern to filter files
|
284
|
+
(e.g., '*.py'). Defaults to None.
|
285
|
+
include_hidden (bool): Include hidden files/dirs. Defaults to True.
|
345
286
|
Returns:
|
346
|
-
|
287
|
+
str: JSON: {"summary": "...", "results": [{"file":"f.py", ...}]} or {"error": "..."}
|
288
|
+
Raises:
|
289
|
+
Exception: If error occurs or regex is invalid.
|
347
290
|
"""
|
348
291
|
try:
|
349
|
-
# Compile the regex pattern
|
350
292
|
pattern = re.compile(regex)
|
293
|
+
except re.error as e:
|
294
|
+
raise ValueError(f"Invalid regex pattern: {e}")
|
295
|
+
search_results = {"summary": "", "results": []}
|
296
|
+
match_count = 0
|
297
|
+
searched_file_count = 0
|
298
|
+
file_match_count = 0
|
299
|
+
try:
|
300
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
301
|
+
for root, dirs, files in os.walk(abs_path):
|
302
|
+
# Skip hidden directories
|
303
|
+
dirs[:] = [d for d in dirs if include_hidden or not _is_hidden(d)]
|
304
|
+
for filename in files:
|
305
|
+
# Skip hidden files
|
306
|
+
if not include_hidden and _is_hidden(filename):
|
307
|
+
continue
|
308
|
+
# Apply file pattern filter if provided
|
309
|
+
if file_pattern and not fnmatch.fnmatch(filename, file_pattern):
|
310
|
+
continue
|
311
|
+
file_path = os.path.join(root, filename)
|
312
|
+
rel_file_path = os.path.relpath(file_path, os.getcwd())
|
313
|
+
searched_file_count += 1
|
314
|
+
try:
|
315
|
+
matches = _get_file_matches(file_path, pattern)
|
316
|
+
if matches:
|
317
|
+
file_match_count += 1
|
318
|
+
match_count += len(matches)
|
319
|
+
search_results["results"].append(
|
320
|
+
{"file": rel_file_path, "matches": matches}
|
321
|
+
)
|
322
|
+
except IOError as e:
|
323
|
+
search_results["results"].append(
|
324
|
+
{"file": rel_file_path, "error": str(e)}
|
325
|
+
)
|
326
|
+
if match_count == 0:
|
327
|
+
search_results["summary"] = (
|
328
|
+
f"No matches found for pattern '{regex}' in path '{path}' "
|
329
|
+
f"(searched {searched_file_count} files)."
|
330
|
+
)
|
331
|
+
else:
|
332
|
+
search_results["summary"] = (
|
333
|
+
f"Found {match_count} matches in {file_match_count} files "
|
334
|
+
f"(searched {searched_file_count} files)."
|
335
|
+
)
|
336
|
+
return json.dumps(
|
337
|
+
search_results
|
338
|
+
) # No need for pretty printing for LLM consumption
|
339
|
+
except (OSError, IOError) as e:
|
340
|
+
raise OSError(f"Error searching files in {path}: {e}")
|
341
|
+
except Exception as e:
|
342
|
+
raise RuntimeError(f"Unexpected error searching files in {path}: {e}")
|
351
343
|
|
352
|
-
# Get the list of files to search
|
353
|
-
files = list_files(path, recursive=True, file_pattern=file_pattern)
|
354
|
-
|
355
|
-
results = []
|
356
|
-
match_count = 0
|
357
|
-
|
358
|
-
for file_path in files:
|
359
|
-
try:
|
360
|
-
with open(file_path, "r", encoding="utf-8", errors="replace") as f:
|
361
|
-
lines = f.readlines()
|
362
|
-
|
363
|
-
file_matches = []
|
364
|
-
|
365
|
-
for i, line in enumerate(lines):
|
366
|
-
if pattern.search(line):
|
367
|
-
# Determine context range
|
368
|
-
start = max(0, i - context_lines)
|
369
|
-
end = min(len(lines), i + context_lines + 1)
|
370
|
-
|
371
|
-
# Add file header if this is the first match in the file
|
372
|
-
if not file_matches:
|
373
|
-
file_matches.append(
|
374
|
-
f"\n{'-' * 80}\n{file_path}\n{'-' * 80}"
|
375
|
-
)
|
376
|
-
|
377
|
-
# Add separator if this isn't the first match and isn't contiguous
|
378
|
-
# with previous
|
379
|
-
if (
|
380
|
-
file_matches
|
381
|
-
and file_matches[-1] != f"Line {start+1}-{end}:"
|
382
|
-
):
|
383
|
-
file_matches.append(f"\nLine {start+1}-{end}:")
|
384
|
-
|
385
|
-
# Add context lines
|
386
|
-
for j in range(start, end):
|
387
|
-
prefix = ">" if j == i else " "
|
388
|
-
file_matches.append(f"{prefix} {j+1}: {lines[j].rstrip()}")
|
389
|
-
|
390
|
-
match_count += 1
|
391
|
-
|
392
|
-
if file_matches:
|
393
|
-
results.extend(file_matches)
|
394
|
-
|
395
|
-
except Exception as e:
|
396
|
-
results.append(f"Error reading {file_path}: {str(e)}")
|
397
|
-
|
398
|
-
if not results:
|
399
|
-
return f"No matches found for pattern '{regex}' in {path}"
|
400
|
-
|
401
|
-
# Count unique files by counting headers
|
402
|
-
file_count = len([r for r in results if r.startswith("-" * 80)])
|
403
|
-
summary = f"Found {match_count} matches in {file_count} files:\n"
|
404
|
-
return summary + "\n".join(results)
|
405
344
|
|
345
|
+
def _get_file_matches(
|
346
|
+
file_path: str, pattern: re.Pattern, context_lines: int = 2
|
347
|
+
) -> list[dict[str, Any]]:
|
348
|
+
"""Search for regex matches in a file with context."""
|
349
|
+
try:
|
350
|
+
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
|
351
|
+
lines = f.readlines()
|
352
|
+
matches = []
|
353
|
+
for line_idx, line in enumerate(lines):
|
354
|
+
if pattern.search(line):
|
355
|
+
line_num = line_idx + 1
|
356
|
+
context_start = max(0, line_idx - context_lines)
|
357
|
+
context_end = min(len(lines), line_idx + context_lines + 1)
|
358
|
+
match_data = {
|
359
|
+
"line_number": line_num,
|
360
|
+
"line_content": line.rstrip(),
|
361
|
+
"context_before": [
|
362
|
+
lines[j].rstrip() for j in range(context_start, line_idx)
|
363
|
+
],
|
364
|
+
"context_after": [
|
365
|
+
lines[j].rstrip() for j in range(line_idx + 1, context_end)
|
366
|
+
],
|
367
|
+
}
|
368
|
+
matches.append(match_data)
|
369
|
+
return matches
|
370
|
+
except (OSError, IOError) as e:
|
371
|
+
raise IOError(f"Error reading {file_path}: {e}")
|
406
372
|
except Exception as e:
|
407
|
-
|
373
|
+
raise RuntimeError(f"Unexpected error processing {file_path}: {e}")
|
408
374
|
|
409
375
|
|
410
|
-
def apply_diff(
|
411
|
-
|
412
|
-
|
413
|
-
|
376
|
+
def apply_diff(
|
377
|
+
path: str,
|
378
|
+
diff: str,
|
379
|
+
search_marker: str = "<<<<<< SEARCH",
|
380
|
+
meta_marker: str = "------",
|
381
|
+
separator: str = "======",
|
382
|
+
replace_marker: str = ">>>>>> REPLACE",
|
383
|
+
) -> str:
|
384
|
+
"""Apply a precise search/replace diff to a file.
|
414
385
|
Args:
|
415
|
-
path:
|
416
|
-
diff:
|
417
|
-
|
418
|
-
|
419
|
-
|
386
|
+
path (str): Path to modify. Pass exactly as provided, including '~'.
|
387
|
+
diff (str): Search/replace block defining changes (see format example below).
|
388
|
+
search_marker (str): Marker for start of search block. Defaults to "<<<<<< SEARCH".
|
389
|
+
meta_marker (str): Marker for start of content to search for. Defaults to "------".
|
390
|
+
separator (str): Marker separating search/replace content. Defaults to "======".
|
391
|
+
replace_marker (str): Marker for end of replacement block.
|
392
|
+
Defaults to ">>>>>> REPLACE".
|
393
|
+
SEARCH block must exactly match file content including whitespace/indentation.
|
394
|
+
Format example:
|
395
|
+
[Search Marker, e.g., <<<<<< SEARCH]
|
396
|
+
:start_line:10
|
397
|
+
:end_line:15
|
398
|
+
[Meta Marker, e.g., ------]
|
399
|
+
[exact content to find including whitespace]
|
400
|
+
[Separator, e.g., ======]
|
401
|
+
[new content to replace with]
|
402
|
+
[Replace Marker, e.g., >>>>>> REPLACE]
|
420
403
|
Returns:
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
```
|
425
|
-
<<<<<<< SEARCH
|
426
|
-
[exact content to find including whitespace]
|
427
|
-
=======
|
428
|
-
[new content to replace with]
|
429
|
-
>>>>>>> REPLACE
|
430
|
-
```
|
404
|
+
str: JSON: {"success": true, "path": "f.py"} or {"success": false, "error": "..."}
|
405
|
+
Raises:
|
406
|
+
Exception: If an error occurs.
|
431
407
|
"""
|
432
408
|
try:
|
433
|
-
|
434
|
-
|
409
|
+
start_line, end_line, search_content, replace_content = _parse_diff(
|
410
|
+
diff, search_marker, meta_marker, separator, replace_marker
|
411
|
+
)
|
412
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
413
|
+
if not os.path.exists(abs_path):
|
414
|
+
return json.dumps(
|
415
|
+
{"success": False, "path": path, "error": f"File not found at {path}"}
|
416
|
+
)
|
435
417
|
content = _read_file(abs_path)
|
436
418
|
lines = content.splitlines()
|
437
|
-
|
438
|
-
# Validate line numbers
|
439
419
|
if start_line < 1 or end_line > len(lines) or start_line > end_line:
|
440
|
-
|
441
|
-
|
420
|
+
return json.dumps(
|
421
|
+
{
|
422
|
+
"success": False,
|
423
|
+
"path": path,
|
424
|
+
"error": (
|
425
|
+
f"Invalid line range {start_line}-{end_line} "
|
426
|
+
f"for file with {len(lines)} lines."
|
427
|
+
),
|
428
|
+
}
|
442
429
|
)
|
443
|
-
return False
|
444
|
-
|
445
|
-
# Parse the diff
|
446
|
-
search_content, replace_content = _parse_diff(diff)
|
447
|
-
if search_content is None or replace_content is None:
|
448
|
-
print("Invalid diff format")
|
449
|
-
return False
|
450
|
-
|
451
|
-
# Extract the content to be replaced
|
452
430
|
original_content = "\n".join(lines[start_line - 1 : end_line])
|
453
|
-
|
454
|
-
# Verify the search content matches
|
455
431
|
if original_content != search_content:
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
432
|
+
error_message = (
|
433
|
+
f"Search content does not match file content at "
|
434
|
+
f"lines {start_line}-{end_line}.\n"
|
435
|
+
f"Expected ({len(search_content.splitlines())} lines):\n"
|
436
|
+
f"---\n{search_content}\n---\n"
|
437
|
+
f"Actual ({len(lines[start_line-1:end_line])} lines):\n"
|
438
|
+
f"---\n{original_content}\n---"
|
439
|
+
)
|
440
|
+
return json.dumps({"success": False, "path": path, "error": error_message})
|
460
441
|
new_lines = (
|
461
442
|
lines[: start_line - 1] + replace_content.splitlines() + lines[end_line:]
|
462
443
|
)
|
463
444
|
new_content = "\n".join(new_lines)
|
464
|
-
|
465
|
-
|
445
|
+
if content.endswith("\n"):
|
446
|
+
new_content += "\n"
|
466
447
|
_write_file(abs_path, new_content)
|
467
|
-
return True
|
468
|
-
|
448
|
+
return json.dumps({"success": True, "path": path})
|
449
|
+
except ValueError as e:
|
450
|
+
raise ValueError(f"Error parsing diff: {e}")
|
451
|
+
except (OSError, IOError) as e:
|
452
|
+
raise OSError(f"Error applying diff to {path}: {e}")
|
469
453
|
except Exception as e:
|
470
|
-
|
471
|
-
return False
|
454
|
+
raise RuntimeError(f"Unexpected error applying diff to {path}: {e}")
|
472
455
|
|
473
456
|
|
474
|
-
def _parse_diff(
|
457
|
+
def _parse_diff(
|
458
|
+
diff: str,
|
459
|
+
search_marker: str,
|
460
|
+
meta_marker: str,
|
461
|
+
separator: str,
|
462
|
+
replace_marker: str,
|
463
|
+
) -> tuple[int, int, str, str]:
|
475
464
|
"""
|
476
|
-
Parse
|
477
|
-
|
465
|
+
Parse diff content into components.
|
478
466
|
Args:
|
479
|
-
diff: The diff
|
480
|
-
|
467
|
+
diff: The diff content to parse
|
468
|
+
search_marker: Marker indicating the start of the search block
|
469
|
+
meta_marker: Marker indicating the start of the content to search for
|
470
|
+
separator: Marker separating search content from replacement content
|
471
|
+
replace_marker: Marker indicating the end of the replacement block
|
481
472
|
Returns:
|
482
|
-
|
473
|
+
Tuple of (start_line, end_line, search_content, replace_content)
|
474
|
+
Raises:
|
475
|
+
ValueError: If diff format is invalid or missing required markers
|
476
|
+
ValueError: If start_line or end_line cannot be parsed
|
483
477
|
"""
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
)
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
478
|
+
# Find all marker positions
|
479
|
+
search_start_idx = diff.find(search_marker)
|
480
|
+
meta_start_idx = diff.find(meta_marker)
|
481
|
+
separator_idx = diff.find(separator)
|
482
|
+
replace_end_idx = diff.find(replace_marker)
|
483
|
+
# Validate all markers are present
|
484
|
+
missing_markers = []
|
485
|
+
if search_start_idx == -1:
|
486
|
+
missing_markers.append("search marker")
|
487
|
+
if meta_start_idx == -1:
|
488
|
+
missing_markers.append("meta marker")
|
489
|
+
if separator_idx == -1:
|
490
|
+
missing_markers.append("separator")
|
491
|
+
if replace_end_idx == -1:
|
492
|
+
missing_markers.append("replace marker")
|
493
|
+
if missing_markers:
|
494
|
+
raise ValueError(f"Invalid diff format - missing: {', '.join(missing_markers)}")
|
495
|
+
# Extract metadata
|
496
|
+
meta_content = diff[search_start_idx + len(search_marker) : meta_start_idx].strip()
|
497
|
+
# Parse line numbers
|
498
|
+
start_line_match = re.search(r":start_line:(\d+)", meta_content)
|
499
|
+
end_line_match = re.search(r":end_line:(\d+)", meta_content)
|
500
|
+
if not start_line_match:
|
501
|
+
raise ValueError("Missing start_line in diff metadata")
|
502
|
+
if not end_line_match:
|
503
|
+
raise ValueError("Missing end_line in diff metadata")
|
504
|
+
start_line = int(start_line_match.group(1))
|
505
|
+
end_line = int(end_line_match.group(1))
|
506
|
+
# Extract content sections
|
507
|
+
search_content = diff[meta_start_idx + len(meta_marker) : separator_idx].strip(
|
508
|
+
"\r\n"
|
509
|
+
)
|
510
|
+
replace_content = diff[separator_idx + len(separator) : replace_end_idx].strip(
|
511
|
+
"\r\n"
|
512
|
+
)
|
513
|
+
return start_line, end_line, search_content, replace_content
|