zrb 1.5.5__py3-none-any.whl → 1.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zrb/__init__.py +2 -0
- zrb/__main__.py +28 -2
- zrb/builtin/llm/history.py +73 -0
- zrb/builtin/llm/input.py +27 -0
- zrb/builtin/llm/llm_chat.py +4 -61
- zrb/builtin/llm/tool/api.py +39 -17
- zrb/builtin/llm/tool/cli.py +19 -5
- zrb/builtin/llm/tool/file.py +270 -131
- zrb/builtin/llm/tool/rag.py +18 -1
- zrb/builtin/llm/tool/web.py +31 -14
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/common/error.py +6 -8
- zrb/config.py +1 -0
- zrb/llm_config.py +81 -15
- zrb/task/llm/__init__.py +0 -0
- zrb/task/llm/agent_runner.py +53 -0
- zrb/task/llm/context_enricher.py +86 -0
- zrb/task/llm/default_context.py +44 -0
- zrb/task/llm/error.py +77 -0
- zrb/task/llm/history.py +92 -0
- zrb/task/llm/history_summarizer.py +71 -0
- zrb/task/llm/print_node.py +98 -0
- zrb/task/llm/tool_wrapper.py +88 -0
- zrb/task/llm_task.py +279 -246
- zrb/util/file.py +8 -2
- zrb/util/load.py +2 -0
- {zrb-1.5.5.dist-info → zrb-1.5.6.dist-info}/METADATA +1 -1
- {zrb-1.5.5.dist-info → zrb-1.5.6.dist-info}/RECORD +29 -18
- {zrb-1.5.5.dist-info → zrb-1.5.6.dist-info}/WHEEL +0 -0
- {zrb-1.5.5.dist-info → zrb-1.5.6.dist-info}/entry_points.txt +0 -0
zrb/builtin/llm/tool/file.py
CHANGED
@@ -2,51 +2,135 @@ import fnmatch
|
|
2
2
|
import json
|
3
3
|
import os
|
4
4
|
import re
|
5
|
-
from typing import Any,
|
5
|
+
from typing import Any, Optional
|
6
6
|
|
7
7
|
from zrb.util.file import read_file as _read_file
|
8
8
|
from zrb.util.file import write_file as _write_file
|
9
9
|
|
10
|
+
DEFAULT_EXCLUDED_PATTERNS = [
|
11
|
+
# Common Python artifacts
|
12
|
+
"__pycache__",
|
13
|
+
"*.pyc",
|
14
|
+
"*.pyo",
|
15
|
+
"*.pyd",
|
16
|
+
".Python",
|
17
|
+
"build",
|
18
|
+
"develop-eggs",
|
19
|
+
"dist",
|
20
|
+
"downloads",
|
21
|
+
"eggs",
|
22
|
+
".eggs",
|
23
|
+
"lib",
|
24
|
+
"lib64",
|
25
|
+
"parts",
|
26
|
+
"sdist",
|
27
|
+
"var",
|
28
|
+
"wheels",
|
29
|
+
"share/python-wheels",
|
30
|
+
"*.egg-info",
|
31
|
+
".installed.cfg",
|
32
|
+
"*.egg",
|
33
|
+
"MANIFEST",
|
34
|
+
# Virtual environments
|
35
|
+
".env",
|
36
|
+
".venv",
|
37
|
+
"env",
|
38
|
+
"venv",
|
39
|
+
"ENV",
|
40
|
+
"VENV",
|
41
|
+
# Editor/IDE specific
|
42
|
+
".idea",
|
43
|
+
".vscode",
|
44
|
+
"*.swp",
|
45
|
+
"*.swo",
|
46
|
+
"*.swn",
|
47
|
+
# OS specific
|
48
|
+
".DS_Store",
|
49
|
+
"Thumbs.db",
|
50
|
+
# Version control
|
51
|
+
".git",
|
52
|
+
".hg",
|
53
|
+
".svn",
|
54
|
+
# Node.js
|
55
|
+
"node_modules",
|
56
|
+
"npm-debug.log*",
|
57
|
+
"yarn-debug.log*",
|
58
|
+
"yarn-error.log*",
|
59
|
+
# Test/Coverage artifacts
|
60
|
+
".history",
|
61
|
+
".tox",
|
62
|
+
".nox",
|
63
|
+
".coverage",
|
64
|
+
".coverage.*",
|
65
|
+
".cache",
|
66
|
+
".pytest_cache",
|
67
|
+
".hypothesis",
|
68
|
+
"htmlcov",
|
69
|
+
# Compiled files
|
70
|
+
"*.so",
|
71
|
+
"*.dylib",
|
72
|
+
"*.dll",
|
73
|
+
]
|
74
|
+
|
10
75
|
|
11
76
|
def list_files(
|
12
|
-
path: str = ".",
|
77
|
+
path: str = ".",
|
78
|
+
recursive: bool = True,
|
79
|
+
include_hidden: bool = False,
|
80
|
+
excluded_patterns: Optional[list[str]] = None,
|
13
81
|
) -> str:
|
14
|
-
"""
|
15
|
-
Request to list files and directories within the specified directory.
|
16
|
-
If recursive is true, it will list all files and directories recursively.
|
17
|
-
If recursive is false or not provided, it will only list the top-level contents.
|
82
|
+
"""List files/directories in a path, excluding specified patterns.
|
18
83
|
Args:
|
19
|
-
path
|
20
|
-
recursive
|
21
|
-
|
22
|
-
|
23
|
-
Defaults to
|
84
|
+
path (str): Path to list. Pass exactly as provided, including '~'. Defaults to ".".
|
85
|
+
recursive (bool): List recursively. Defaults to True.
|
86
|
+
include_hidden (bool): Include hidden files/dirs. Defaults to False.
|
87
|
+
excluded_patterns (Optional[List[str]]): List of glob patterns to exclude.
|
88
|
+
Defaults to a comprehensive list of common temporary/artifact patterns.
|
24
89
|
Returns:
|
25
|
-
|
26
|
-
|
27
|
-
|
90
|
+
str: JSON string: {"files": ["file1.txt", ...]} or {"error": "..."}
|
91
|
+
Raises:
|
92
|
+
Exception: If an error occurs.
|
28
93
|
"""
|
29
|
-
all_files:
|
30
|
-
abs_path = os.path.abspath(path)
|
94
|
+
all_files: list[str] = []
|
95
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
96
|
+
# Determine effective exclusion patterns
|
97
|
+
patterns_to_exclude = (
|
98
|
+
excluded_patterns
|
99
|
+
if excluded_patterns is not None
|
100
|
+
else DEFAULT_EXCLUDED_PATTERNS
|
101
|
+
)
|
31
102
|
try:
|
32
103
|
if recursive:
|
33
|
-
for root, dirs, files in os.walk(abs_path):
|
34
|
-
#
|
35
|
-
dirs[:] = [
|
104
|
+
for root, dirs, files in os.walk(abs_path, topdown=True):
|
105
|
+
# Filter directories in-place
|
106
|
+
dirs[:] = [
|
107
|
+
d
|
108
|
+
for d in dirs
|
109
|
+
if (include_hidden or not _is_hidden(d))
|
110
|
+
and not _is_excluded(d, patterns_to_exclude)
|
111
|
+
]
|
112
|
+
# Process files
|
36
113
|
for filename in files:
|
37
|
-
|
38
|
-
|
39
|
-
|
114
|
+
if (
|
115
|
+
include_hidden or not _is_hidden(filename)
|
116
|
+
) and not _is_excluded(filename, patterns_to_exclude):
|
117
|
+
full_path = os.path.join(root, filename)
|
118
|
+
# Check rel path for patterns like '**/node_modules/*'
|
119
|
+
rel_full_path = os.path.relpath(full_path, abs_path)
|
120
|
+
is_rel_path_excluded = _is_excluded(
|
121
|
+
rel_full_path, patterns_to_exclude
|
122
|
+
)
|
123
|
+
if not is_rel_path_excluded:
|
124
|
+
all_files.append(full_path)
|
40
125
|
else:
|
41
126
|
# Non-recursive listing (top-level only)
|
42
127
|
for item in os.listdir(abs_path):
|
43
128
|
full_path = os.path.join(abs_path, item)
|
44
129
|
# Include both files and directories if not recursive
|
45
|
-
if include_hidden or not _is_hidden(
|
46
|
-
item
|
47
|
-
):
|
130
|
+
if (include_hidden or not _is_hidden(item)) and not _is_excluded(
|
131
|
+
item, patterns_to_exclude
|
132
|
+
):
|
48
133
|
all_files.append(full_path)
|
49
|
-
|
50
134
|
# Return paths relative to the original path requested
|
51
135
|
try:
|
52
136
|
rel_files = [
|
@@ -61,43 +145,57 @@ def list_files(
|
|
61
145
|
rel_files = all_files
|
62
146
|
return json.dumps({"files": sorted(rel_files)})
|
63
147
|
raise
|
148
|
+
except (OSError, IOError) as e:
|
149
|
+
raise OSError(f"Error listing files in {path}: {e}")
|
64
150
|
except Exception as e:
|
65
|
-
raise
|
151
|
+
raise RuntimeError(f"Unexpected error listing files in {path}: {e}")
|
66
152
|
|
67
153
|
|
68
154
|
def _is_hidden(path: str) -> bool:
|
69
|
-
"""
|
155
|
+
"""
|
156
|
+
Check if path is hidden (starts with '.').
|
157
|
+
Args:
|
158
|
+
path: File or directory path to check
|
159
|
+
Returns:
|
160
|
+
True if the path is hidden, False otherwise
|
161
|
+
"""
|
162
|
+
# Extract just the basename to check if it starts with a dot
|
70
163
|
return os.path.basename(path).startswith(".")
|
71
164
|
|
72
165
|
|
166
|
+
def _is_excluded(name: str, patterns: list[str]) -> bool:
|
167
|
+
"""Check if a name/path matches any exclusion patterns."""
|
168
|
+
for pattern in patterns:
|
169
|
+
if fnmatch.fnmatch(name, pattern):
|
170
|
+
return True
|
171
|
+
# Split the path using the OS path separator.
|
172
|
+
parts = name.split(os.path.sep)
|
173
|
+
# Check each part of the path.
|
174
|
+
for part in parts:
|
175
|
+
if fnmatch.fnmatch(part, pattern):
|
176
|
+
return True
|
177
|
+
return False
|
178
|
+
|
179
|
+
|
73
180
|
def read_from_file(
|
74
181
|
path: str,
|
75
182
|
start_line: Optional[int] = None,
|
76
183
|
end_line: Optional[int] = None,
|
77
184
|
) -> str:
|
78
|
-
"""
|
79
|
-
Request to read the contents of a file at the specified path. Use this when you need
|
80
|
-
to examine the contents of an existing file you do not know the contents of, for example
|
81
|
-
to analyze code, review text files, or extract information from configuration files.
|
82
|
-
The output includes line numbers prefixed to each line (e.g. "1 | const x = 1"),
|
83
|
-
making it easier to reference specific lines when creating diffs or discussing code.
|
84
|
-
By specifying start_line and end_line parameters, you can efficiently read specific
|
85
|
-
portions of large files without loading the entire file into memory. Automatically
|
86
|
-
extracts raw text from PDF and DOCX files. May not be suitable for other types of
|
87
|
-
binary files, as it returns the raw content as a string.
|
185
|
+
"""Read file content (or specific lines) at a path.
|
88
186
|
Args:
|
89
|
-
path
|
90
|
-
start_line
|
91
|
-
|
92
|
-
end_line
|
93
|
-
|
187
|
+
path (str): Path to read. Pass exactly as provided, including '~'.
|
188
|
+
start_line (Optional[int]): Starting line number (1-based).
|
189
|
+
Defaults to None (start of file).
|
190
|
+
end_line (Optional[int]): Ending line number (1-based, inclusive).
|
191
|
+
Defaults to None (end of file).
|
94
192
|
Returns:
|
95
|
-
|
96
|
-
|
97
|
-
|
193
|
+
str: JSON: {"path": "...", "content": "...", "start_line": N, ...} or {"error": "..."}
|
194
|
+
Raises:
|
195
|
+
Exception: If an error occurs.
|
98
196
|
"""
|
99
197
|
try:
|
100
|
-
abs_path = os.path.abspath(path)
|
198
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
101
199
|
# Check if file exists
|
102
200
|
if not os.path.exists(abs_path):
|
103
201
|
return json.dumps({"error": f"File {path} does not exist"})
|
@@ -126,31 +224,27 @@ def read_from_file(
|
|
126
224
|
"total_lines": total_lines,
|
127
225
|
}
|
128
226
|
)
|
227
|
+
except (OSError, IOError) as e:
|
228
|
+
raise OSError(f"Error reading file {path}: {e}")
|
129
229
|
except Exception as e:
|
130
|
-
raise
|
230
|
+
raise RuntimeError(f"Unexpected error reading file {path}: {e}")
|
131
231
|
|
132
232
|
|
133
|
-
def write_to_file(
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
to
|
233
|
+
def write_to_file(
|
234
|
+
path: str,
|
235
|
+
content: str,
|
236
|
+
line_count: int,
|
237
|
+
) -> str:
|
238
|
+
"""Write full content to a file. Creates/overwrites file.
|
139
239
|
Args:
|
140
|
-
path
|
141
|
-
content
|
142
|
-
|
143
|
-
|
144
|
-
include the line numbers in the content though, just the actual content
|
145
|
-
of the file.
|
146
|
-
line_count: (required) The number of lines in the file. Make sure to compute
|
147
|
-
this based on the actual content of the file, not the number of lines
|
148
|
-
in the content you're providing.
|
240
|
+
path (str): Path to write. Pass exactly as provided, including '~'.
|
241
|
+
content (str): Full file content.
|
242
|
+
MUST be complete, no truncation/omissions. Exclude line numbers.
|
243
|
+
line_count (int): Number of lines in the provided content.
|
149
244
|
Returns:
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
Example error: '{"success": false, "error": "Permission denied: /etc/hosts"}'
|
245
|
+
str: JSON: {"success": true, "path": "f.txt", "warning": "..."} or {"error": "..."}
|
246
|
+
Raises:
|
247
|
+
Exception: If an error occurs.
|
154
248
|
"""
|
155
249
|
actual_lines = len(content.splitlines())
|
156
250
|
warning = None
|
@@ -160,7 +254,7 @@ def write_to_file(path: str, content: str, line_count: int) -> str:
|
|
160
254
|
f"content lines ({actual_lines}) for file {path}"
|
161
255
|
)
|
162
256
|
try:
|
163
|
-
abs_path = os.path.abspath(path)
|
257
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
164
258
|
# Ensure directory exists
|
165
259
|
directory = os.path.dirname(abs_path)
|
166
260
|
if directory and not os.path.exists(directory):
|
@@ -170,45 +264,40 @@ def write_to_file(path: str, content: str, line_count: int) -> str:
|
|
170
264
|
if warning:
|
171
265
|
result_data["warning"] = warning
|
172
266
|
return json.dumps(result_data)
|
267
|
+
except (OSError, IOError) as e:
|
268
|
+
raise OSError(f"Error writing file {path}: {e}")
|
173
269
|
except Exception as e:
|
174
|
-
raise
|
270
|
+
raise RuntimeError(f"Unexpected error writing file {path}: {e}")
|
175
271
|
|
176
272
|
|
177
273
|
def search_files(
|
178
274
|
path: str,
|
179
275
|
regex: str,
|
180
276
|
file_pattern: Optional[str] = None,
|
181
|
-
include_hidden: bool =
|
277
|
+
include_hidden: bool = True,
|
182
278
|
) -> str:
|
183
|
-
"""
|
184
|
-
Request to perform a regex search across files in a specified directory,
|
185
|
-
providing context-rich results. This tool searches for patterns or specific
|
186
|
-
content across multiple files, displaying each match with encapsulating context.
|
279
|
+
"""Search files in a directory using regex, showing context.
|
187
280
|
Args:
|
188
|
-
path
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
If not provided, searches all files (*).
|
194
|
-
include_hidden: (optional) Whether to include hidden files.
|
195
|
-
Defaults to False (exclude hidden files).
|
281
|
+
path (str): Path to search. Pass exactly as provided, including '~'.
|
282
|
+
regex (str): Python regex pattern to search for.
|
283
|
+
file_pattern (Optional[str]): Glob pattern to filter files
|
284
|
+
(e.g., '*.py'). Defaults to None.
|
285
|
+
include_hidden (bool): Include hidden files/dirs. Defaults to True.
|
196
286
|
Returns:
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
Example error: '{"error": "Invalid regex: ..."}'
|
287
|
+
str: JSON: {"summary": "...", "results": [{"file":"f.py", ...}]} or {"error": "..."}
|
288
|
+
Raises:
|
289
|
+
Exception: If error occurs or regex is invalid.
|
201
290
|
"""
|
202
291
|
try:
|
203
292
|
pattern = re.compile(regex)
|
204
293
|
except re.error as e:
|
205
|
-
raise
|
294
|
+
raise ValueError(f"Invalid regex pattern: {e}")
|
206
295
|
search_results = {"summary": "", "results": []}
|
207
296
|
match_count = 0
|
208
297
|
searched_file_count = 0
|
209
298
|
file_match_count = 0
|
210
299
|
try:
|
211
|
-
abs_path = os.path.abspath(path)
|
300
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
212
301
|
for root, dirs, files in os.walk(abs_path):
|
213
302
|
# Skip hidden directories
|
214
303
|
dirs[:] = [d for d in dirs if include_hidden or not _is_hidden(d)]
|
@@ -244,14 +333,18 @@ def search_files(
|
|
244
333
|
f"Found {match_count} matches in {file_match_count} files "
|
245
334
|
f"(searched {searched_file_count} files)."
|
246
335
|
)
|
247
|
-
return json.dumps(
|
336
|
+
return json.dumps(
|
337
|
+
search_results
|
338
|
+
) # No need for pretty printing for LLM consumption
|
339
|
+
except (OSError, IOError) as e:
|
340
|
+
raise OSError(f"Error searching files in {path}: {e}")
|
248
341
|
except Exception as e:
|
249
|
-
raise
|
342
|
+
raise RuntimeError(f"Unexpected error searching files in {path}: {e}")
|
250
343
|
|
251
344
|
|
252
345
|
def _get_file_matches(
|
253
346
|
file_path: str, pattern: re.Pattern, context_lines: int = 2
|
254
|
-
) ->
|
347
|
+
) -> list[dict[str, Any]]:
|
255
348
|
"""Search for regex matches in a file with context."""
|
256
349
|
try:
|
257
350
|
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
|
@@ -274,39 +367,49 @@ def _get_file_matches(
|
|
274
367
|
}
|
275
368
|
matches.append(match_data)
|
276
369
|
return matches
|
370
|
+
except (OSError, IOError) as e:
|
371
|
+
raise IOError(f"Error reading {file_path}: {e}")
|
277
372
|
except Exception as e:
|
278
|
-
raise
|
373
|
+
raise RuntimeError(f"Unexpected error processing {file_path}: {e}")
|
279
374
|
|
280
375
|
|
281
|
-
def apply_diff(
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
If you're not confident in the exact content to search for, use the read_file tool
|
291
|
-
first to get the exact content.
|
376
|
+
def apply_diff(
|
377
|
+
path: str,
|
378
|
+
diff: str,
|
379
|
+
search_marker: str = "<<<<<< SEARCH",
|
380
|
+
meta_marker: str = "------",
|
381
|
+
separator: str = "======",
|
382
|
+
replace_marker: str = ">>>>>> REPLACE",
|
383
|
+
) -> str:
|
384
|
+
"""Apply a precise search/replace diff to a file.
|
292
385
|
Args:
|
293
|
-
path
|
294
|
-
diff
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
386
|
+
path (str): Path to modify. Pass exactly as provided, including '~'.
|
387
|
+
diff (str): Search/replace block defining changes (see format example below).
|
388
|
+
search_marker (str): Marker for start of search block. Defaults to "<<<<<< SEARCH".
|
389
|
+
meta_marker (str): Marker for start of content to search for. Defaults to "------".
|
390
|
+
separator (str): Marker separating search/replace content. Defaults to "======".
|
391
|
+
replace_marker (str): Marker for end of replacement block.
|
392
|
+
Defaults to ">>>>>> REPLACE".
|
393
|
+
SEARCH block must exactly match file content including whitespace/indentation.
|
394
|
+
Format example:
|
395
|
+
[Search Marker, e.g., <<<<<< SEARCH]
|
396
|
+
:start_line:10
|
397
|
+
:end_line:15
|
398
|
+
[Meta Marker, e.g., ------]
|
399
|
+
[exact content to find including whitespace]
|
400
|
+
[Separator, e.g., ======]
|
401
|
+
[new content to replace with]
|
402
|
+
[Replace Marker, e.g., >>>>>> REPLACE]
|
304
403
|
Returns:
|
305
|
-
|
404
|
+
str: JSON: {"success": true, "path": "f.py"} or {"success": false, "error": "..."}
|
405
|
+
Raises:
|
406
|
+
Exception: If an error occurs.
|
306
407
|
"""
|
307
408
|
try:
|
308
|
-
start_line, end_line, search_content, replace_content = _parse_diff(
|
309
|
-
|
409
|
+
start_line, end_line, search_content, replace_content = _parse_diff(
|
410
|
+
diff, search_marker, meta_marker, separator, replace_marker
|
411
|
+
)
|
412
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
310
413
|
if not os.path.exists(abs_path):
|
311
414
|
return json.dumps(
|
312
415
|
{"success": False, "path": path, "error": f"File not found at {path}"}
|
@@ -343,28 +446,64 @@ def apply_diff(path: str, diff: str) -> str:
|
|
343
446
|
new_content += "\n"
|
344
447
|
_write_file(abs_path, new_content)
|
345
448
|
return json.dumps({"success": True, "path": path})
|
449
|
+
except ValueError as e:
|
450
|
+
raise ValueError(f"Error parsing diff: {e}")
|
451
|
+
except (OSError, IOError) as e:
|
452
|
+
raise OSError(f"Error applying diff to {path}: {e}")
|
346
453
|
except Exception as e:
|
347
|
-
raise
|
454
|
+
raise RuntimeError(f"Unexpected error applying diff to {path}: {e}")
|
348
455
|
|
349
456
|
|
350
|
-
def _parse_diff(
|
351
|
-
|
352
|
-
search_marker
|
353
|
-
meta_marker
|
354
|
-
separator
|
355
|
-
replace_marker
|
457
|
+
def _parse_diff(
|
458
|
+
diff: str,
|
459
|
+
search_marker: str,
|
460
|
+
meta_marker: str,
|
461
|
+
separator: str,
|
462
|
+
replace_marker: str,
|
463
|
+
) -> tuple[int, int, str, str]:
|
464
|
+
"""
|
465
|
+
Parse diff content into components.
|
466
|
+
Args:
|
467
|
+
diff: The diff content to parse
|
468
|
+
search_marker: Marker indicating the start of the search block
|
469
|
+
meta_marker: Marker indicating the start of the content to search for
|
470
|
+
separator: Marker separating search content from replacement content
|
471
|
+
replace_marker: Marker indicating the end of the replacement block
|
472
|
+
Returns:
|
473
|
+
Tuple of (start_line, end_line, search_content, replace_content)
|
474
|
+
Raises:
|
475
|
+
ValueError: If diff format is invalid or missing required markers
|
476
|
+
ValueError: If start_line or end_line cannot be parsed
|
477
|
+
"""
|
478
|
+
# Find all marker positions
|
356
479
|
search_start_idx = diff.find(search_marker)
|
357
480
|
meta_start_idx = diff.find(meta_marker)
|
358
481
|
separator_idx = diff.find(separator)
|
359
482
|
replace_end_idx = diff.find(replace_marker)
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
483
|
+
# Validate all markers are present
|
484
|
+
missing_markers = []
|
485
|
+
if search_start_idx == -1:
|
486
|
+
missing_markers.append("search marker")
|
487
|
+
if meta_start_idx == -1:
|
488
|
+
missing_markers.append("meta marker")
|
489
|
+
if separator_idx == -1:
|
490
|
+
missing_markers.append("separator")
|
491
|
+
if replace_end_idx == -1:
|
492
|
+
missing_markers.append("replace marker")
|
493
|
+
if missing_markers:
|
494
|
+
raise ValueError(f"Invalid diff format - missing: {', '.join(missing_markers)}")
|
495
|
+
# Extract metadata
|
365
496
|
meta_content = diff[search_start_idx + len(search_marker) : meta_start_idx].strip()
|
366
|
-
|
367
|
-
|
497
|
+
# Parse line numbers
|
498
|
+
start_line_match = re.search(r":start_line:(\d+)", meta_content)
|
499
|
+
end_line_match = re.search(r":end_line:(\d+)", meta_content)
|
500
|
+
if not start_line_match:
|
501
|
+
raise ValueError("Missing start_line in diff metadata")
|
502
|
+
if not end_line_match:
|
503
|
+
raise ValueError("Missing end_line in diff metadata")
|
504
|
+
start_line = int(start_line_match.group(1))
|
505
|
+
end_line = int(end_line_match.group(1))
|
506
|
+
# Extract content sections
|
368
507
|
search_content = diff[meta_start_idx + len(meta_marker) : separator_idx].strip(
|
369
508
|
"\r\n"
|
370
509
|
)
|
zrb/builtin/llm/tool/rag.py
CHANGED
@@ -4,6 +4,7 @@ import json
|
|
4
4
|
import os
|
5
5
|
import sys
|
6
6
|
from collections.abc import Callable
|
7
|
+
from textwrap import dedent
|
7
8
|
|
8
9
|
import ulid
|
9
10
|
|
@@ -20,6 +21,8 @@ from zrb.util.file import read_file
|
|
20
21
|
|
21
22
|
|
22
23
|
class RAGFileReader:
|
24
|
+
"""Helper class to define custom file readers based on glob patterns."""
|
25
|
+
|
23
26
|
def __init__(self, glob_pattern: str, read: Callable[[str], str]):
|
24
27
|
self.glob_pattern = glob_pattern
|
25
28
|
self.read = read
|
@@ -47,7 +50,14 @@ def create_rag_from_directory(
|
|
47
50
|
openai_base_url: str = RAG_EMBEDDING_BASE_URL,
|
48
51
|
openai_embedding_model: str = RAG_EMBEDDING_MODEL,
|
49
52
|
):
|
53
|
+
"""Create a RAG retrieval tool function for LLM use.
|
54
|
+
This factory configures and returns an async function that takes a query,
|
55
|
+
updates a vector database if needed, performs a similarity search,
|
56
|
+
and returns relevant document chunks.
|
57
|
+
"""
|
58
|
+
|
50
59
|
async def retrieve(query: str) -> str:
|
60
|
+
# Docstring will be set dynamically below
|
51
61
|
from chromadb import PersistentClient
|
52
62
|
from chromadb.config import Settings
|
53
63
|
from openai import OpenAI
|
@@ -141,7 +151,14 @@ def create_rag_from_directory(
|
|
141
151
|
return json.dumps(results)
|
142
152
|
|
143
153
|
retrieve.__name__ = tool_name
|
144
|
-
retrieve.__doc__ =
|
154
|
+
retrieve.__doc__ = dedent(
|
155
|
+
f"""{tool_description}
|
156
|
+
Args:
|
157
|
+
query (str): The user query to search for in documents.
|
158
|
+
Returns:
|
159
|
+
str: JSON string with search results: {{"ids": [...], "documents": [...], ...}}
|
160
|
+
"""
|
161
|
+
)
|
145
162
|
return retrieve
|
146
163
|
|
147
164
|
|
zrb/builtin/llm/tool/web.py
CHANGED
@@ -1,10 +1,16 @@
|
|
1
1
|
import json
|
2
2
|
from collections.abc import Callable
|
3
|
-
|
3
|
+
|
4
|
+
# Annotated import removed
|
4
5
|
|
5
6
|
|
6
7
|
async def open_web_page(url: str) -> str:
|
7
|
-
"""Get content from a web page
|
8
|
+
"""Get parsed text content and links from a web page URL.
|
9
|
+
Args:
|
10
|
+
url (str): The URL of the web page to open.
|
11
|
+
Returns:
|
12
|
+
str: JSON: {"content": "parsed text content", "links_on_page": ["url1", ...]}
|
13
|
+
"""
|
8
14
|
|
9
15
|
async def get_page_content(page_url: str):
|
10
16
|
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" # noqa
|
@@ -53,11 +59,14 @@ async def open_web_page(url: str) -> str:
|
|
53
59
|
|
54
60
|
|
55
61
|
def create_search_internet_tool(serp_api_key: str) -> Callable[[str, int], str]:
|
56
|
-
def search_internet(
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
62
|
+
def search_internet(query: str, num_results: int = 10) -> str:
|
63
|
+
"""Search the internet using SerpApi (Google Search) and return parsed results.
|
64
|
+
Args:
|
65
|
+
query (str): Search query.
|
66
|
+
num_results (int): Search result count. Defaults to 10.
|
67
|
+
Returns:
|
68
|
+
str: JSON: {"content": "parsed text content", "links_on_page": ["url1", ...]}
|
69
|
+
"""
|
61
70
|
import requests
|
62
71
|
|
63
72
|
response = requests.get(
|
@@ -82,8 +91,13 @@ def create_search_internet_tool(serp_api_key: str) -> Callable[[str, int], str]:
|
|
82
91
|
return search_internet
|
83
92
|
|
84
93
|
|
85
|
-
def search_wikipedia(query:
|
86
|
-
"""Search
|
94
|
+
def search_wikipedia(query: str) -> str:
|
95
|
+
"""Search Wikipedia using its API.
|
96
|
+
Args:
|
97
|
+
query (str): Search query.
|
98
|
+
Returns:
|
99
|
+
str: JSON from Wikipedia API: {"batchcomplete": ..., "query": {"search": [...]}}
|
100
|
+
"""
|
87
101
|
import requests
|
88
102
|
|
89
103
|
params = {"action": "query", "list": "search", "srsearch": query, "format": "json"}
|
@@ -91,11 +105,14 @@ def search_wikipedia(query: Annotated[str, "Search query"]) -> str:
|
|
91
105
|
return response.json()
|
92
106
|
|
93
107
|
|
94
|
-
def search_arxiv(
|
95
|
-
|
96
|
-
|
97
|
-
)
|
98
|
-
|
108
|
+
def search_arxiv(query: str, num_results: int = 10) -> str:
|
109
|
+
"""Search ArXiv for papers using its API.
|
110
|
+
Args:
|
111
|
+
query (str): Search query.
|
112
|
+
num_results (int): Search result count. Defaults to 10.
|
113
|
+
Returns:
|
114
|
+
str: XML string from ArXiv API containing search results.
|
115
|
+
"""
|
99
116
|
import requests
|
100
117
|
|
101
118
|
params = {"search_query": f"all:{query}", "start": 0, "max_results": num_results}
|