zrb 1.5.5__py3-none-any.whl → 1.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zrb/__init__.py +2 -0
- zrb/__main__.py +28 -2
- zrb/builtin/llm/history.py +73 -0
- zrb/builtin/llm/input.py +27 -0
- zrb/builtin/llm/llm_chat.py +4 -61
- zrb/builtin/llm/tool/api.py +39 -17
- zrb/builtin/llm/tool/cli.py +19 -5
- zrb/builtin/llm/tool/file.py +277 -137
- zrb/builtin/llm/tool/rag.py +18 -1
- zrb/builtin/llm/tool/web.py +31 -14
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/common/error.py +6 -8
- zrb/config.py +1 -0
- zrb/llm_config.py +81 -15
- zrb/task/llm/__init__.py +0 -0
- zrb/task/llm/agent_runner.py +53 -0
- zrb/task/llm/context_enricher.py +86 -0
- zrb/task/llm/default_context.py +45 -0
- zrb/task/llm/error.py +77 -0
- zrb/task/llm/history.py +92 -0
- zrb/task/llm/history_summarizer.py +71 -0
- zrb/task/llm/print_node.py +98 -0
- zrb/task/llm/tool_wrapper.py +88 -0
- zrb/task/llm_task.py +279 -246
- zrb/util/file.py +17 -2
- zrb/util/load.py +2 -0
- {zrb-1.5.5.dist-info → zrb-1.5.7.dist-info}/METADATA +1 -1
- {zrb-1.5.5.dist-info → zrb-1.5.7.dist-info}/RECORD +29 -18
- {zrb-1.5.5.dist-info → zrb-1.5.7.dist-info}/WHEEL +0 -0
- {zrb-1.5.5.dist-info → zrb-1.5.7.dist-info}/entry_points.txt +0 -0
zrb/builtin/llm/tool/file.py
CHANGED
@@ -2,51 +2,134 @@ import fnmatch
|
|
2
2
|
import json
|
3
3
|
import os
|
4
4
|
import re
|
5
|
-
from typing import Any,
|
5
|
+
from typing import Any, Optional
|
6
6
|
|
7
|
-
from zrb.util.file import read_file
|
8
|
-
|
7
|
+
from zrb.util.file import read_file, read_file_with_line_numbers, write_file
|
8
|
+
|
9
|
+
DEFAULT_EXCLUDED_PATTERNS = [
|
10
|
+
# Common Python artifacts
|
11
|
+
"__pycache__",
|
12
|
+
"*.pyc",
|
13
|
+
"*.pyo",
|
14
|
+
"*.pyd",
|
15
|
+
".Python",
|
16
|
+
"build",
|
17
|
+
"develop-eggs",
|
18
|
+
"dist",
|
19
|
+
"downloads",
|
20
|
+
"eggs",
|
21
|
+
".eggs",
|
22
|
+
"lib",
|
23
|
+
"lib64",
|
24
|
+
"parts",
|
25
|
+
"sdist",
|
26
|
+
"var",
|
27
|
+
"wheels",
|
28
|
+
"share/python-wheels",
|
29
|
+
"*.egg-info",
|
30
|
+
".installed.cfg",
|
31
|
+
"*.egg",
|
32
|
+
"MANIFEST",
|
33
|
+
# Virtual environments
|
34
|
+
".env",
|
35
|
+
".venv",
|
36
|
+
"env",
|
37
|
+
"venv",
|
38
|
+
"ENV",
|
39
|
+
"VENV",
|
40
|
+
# Editor/IDE specific
|
41
|
+
".idea",
|
42
|
+
".vscode",
|
43
|
+
"*.swp",
|
44
|
+
"*.swo",
|
45
|
+
"*.swn",
|
46
|
+
# OS specific
|
47
|
+
".DS_Store",
|
48
|
+
"Thumbs.db",
|
49
|
+
# Version control
|
50
|
+
".git",
|
51
|
+
".hg",
|
52
|
+
".svn",
|
53
|
+
# Node.js
|
54
|
+
"node_modules",
|
55
|
+
"npm-debug.log*",
|
56
|
+
"yarn-debug.log*",
|
57
|
+
"yarn-error.log*",
|
58
|
+
# Test/Coverage artifacts
|
59
|
+
".history",
|
60
|
+
".tox",
|
61
|
+
".nox",
|
62
|
+
".coverage",
|
63
|
+
".coverage.*",
|
64
|
+
".cache",
|
65
|
+
".pytest_cache",
|
66
|
+
".hypothesis",
|
67
|
+
"htmlcov",
|
68
|
+
# Compiled files
|
69
|
+
"*.so",
|
70
|
+
"*.dylib",
|
71
|
+
"*.dll",
|
72
|
+
]
|
9
73
|
|
10
74
|
|
11
75
|
def list_files(
|
12
|
-
path: str = ".",
|
76
|
+
path: str = ".",
|
77
|
+
recursive: bool = True,
|
78
|
+
include_hidden: bool = False,
|
79
|
+
excluded_patterns: Optional[list[str]] = None,
|
13
80
|
) -> str:
|
14
|
-
"""
|
15
|
-
Request to list files and directories within the specified directory.
|
16
|
-
If recursive is true, it will list all files and directories recursively.
|
17
|
-
If recursive is false or not provided, it will only list the top-level contents.
|
81
|
+
"""List files/directories in a path, excluding specified patterns.
|
18
82
|
Args:
|
19
|
-
path
|
20
|
-
recursive
|
21
|
-
|
22
|
-
|
23
|
-
Defaults to
|
83
|
+
path (str): Path to list. Pass exactly as provided, including '~'. Defaults to ".".
|
84
|
+
recursive (bool): List recursively. Defaults to True.
|
85
|
+
include_hidden (bool): Include hidden files/dirs. Defaults to False.
|
86
|
+
excluded_patterns (Optional[List[str]]): List of glob patterns to exclude.
|
87
|
+
Defaults to a comprehensive list of common temporary/artifact patterns.
|
24
88
|
Returns:
|
25
|
-
|
26
|
-
|
27
|
-
|
89
|
+
str: JSON string: {"files": ["file1.txt", ...]} or {"error": "..."}
|
90
|
+
Raises:
|
91
|
+
Exception: If an error occurs.
|
28
92
|
"""
|
29
|
-
all_files:
|
30
|
-
abs_path = os.path.abspath(path)
|
93
|
+
all_files: list[str] = []
|
94
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
95
|
+
# Determine effective exclusion patterns
|
96
|
+
patterns_to_exclude = (
|
97
|
+
excluded_patterns
|
98
|
+
if excluded_patterns is not None
|
99
|
+
else DEFAULT_EXCLUDED_PATTERNS
|
100
|
+
)
|
31
101
|
try:
|
32
102
|
if recursive:
|
33
|
-
for root, dirs, files in os.walk(abs_path):
|
34
|
-
#
|
35
|
-
dirs[:] = [
|
103
|
+
for root, dirs, files in os.walk(abs_path, topdown=True):
|
104
|
+
# Filter directories in-place
|
105
|
+
dirs[:] = [
|
106
|
+
d
|
107
|
+
for d in dirs
|
108
|
+
if (include_hidden or not _is_hidden(d))
|
109
|
+
and not _is_excluded(d, patterns_to_exclude)
|
110
|
+
]
|
111
|
+
# Process files
|
36
112
|
for filename in files:
|
37
|
-
|
38
|
-
|
39
|
-
|
113
|
+
if (
|
114
|
+
include_hidden or not _is_hidden(filename)
|
115
|
+
) and not _is_excluded(filename, patterns_to_exclude):
|
116
|
+
full_path = os.path.join(root, filename)
|
117
|
+
# Check rel path for patterns like '**/node_modules/*'
|
118
|
+
rel_full_path = os.path.relpath(full_path, abs_path)
|
119
|
+
is_rel_path_excluded = _is_excluded(
|
120
|
+
rel_full_path, patterns_to_exclude
|
121
|
+
)
|
122
|
+
if not is_rel_path_excluded:
|
123
|
+
all_files.append(full_path)
|
40
124
|
else:
|
41
125
|
# Non-recursive listing (top-level only)
|
42
126
|
for item in os.listdir(abs_path):
|
43
127
|
full_path = os.path.join(abs_path, item)
|
44
128
|
# Include both files and directories if not recursive
|
45
|
-
if include_hidden or not _is_hidden(
|
46
|
-
item
|
47
|
-
):
|
129
|
+
if (include_hidden or not _is_hidden(item)) and not _is_excluded(
|
130
|
+
item, patterns_to_exclude
|
131
|
+
):
|
48
132
|
all_files.append(full_path)
|
49
|
-
|
50
133
|
# Return paths relative to the original path requested
|
51
134
|
try:
|
52
135
|
rel_files = [
|
@@ -61,47 +144,62 @@ def list_files(
|
|
61
144
|
rel_files = all_files
|
62
145
|
return json.dumps({"files": sorted(rel_files)})
|
63
146
|
raise
|
147
|
+
except (OSError, IOError) as e:
|
148
|
+
raise OSError(f"Error listing files in {path}: {e}")
|
64
149
|
except Exception as e:
|
65
|
-
raise
|
150
|
+
raise RuntimeError(f"Unexpected error listing files in {path}: {e}")
|
66
151
|
|
67
152
|
|
68
153
|
def _is_hidden(path: str) -> bool:
|
69
|
-
"""
|
154
|
+
"""
|
155
|
+
Check if path is hidden (starts with '.').
|
156
|
+
Args:
|
157
|
+
path: File or directory path to check
|
158
|
+
Returns:
|
159
|
+
True if the path is hidden, False otherwise
|
160
|
+
"""
|
161
|
+
# Extract just the basename to check if it starts with a dot
|
70
162
|
return os.path.basename(path).startswith(".")
|
71
163
|
|
72
164
|
|
165
|
+
def _is_excluded(name: str, patterns: list[str]) -> bool:
|
166
|
+
"""Check if a name/path matches any exclusion patterns."""
|
167
|
+
for pattern in patterns:
|
168
|
+
if fnmatch.fnmatch(name, pattern):
|
169
|
+
return True
|
170
|
+
# Split the path using the OS path separator.
|
171
|
+
parts = name.split(os.path.sep)
|
172
|
+
# Check each part of the path.
|
173
|
+
for part in parts:
|
174
|
+
if fnmatch.fnmatch(part, pattern):
|
175
|
+
return True
|
176
|
+
return False
|
177
|
+
|
178
|
+
|
73
179
|
def read_from_file(
|
74
180
|
path: str,
|
75
181
|
start_line: Optional[int] = None,
|
76
182
|
end_line: Optional[int] = None,
|
77
183
|
) -> str:
|
78
|
-
"""
|
79
|
-
Request to read the contents of a file at the specified path. Use this when you need
|
80
|
-
to examine the contents of an existing file you do not know the contents of, for example
|
81
|
-
to analyze code, review text files, or extract information from configuration files.
|
82
|
-
The output includes line numbers prefixed to each line (e.g. "1 | const x = 1"),
|
83
|
-
making it easier to reference specific lines when creating diffs or discussing code.
|
84
|
-
By specifying start_line and end_line parameters, you can efficiently read specific
|
85
|
-
portions of large files without loading the entire file into memory. Automatically
|
86
|
-
extracts raw text from PDF and DOCX files. May not be suitable for other types of
|
87
|
-
binary files, as it returns the raw content as a string.
|
184
|
+
"""Read file content (or specific lines) at a path, including line numbers.
|
88
185
|
Args:
|
89
|
-
path
|
90
|
-
start_line
|
91
|
-
|
92
|
-
end_line
|
93
|
-
|
186
|
+
path (str): Path to read. Pass exactly as provided, including '~'.
|
187
|
+
start_line (Optional[int]): Starting line number (1-based).
|
188
|
+
Defaults to None (start of file).
|
189
|
+
end_line (Optional[int]): Ending line number (1-based, inclusive).
|
190
|
+
Defaults to None (end of file).
|
94
191
|
Returns:
|
95
|
-
|
96
|
-
|
97
|
-
|
192
|
+
str: JSON: {"path": "...", "content": "...", "start_line": N, ...} or {"error": "..."}
|
193
|
+
The content includes line numbers.
|
194
|
+
Raises:
|
195
|
+
Exception: If an error occurs.
|
98
196
|
"""
|
99
197
|
try:
|
100
|
-
abs_path = os.path.abspath(path)
|
198
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
101
199
|
# Check if file exists
|
102
200
|
if not os.path.exists(abs_path):
|
103
201
|
return json.dumps({"error": f"File {path} does not exist"})
|
104
|
-
content =
|
202
|
+
content = read_file_with_line_numbers(abs_path)
|
105
203
|
lines = content.splitlines()
|
106
204
|
total_lines = len(lines)
|
107
205
|
# Adjust line indices (convert from 1-based to 0-based)
|
@@ -126,31 +224,27 @@ def read_from_file(
|
|
126
224
|
"total_lines": total_lines,
|
127
225
|
}
|
128
226
|
)
|
227
|
+
except (OSError, IOError) as e:
|
228
|
+
raise OSError(f"Error reading file {path}: {e}")
|
129
229
|
except Exception as e:
|
130
|
-
raise
|
230
|
+
raise RuntimeError(f"Unexpected error reading file {path}: {e}")
|
131
231
|
|
132
232
|
|
133
|
-
def write_to_file(
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
to
|
233
|
+
def write_to_file(
|
234
|
+
path: str,
|
235
|
+
content: str,
|
236
|
+
line_count: int,
|
237
|
+
) -> str:
|
238
|
+
"""Write full content to a file. Creates/overwrites file.
|
139
239
|
Args:
|
140
|
-
path
|
141
|
-
content
|
142
|
-
|
143
|
-
|
144
|
-
include the line numbers in the content though, just the actual content
|
145
|
-
of the file.
|
146
|
-
line_count: (required) The number of lines in the file. Make sure to compute
|
147
|
-
this based on the actual content of the file, not the number of lines
|
148
|
-
in the content you're providing.
|
240
|
+
path (str): Path to write. Pass exactly as provided, including '~'.
|
241
|
+
content (str): Full file content.
|
242
|
+
MUST be complete, no truncation/omissions. Exclude line numbers.
|
243
|
+
line_count (int): Number of lines in the provided content.
|
149
244
|
Returns:
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
Example error: '{"success": false, "error": "Permission denied: /etc/hosts"}'
|
245
|
+
str: JSON: {"success": true, "path": "f.txt", "warning": "..."} or {"error": "..."}
|
246
|
+
Raises:
|
247
|
+
Exception: If an error occurs.
|
154
248
|
"""
|
155
249
|
actual_lines = len(content.splitlines())
|
156
250
|
warning = None
|
@@ -160,55 +254,50 @@ def write_to_file(path: str, content: str, line_count: int) -> str:
|
|
160
254
|
f"content lines ({actual_lines}) for file {path}"
|
161
255
|
)
|
162
256
|
try:
|
163
|
-
abs_path = os.path.abspath(path)
|
257
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
164
258
|
# Ensure directory exists
|
165
259
|
directory = os.path.dirname(abs_path)
|
166
260
|
if directory and not os.path.exists(directory):
|
167
261
|
os.makedirs(directory, exist_ok=True)
|
168
|
-
|
262
|
+
write_file(abs_path, content)
|
169
263
|
result_data = {"success": True, "path": path}
|
170
264
|
if warning:
|
171
265
|
result_data["warning"] = warning
|
172
266
|
return json.dumps(result_data)
|
267
|
+
except (OSError, IOError) as e:
|
268
|
+
raise OSError(f"Error writing file {path}: {e}")
|
173
269
|
except Exception as e:
|
174
|
-
raise
|
270
|
+
raise RuntimeError(f"Unexpected error writing file {path}: {e}")
|
175
271
|
|
176
272
|
|
177
273
|
def search_files(
|
178
274
|
path: str,
|
179
275
|
regex: str,
|
180
276
|
file_pattern: Optional[str] = None,
|
181
|
-
include_hidden: bool =
|
277
|
+
include_hidden: bool = True,
|
182
278
|
) -> str:
|
183
|
-
"""
|
184
|
-
Request to perform a regex search across files in a specified directory,
|
185
|
-
providing context-rich results. This tool searches for patterns or specific
|
186
|
-
content across multiple files, displaying each match with encapsulating context.
|
279
|
+
"""Search files in a directory using regex, showing context.
|
187
280
|
Args:
|
188
|
-
path
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
If not provided, searches all files (*).
|
194
|
-
include_hidden: (optional) Whether to include hidden files.
|
195
|
-
Defaults to False (exclude hidden files).
|
281
|
+
path (str): Path to search. Pass exactly as provided, including '~'.
|
282
|
+
regex (str): Python regex pattern to search for.
|
283
|
+
file_pattern (Optional[str]): Glob pattern to filter files
|
284
|
+
(e.g., '*.py'). Defaults to None.
|
285
|
+
include_hidden (bool): Include hidden files/dirs. Defaults to True.
|
196
286
|
Returns:
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
Example error: '{"error": "Invalid regex: ..."}'
|
287
|
+
str: JSON: {"summary": "...", "results": [{"file":"f.py", ...}]} or {"error": "..."}
|
288
|
+
Raises:
|
289
|
+
Exception: If error occurs or regex is invalid.
|
201
290
|
"""
|
202
291
|
try:
|
203
292
|
pattern = re.compile(regex)
|
204
293
|
except re.error as e:
|
205
|
-
raise
|
294
|
+
raise ValueError(f"Invalid regex pattern: {e}")
|
206
295
|
search_results = {"summary": "", "results": []}
|
207
296
|
match_count = 0
|
208
297
|
searched_file_count = 0
|
209
298
|
file_match_count = 0
|
210
299
|
try:
|
211
|
-
abs_path = os.path.abspath(path)
|
300
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
212
301
|
for root, dirs, files in os.walk(abs_path):
|
213
302
|
# Skip hidden directories
|
214
303
|
dirs[:] = [d for d in dirs if include_hidden or not _is_hidden(d)]
|
@@ -244,14 +333,18 @@ def search_files(
|
|
244
333
|
f"Found {match_count} matches in {file_match_count} files "
|
245
334
|
f"(searched {searched_file_count} files)."
|
246
335
|
)
|
247
|
-
return json.dumps(
|
336
|
+
return json.dumps(
|
337
|
+
search_results
|
338
|
+
) # No need for pretty printing for LLM consumption
|
339
|
+
except (OSError, IOError) as e:
|
340
|
+
raise OSError(f"Error searching files in {path}: {e}")
|
248
341
|
except Exception as e:
|
249
|
-
raise
|
342
|
+
raise RuntimeError(f"Unexpected error searching files in {path}: {e}")
|
250
343
|
|
251
344
|
|
252
345
|
def _get_file_matches(
|
253
346
|
file_path: str, pattern: re.Pattern, context_lines: int = 2
|
254
|
-
) ->
|
347
|
+
) -> list[dict[str, Any]]:
|
255
348
|
"""Search for regex matches in a file with context."""
|
256
349
|
try:
|
257
350
|
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
|
@@ -274,44 +367,55 @@ def _get_file_matches(
|
|
274
367
|
}
|
275
368
|
matches.append(match_data)
|
276
369
|
return matches
|
370
|
+
except (OSError, IOError) as e:
|
371
|
+
raise IOError(f"Error reading {file_path}: {e}")
|
277
372
|
except Exception as e:
|
278
|
-
raise
|
373
|
+
raise RuntimeError(f"Unexpected error processing {file_path}: {e}")
|
279
374
|
|
280
375
|
|
281
|
-
def apply_diff(
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
If you're not confident in the exact content to search for, use the read_file tool
|
291
|
-
first to get the exact content.
|
376
|
+
def apply_diff(
|
377
|
+
path: str,
|
378
|
+
diff: str,
|
379
|
+
search_marker: str = "<<<<<< SEARCH",
|
380
|
+
meta_marker: str = "------",
|
381
|
+
separator: str = "======",
|
382
|
+
replace_marker: str = ">>>>>> REPLACE",
|
383
|
+
) -> str:
|
384
|
+
"""Apply a precise search/replace diff to a file.
|
292
385
|
Args:
|
293
|
-
path
|
294
|
-
diff
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
386
|
+
path (str): Path to modify. Pass exactly as provided, including '~'.
|
387
|
+
diff (str): Search/replace block defining changes (see format example below).
|
388
|
+
search_marker (str): Marker for start of search block. Defaults to "<<<<<< SEARCH".
|
389
|
+
meta_marker (str): Marker for start of content to search for. Defaults to "------".
|
390
|
+
separator (str): Marker separating search/replace content. Defaults to "======".
|
391
|
+
replace_marker (str): Marker for end of replacement block.
|
392
|
+
Defaults to ">>>>>> REPLACE".
|
393
|
+
SEARCH block must exactly match file content including whitespace/indentation.
|
394
|
+
SEARCH block should NOT contains line numbers
|
395
|
+
Format example:
|
396
|
+
[Search Marker, e.g., <<<<<< SEARCH]
|
397
|
+
:start_line:10
|
398
|
+
:end_line:15
|
399
|
+
[Meta Marker, e.g., ------]
|
400
|
+
[exact content to find including whitespace]
|
401
|
+
[Separator, e.g., ======]
|
402
|
+
[new content to replace with]
|
403
|
+
[Replace Marker, e.g., >>>>>> REPLACE]
|
304
404
|
Returns:
|
305
|
-
|
405
|
+
str: JSON: {"success": true, "path": "f.py"} or {"success": false, "error": "..."}
|
406
|
+
Raises:
|
407
|
+
Exception: If an error occurs.
|
306
408
|
"""
|
307
409
|
try:
|
308
|
-
start_line, end_line, search_content, replace_content = _parse_diff(
|
309
|
-
|
410
|
+
start_line, end_line, search_content, replace_content = _parse_diff(
|
411
|
+
diff, search_marker, meta_marker, separator, replace_marker
|
412
|
+
)
|
413
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
310
414
|
if not os.path.exists(abs_path):
|
311
415
|
return json.dumps(
|
312
416
|
{"success": False, "path": path, "error": f"File not found at {path}"}
|
313
417
|
)
|
314
|
-
content =
|
418
|
+
content = read_file(abs_path)
|
315
419
|
lines = content.splitlines()
|
316
420
|
if start_line < 1 or end_line > len(lines) or start_line > end_line:
|
317
421
|
return json.dumps(
|
@@ -341,30 +445,66 @@ def apply_diff(path: str, diff: str) -> str:
|
|
341
445
|
new_content = "\n".join(new_lines)
|
342
446
|
if content.endswith("\n"):
|
343
447
|
new_content += "\n"
|
344
|
-
|
448
|
+
write_file(abs_path, new_content)
|
345
449
|
return json.dumps({"success": True, "path": path})
|
450
|
+
except ValueError as e:
|
451
|
+
raise ValueError(f"Error parsing diff: {e}")
|
452
|
+
except (OSError, IOError) as e:
|
453
|
+
raise OSError(f"Error applying diff to {path}: {e}")
|
346
454
|
except Exception as e:
|
347
|
-
raise
|
455
|
+
raise RuntimeError(f"Unexpected error applying diff to {path}: {e}")
|
348
456
|
|
349
457
|
|
350
|
-
def _parse_diff(
|
351
|
-
|
352
|
-
search_marker
|
353
|
-
meta_marker
|
354
|
-
separator
|
355
|
-
replace_marker
|
458
|
+
def _parse_diff(
|
459
|
+
diff: str,
|
460
|
+
search_marker: str,
|
461
|
+
meta_marker: str,
|
462
|
+
separator: str,
|
463
|
+
replace_marker: str,
|
464
|
+
) -> tuple[int, int, str, str]:
|
465
|
+
"""
|
466
|
+
Parse diff content into components.
|
467
|
+
Args:
|
468
|
+
diff: The diff content to parse
|
469
|
+
search_marker: Marker indicating the start of the search block
|
470
|
+
meta_marker: Marker indicating the start of the content to search for
|
471
|
+
separator: Marker separating search content from replacement content
|
472
|
+
replace_marker: Marker indicating the end of the replacement block
|
473
|
+
Returns:
|
474
|
+
Tuple of (start_line, end_line, search_content, replace_content)
|
475
|
+
Raises:
|
476
|
+
ValueError: If diff format is invalid or missing required markers
|
477
|
+
ValueError: If start_line or end_line cannot be parsed
|
478
|
+
"""
|
479
|
+
# Find all marker positions
|
356
480
|
search_start_idx = diff.find(search_marker)
|
357
481
|
meta_start_idx = diff.find(meta_marker)
|
358
482
|
separator_idx = diff.find(separator)
|
359
483
|
replace_end_idx = diff.find(replace_marker)
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
484
|
+
# Validate all markers are present
|
485
|
+
missing_markers = []
|
486
|
+
if search_start_idx == -1:
|
487
|
+
missing_markers.append("search marker")
|
488
|
+
if meta_start_idx == -1:
|
489
|
+
missing_markers.append("meta marker")
|
490
|
+
if separator_idx == -1:
|
491
|
+
missing_markers.append("separator")
|
492
|
+
if replace_end_idx == -1:
|
493
|
+
missing_markers.append("replace marker")
|
494
|
+
if missing_markers:
|
495
|
+
raise ValueError(f"Invalid diff format - missing: {', '.join(missing_markers)}")
|
496
|
+
# Extract metadata
|
365
497
|
meta_content = diff[search_start_idx + len(search_marker) : meta_start_idx].strip()
|
366
|
-
|
367
|
-
|
498
|
+
# Parse line numbers
|
499
|
+
start_line_match = re.search(r":start_line:(\d+)", meta_content)
|
500
|
+
end_line_match = re.search(r":end_line:(\d+)", meta_content)
|
501
|
+
if not start_line_match:
|
502
|
+
raise ValueError("Missing start_line in diff metadata")
|
503
|
+
if not end_line_match:
|
504
|
+
raise ValueError("Missing end_line in diff metadata")
|
505
|
+
start_line = int(start_line_match.group(1))
|
506
|
+
end_line = int(end_line_match.group(1))
|
507
|
+
# Extract content sections
|
368
508
|
search_content = diff[meta_start_idx + len(meta_marker) : separator_idx].strip(
|
369
509
|
"\r\n"
|
370
510
|
)
|
zrb/builtin/llm/tool/rag.py
CHANGED
@@ -4,6 +4,7 @@ import json
|
|
4
4
|
import os
|
5
5
|
import sys
|
6
6
|
from collections.abc import Callable
|
7
|
+
from textwrap import dedent
|
7
8
|
|
8
9
|
import ulid
|
9
10
|
|
@@ -20,6 +21,8 @@ from zrb.util.file import read_file
|
|
20
21
|
|
21
22
|
|
22
23
|
class RAGFileReader:
|
24
|
+
"""Helper class to define custom file readers based on glob patterns."""
|
25
|
+
|
23
26
|
def __init__(self, glob_pattern: str, read: Callable[[str], str]):
|
24
27
|
self.glob_pattern = glob_pattern
|
25
28
|
self.read = read
|
@@ -47,7 +50,14 @@ def create_rag_from_directory(
|
|
47
50
|
openai_base_url: str = RAG_EMBEDDING_BASE_URL,
|
48
51
|
openai_embedding_model: str = RAG_EMBEDDING_MODEL,
|
49
52
|
):
|
53
|
+
"""Create a RAG retrieval tool function for LLM use.
|
54
|
+
This factory configures and returns an async function that takes a query,
|
55
|
+
updates a vector database if needed, performs a similarity search,
|
56
|
+
and returns relevant document chunks.
|
57
|
+
"""
|
58
|
+
|
50
59
|
async def retrieve(query: str) -> str:
|
60
|
+
# Docstring will be set dynamically below
|
51
61
|
from chromadb import PersistentClient
|
52
62
|
from chromadb.config import Settings
|
53
63
|
from openai import OpenAI
|
@@ -141,7 +151,14 @@ def create_rag_from_directory(
|
|
141
151
|
return json.dumps(results)
|
142
152
|
|
143
153
|
retrieve.__name__ = tool_name
|
144
|
-
retrieve.__doc__ =
|
154
|
+
retrieve.__doc__ = dedent(
|
155
|
+
f"""{tool_description}
|
156
|
+
Args:
|
157
|
+
query (str): The user query to search for in documents.
|
158
|
+
Returns:
|
159
|
+
str: JSON string with search results: {{"ids": [...], "documents": [...], ...}}
|
160
|
+
"""
|
161
|
+
)
|
145
162
|
return retrieve
|
146
163
|
|
147
164
|
|