zrb 1.5.5__py3-none-any.whl → 1.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,51 +2,134 @@ import fnmatch
2
2
  import json
3
3
  import os
4
4
  import re
5
- from typing import Any, Dict, List, Optional
5
+ from typing import Any, Optional
6
6
 
7
- from zrb.util.file import read_file as _read_file
8
- from zrb.util.file import write_file as _write_file
7
+ from zrb.util.file import read_file, read_file_with_line_numbers, write_file
8
+
9
+ DEFAULT_EXCLUDED_PATTERNS = [
10
+ # Common Python artifacts
11
+ "__pycache__",
12
+ "*.pyc",
13
+ "*.pyo",
14
+ "*.pyd",
15
+ ".Python",
16
+ "build",
17
+ "develop-eggs",
18
+ "dist",
19
+ "downloads",
20
+ "eggs",
21
+ ".eggs",
22
+ "lib",
23
+ "lib64",
24
+ "parts",
25
+ "sdist",
26
+ "var",
27
+ "wheels",
28
+ "share/python-wheels",
29
+ "*.egg-info",
30
+ ".installed.cfg",
31
+ "*.egg",
32
+ "MANIFEST",
33
+ # Virtual environments
34
+ ".env",
35
+ ".venv",
36
+ "env",
37
+ "venv",
38
+ "ENV",
39
+ "VENV",
40
+ # Editor/IDE specific
41
+ ".idea",
42
+ ".vscode",
43
+ "*.swp",
44
+ "*.swo",
45
+ "*.swn",
46
+ # OS specific
47
+ ".DS_Store",
48
+ "Thumbs.db",
49
+ # Version control
50
+ ".git",
51
+ ".hg",
52
+ ".svn",
53
+ # Node.js
54
+ "node_modules",
55
+ "npm-debug.log*",
56
+ "yarn-debug.log*",
57
+ "yarn-error.log*",
58
+ # Test/Coverage artifacts
59
+ ".history",
60
+ ".tox",
61
+ ".nox",
62
+ ".coverage",
63
+ ".coverage.*",
64
+ ".cache",
65
+ ".pytest_cache",
66
+ ".hypothesis",
67
+ "htmlcov",
68
+ # Compiled files
69
+ "*.so",
70
+ "*.dylib",
71
+ "*.dll",
72
+ ]
9
73
 
10
74
 
11
75
  def list_files(
12
- path: str = ".", recursive: bool = True, include_hidden: bool = False
76
+ path: str = ".",
77
+ recursive: bool = True,
78
+ include_hidden: bool = False,
79
+ excluded_patterns: Optional[list[str]] = None,
13
80
  ) -> str:
14
- """
15
- Request to list files and directories within the specified directory.
16
- If recursive is true, it will list all files and directories recursively.
17
- If recursive is false or not provided, it will only list the top-level contents.
81
+ """List files/directories in a path, excluding specified patterns.
18
82
  Args:
19
- path: (required) The path of the directory to list contents for (relative to the CWD)
20
- recursive: (optional) Whether to list files recursively.
21
- Use true for recursive listing, false or omit for top-level only.
22
- include_hidden: (optional) Whether to include hidden files/directories.
23
- Defaults to False (exclude hidden files).
83
+ path (str): Path to list. Pass exactly as provided, including '~'. Defaults to ".".
84
+ recursive (bool): List recursively. Defaults to True.
85
+ include_hidden (bool): Include hidden files/dirs. Defaults to False.
86
+ excluded_patterns (Optional[list[str]]): List of glob patterns to exclude.
87
+ Defaults to a comprehensive list of common temporary/artifact patterns.
24
88
  Returns:
25
- A JSON string containing a list of file paths or an error message.
26
- Example success: '{"files": ["file1.txt", "subdir/file2.py"]}'
27
- Example error: '{"error": "Error listing files: [Errno 2] No such file..."}'
89
+ str: JSON string: {"files": ["file1.txt", ...]} or {"error": "..."}
90
+ Raises:
91
+ OSError: If a filesystem error occurs; RuntimeError: for unexpected errors.
28
92
  """
29
- all_files: List[str] = []
30
- abs_path = os.path.abspath(path)
93
+ all_files: list[str] = []
94
+ abs_path = os.path.abspath(os.path.expanduser(path))
95
+ # Determine effective exclusion patterns
96
+ patterns_to_exclude = (
97
+ excluded_patterns
98
+ if excluded_patterns is not None
99
+ else DEFAULT_EXCLUDED_PATTERNS
100
+ )
31
101
  try:
32
102
  if recursive:
33
- for root, dirs, files in os.walk(abs_path):
34
- # Skip hidden directories (like .git) for performance and relevance
35
- dirs[:] = [d for d in dirs if include_hidden or not _is_hidden(d)]
103
+ for root, dirs, files in os.walk(abs_path, topdown=True):
104
+ # Filter directories in-place
105
+ dirs[:] = [
106
+ d
107
+ for d in dirs
108
+ if (include_hidden or not _is_hidden(d))
109
+ and not _is_excluded(d, patterns_to_exclude)
110
+ ]
111
+ # Process files
36
112
  for filename in files:
37
- # Skip hidden files
38
- if include_hidden or not _is_hidden(filename):
39
- all_files.append(os.path.join(root, filename))
113
+ if (
114
+ include_hidden or not _is_hidden(filename)
115
+ ) and not _is_excluded(filename, patterns_to_exclude):
116
+ full_path = os.path.join(root, filename)
117
+ # Check rel path for patterns like '**/node_modules/*'
118
+ rel_full_path = os.path.relpath(full_path, abs_path)
119
+ is_rel_path_excluded = _is_excluded(
120
+ rel_full_path, patterns_to_exclude
121
+ )
122
+ if not is_rel_path_excluded:
123
+ all_files.append(full_path)
40
124
  else:
41
125
  # Non-recursive listing (top-level only)
42
126
  for item in os.listdir(abs_path):
43
127
  full_path = os.path.join(abs_path, item)
44
128
  # Include both files and directories if not recursive
45
- if include_hidden or not _is_hidden(
46
- item
47
- ): # Skip hidden items unless included
129
+ if (include_hidden or not _is_hidden(item)) and not _is_excluded(
130
+ item, patterns_to_exclude
131
+ ):
48
132
  all_files.append(full_path)
49
-
50
133
  # Return paths relative to the original path requested
51
134
  try:
52
135
  rel_files = [
@@ -61,47 +144,62 @@ def list_files(
61
144
  rel_files = all_files
62
145
  return json.dumps({"files": sorted(rel_files)})
63
146
  raise
147
+ except (OSError, IOError) as e:
148
+ raise OSError(f"Error listing files in {path}: {e}")
64
149
  except Exception as e:
65
- raise Exception(f"Error listing files in {path}: {e}")
150
+ raise RuntimeError(f"Unexpected error listing files in {path}: {e}")
66
151
 
67
152
 
68
153
  def _is_hidden(path: str) -> bool:
69
- """Check if path is hidden (starts with '.')."""
154
+ """
155
+ Check if path is hidden (starts with '.').
156
+ Args:
157
+ path: File or directory path to check
158
+ Returns:
159
+ True if the path is hidden, False otherwise
160
+ """
161
+ # Extract just the basename to check if it starts with a dot
70
162
  return os.path.basename(path).startswith(".")
71
163
 
72
164
 
165
+ def _is_excluded(name: str, patterns: list[str]) -> bool:
166
+ """Check if a name/path matches any exclusion patterns."""
167
+ for pattern in patterns:
168
+ if fnmatch.fnmatch(name, pattern):
169
+ return True
170
+ # Split the path using the OS path separator.
171
+ parts = name.split(os.path.sep)
172
+ # Check each part of the path.
173
+ for part in parts:
174
+ if fnmatch.fnmatch(part, pattern):
175
+ return True
176
+ return False
177
+
178
+
73
179
  def read_from_file(
74
180
  path: str,
75
181
  start_line: Optional[int] = None,
76
182
  end_line: Optional[int] = None,
77
183
  ) -> str:
78
- """
79
- Request to read the contents of a file at the specified path. Use this when you need
80
- to examine the contents of an existing file you do not know the contents of, for example
81
- to analyze code, review text files, or extract information from configuration files.
82
- The output includes line numbers prefixed to each line (e.g. "1 | const x = 1"),
83
- making it easier to reference specific lines when creating diffs or discussing code.
84
- By specifying start_line and end_line parameters, you can efficiently read specific
85
- portions of large files without loading the entire file into memory. Automatically
86
- extracts raw text from PDF and DOCX files. May not be suitable for other types of
87
- binary files, as it returns the raw content as a string.
184
+ """Read file content (or specific lines) at a path, including line numbers.
88
185
  Args:
89
- path: (required) The path of the file to read (relative to the CWD)
90
- start_line: (optional) The starting line number to read from (1-based).
91
- If not provided, it starts from the beginning of the file.
92
- end_line: (optional) The ending line number to read to (1-based, inclusive).
93
- If not provided, it reads to the end of the file.
186
+ path (str): Path to read. Pass exactly as provided, including '~'.
187
+ start_line (Optional[int]): Starting line number (1-based).
188
+ Defaults to None (start of file).
189
+ end_line (Optional[int]): Ending line number (1-based, inclusive).
190
+ Defaults to None (end of file).
94
191
  Returns:
95
- A JSON string containing the file path, content, and line range, or an error.
96
- Example success: '{"path": "f.py", "content": "...", "start_line": 1, "end_line": 2}'
97
- Example error: '{"error": "File not found: data.txt"}'
192
+ str: JSON: {"path": "...", "content": "...", "start_line": N, ...} or {"error": "..."}
193
+ The content includes line numbers.
194
+ Raises:
195
+ OSError: If a filesystem error occurs; RuntimeError: for unexpected errors.
98
196
  """
99
197
  try:
100
- abs_path = os.path.abspath(path)
198
+ abs_path = os.path.abspath(os.path.expanduser(path))
101
199
  # Check if file exists
102
200
  if not os.path.exists(abs_path):
103
201
  return json.dumps({"error": f"File {path} does not exist"})
104
- content = _read_file(abs_path)
202
+ content = read_file_with_line_numbers(abs_path)
105
203
  lines = content.splitlines()
106
204
  total_lines = len(lines)
107
205
  # Adjust line indices (convert from 1-based to 0-based)
@@ -126,31 +224,27 @@ def read_from_file(
126
224
  "total_lines": total_lines,
127
225
  }
128
226
  )
227
+ except (OSError, IOError) as e:
228
+ raise OSError(f"Error reading file {path}: {e}")
129
229
  except Exception as e:
130
- raise Exception(f"Error reading file {path}: {e}")
230
+ raise RuntimeError(f"Unexpected error reading file {path}: {e}")
131
231
 
132
232
 
133
- def write_to_file(path: str, content: str, line_count: int) -> str:
134
- """
135
- Request to write full content to a file at the specified path. If the file exists,
136
- it will be overwritten with the provided content. If the file doesn't exist,
137
- it will be created. This tool will automatically create any directories needed
138
- to write the file.
233
+ def write_to_file(
234
+ path: str,
235
+ content: str,
236
+ line_count: int,
237
+ ) -> str:
238
+ """Write full content to a file. Creates/overwrites file.
139
239
  Args:
140
- path: (required) The path of the file to write to (relative to the CWD)
141
- content: (required) The content to write to the file. ALWAYS provide the COMPLETE
142
- intended content of the file, without any truncation or omissions. You MUST
143
- include ALL parts of the file, even if they haven't been modified. Do NOT
144
- include the line numbers in the content though, just the actual content
145
- of the file.
146
- line_count: (required) The number of lines in the file. Make sure to compute
147
- this based on the actual content of the file, not the number of lines
148
- in the content you're providing.
240
+ path (str): Path to write. Pass exactly as provided, including '~'.
241
+ content (str): Full file content.
242
+ MUST be complete, no truncation/omissions. Exclude line numbers.
243
+ line_count (int): Number of lines in the provided content.
149
244
  Returns:
150
- A JSON string indicating success or failure, including any warnings.
151
- Example success: '{"success": true, "path": "new_config.json"}'
152
- Example success with warning: '{"success": true, "path": "f.txt", "warning": "..."}'
153
- Example error: '{"success": false, "error": "Permission denied: /etc/hosts"}'
245
+ str: JSON: {"success": true, "path": "f.txt", "warning": "..."} or {"error": "..."}
246
+ Raises:
247
+ OSError: If a filesystem error occurs; RuntimeError: for unexpected errors.
154
248
  """
155
249
  actual_lines = len(content.splitlines())
156
250
  warning = None
@@ -160,55 +254,50 @@ def write_to_file(path: str, content: str, line_count: int) -> str:
160
254
  f"content lines ({actual_lines}) for file {path}"
161
255
  )
162
256
  try:
163
- abs_path = os.path.abspath(path)
257
+ abs_path = os.path.abspath(os.path.expanduser(path))
164
258
  # Ensure directory exists
165
259
  directory = os.path.dirname(abs_path)
166
260
  if directory and not os.path.exists(directory):
167
261
  os.makedirs(directory, exist_ok=True)
168
- _write_file(abs_path, content)
262
+ write_file(abs_path, content)
169
263
  result_data = {"success": True, "path": path}
170
264
  if warning:
171
265
  result_data["warning"] = warning
172
266
  return json.dumps(result_data)
267
+ except (OSError, IOError) as e:
268
+ raise OSError(f"Error writing file {path}: {e}")
173
269
  except Exception as e:
174
- raise Exception(f"Error writing file {e}")
270
+ raise RuntimeError(f"Unexpected error writing file {path}: {e}")
175
271
 
176
272
 
177
273
  def search_files(
178
274
  path: str,
179
275
  regex: str,
180
276
  file_pattern: Optional[str] = None,
181
- include_hidden: bool = False,
277
+ include_hidden: bool = True,
182
278
  ) -> str:
183
- """
184
- Request to perform a regex search across files in a specified directory,
185
- providing context-rich results. This tool searches for patterns or specific
186
- content across multiple files, displaying each match with encapsulating context.
279
+ """Search files in a directory using regex, showing context.
187
280
  Args:
188
- path: (required) The path of the directory to search in (relative to the CWD).
189
- This directory will be recursively searched.
190
- regex: (required) The regular expression pattern to search for. Uses Rust regex syntax.
191
- (Note: Python's `re` module will be used here, which has similar syntax)
192
- file_pattern: (optional) Glob pattern to filter files (e.g., '*.ts').
193
- If not provided, searches all files (*).
194
- include_hidden: (optional) Whether to include hidden files.
195
- Defaults to False (exclude hidden files).
281
+ path (str): Path to search. Pass exactly as provided, including '~'.
282
+ regex (str): Python regex pattern to search for.
283
+ file_pattern (Optional[str]): Glob pattern to filter files
284
+ (e.g., '*.py'). Defaults to None.
285
+ include_hidden (bool): Include hidden files/dirs. Defaults to True.
196
286
  Returns:
197
- A JSON string containing the search results or an error message.
198
- Example success: '{"summary": "Found 5 matches...", "results": [{"file":"f.py", ...}]}'
199
- Example no match: '{"summary": "No matches found...", "results": []}'
200
- Example error: '{"error": "Invalid regex: ..."}'
287
+ str: JSON: {"summary": "...", "results": [{"file":"f.py", ...}]} or {"error": "..."}
288
+ Raises:
289
+ ValueError: If the regex pattern is invalid; OSError or RuntimeError: on search errors.
201
290
  """
202
291
  try:
203
292
  pattern = re.compile(regex)
204
293
  except re.error as e:
205
- raise Exception(f"Invalid regex pattern: {e}")
294
+ raise ValueError(f"Invalid regex pattern: {e}")
206
295
  search_results = {"summary": "", "results": []}
207
296
  match_count = 0
208
297
  searched_file_count = 0
209
298
  file_match_count = 0
210
299
  try:
211
- abs_path = os.path.abspath(path)
300
+ abs_path = os.path.abspath(os.path.expanduser(path))
212
301
  for root, dirs, files in os.walk(abs_path):
213
302
  # Skip hidden directories
214
303
  dirs[:] = [d for d in dirs if include_hidden or not _is_hidden(d)]
@@ -244,14 +333,18 @@ def search_files(
244
333
  f"Found {match_count} matches in {file_match_count} files "
245
334
  f"(searched {searched_file_count} files)."
246
335
  )
247
- return json.dumps(search_results, indent=2) # Pretty print for readability
336
+ return json.dumps(
337
+ search_results
338
+ ) # No need for pretty printing for LLM consumption
339
+ except (OSError, IOError) as e:
340
+ raise OSError(f"Error searching files in {path}: {e}")
248
341
  except Exception as e:
249
- raise Exception(f"Error searching files: {e}")
342
+ raise RuntimeError(f"Unexpected error searching files in {path}: {e}")
250
343
 
251
344
 
252
345
  def _get_file_matches(
253
346
  file_path: str, pattern: re.Pattern, context_lines: int = 2
254
- ) -> List[Dict[str, Any]]:
347
+ ) -> list[dict[str, Any]]:
255
348
  """Search for regex matches in a file with context."""
256
349
  try:
257
350
  with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
@@ -274,44 +367,55 @@ def _get_file_matches(
274
367
  }
275
368
  matches.append(match_data)
276
369
  return matches
370
+ except (OSError, IOError) as e:
371
+ raise IOError(f"Error reading {file_path}: {e}")
277
372
  except Exception as e:
278
- raise IOError(f"Error reading {file_path}: {str(e)}")
373
+ raise RuntimeError(f"Unexpected error processing {file_path}: {e}")
279
374
 
280
375
 
281
- def apply_diff(path: str, diff: str) -> str:
282
- """
283
- Request to replace existing code using a search and replace block.
284
- This tool allows for precise, surgical replaces to files by specifying exactly
285
- what content to search for and what to replace it with.
286
- The tool will maintain proper indentation and formatting while making changes.
287
- Only a single operation is allowed per tool use.
288
- The SEARCH section must exactly match existing content including whitespace
289
- and indentation.
290
- If you're not confident in the exact content to search for, use the read_file tool
291
- first to get the exact content.
376
+ def apply_diff(
377
+ path: str,
378
+ diff: str,
379
+ search_marker: str = "<<<<<< SEARCH",
380
+ meta_marker: str = "------",
381
+ separator: str = "======",
382
+ replace_marker: str = ">>>>>> REPLACE",
383
+ ) -> str:
384
+ """Apply a precise search/replace diff to a file.
292
385
  Args:
293
- path: (required) The path of the file to modify (relative to the CWD)
294
- diff: (required) The search/replace block defining the changes.
295
- Format:
296
- <<<<<<< SEARCH
297
- :start_line:START_LINE_NUMBER
298
- :end_line:END_LINE_NUMBER
299
- -------
300
- [exact content to find including whitespace]
301
- =======
302
- [new content to replace with]
303
- >>>>>>> REPLACE
386
+ path (str): Path to modify. Pass exactly as provided, including '~'.
387
+ diff (str): Search/replace block defining changes (see format example below).
388
+ search_marker (str): Marker for start of search block. Defaults to "<<<<<< SEARCH".
389
+ meta_marker (str): Marker for start of content to search for. Defaults to "------".
390
+ separator (str): Marker separating search/replace content. Defaults to "======".
391
+ replace_marker (str): Marker for end of replacement block.
392
+ Defaults to ">>>>>> REPLACE".
393
+ SEARCH block must exactly match file content including whitespace/indentation.
394
+ SEARCH block should NOT contain line numbers.
395
+ Format example:
396
+ [Search Marker, e.g., <<<<<< SEARCH]
397
+ :start_line:10
398
+ :end_line:15
399
+ [Meta Marker, e.g., ------]
400
+ [exact content to find including whitespace]
401
+ [Separator, e.g., ======]
402
+ [new content to replace with]
403
+ [Replace Marker, e.g., >>>>>> REPLACE]
304
404
  Returns:
305
- A JSON string indicating success or failure.
405
+ str: JSON: {"success": true, "path": "f.py"} or {"success": false, "error": "..."}
406
+ Raises:
407
+ ValueError: If the diff format is invalid; OSError or RuntimeError: on file errors.
306
408
  """
307
409
  try:
308
- start_line, end_line, search_content, replace_content = _parse_diff(diff)
309
- abs_path = os.path.abspath(path)
410
+ start_line, end_line, search_content, replace_content = _parse_diff(
411
+ diff, search_marker, meta_marker, separator, replace_marker
412
+ )
413
+ abs_path = os.path.abspath(os.path.expanduser(path))
310
414
  if not os.path.exists(abs_path):
311
415
  return json.dumps(
312
416
  {"success": False, "path": path, "error": f"File not found at {path}"}
313
417
  )
314
- content = _read_file(abs_path)
418
+ content = read_file(abs_path)
315
419
  lines = content.splitlines()
316
420
  if start_line < 1 or end_line > len(lines) or start_line > end_line:
317
421
  return json.dumps(
@@ -341,30 +445,66 @@ def apply_diff(path: str, diff: str) -> str:
341
445
  new_content = "\n".join(new_lines)
342
446
  if content.endswith("\n"):
343
447
  new_content += "\n"
344
- _write_file(abs_path, new_content)
448
+ write_file(abs_path, new_content)
345
449
  return json.dumps({"success": True, "path": path})
450
+ except ValueError as e:
451
+ raise ValueError(f"Error parsing diff: {e}")
452
+ except (OSError, IOError) as e:
453
+ raise OSError(f"Error applying diff to {path}: {e}")
346
454
  except Exception as e:
347
- raise Exception(f"Error applying diff on {path}: {e}")
455
+ raise RuntimeError(f"Unexpected error applying diff to {path}: {e}")
348
456
 
349
457
 
350
- def _parse_diff(diff: str) -> tuple[int, int, str, str]:
351
- """Parse diff content into components."""
352
- search_marker = "<<<<<<< SEARCH"
353
- meta_marker = "-------"
354
- separator = "======="
355
- replace_marker = ">>>>>>> REPLACE"
458
+ def _parse_diff(
459
+ diff: str,
460
+ search_marker: str,
461
+ meta_marker: str,
462
+ separator: str,
463
+ replace_marker: str,
464
+ ) -> tuple[int, int, str, str]:
465
+ """
466
+ Parse diff content into components.
467
+ Args:
468
+ diff: The diff content to parse
469
+ search_marker: Marker indicating the start of the search block
470
+ meta_marker: Marker indicating the start of the content to search for
471
+ separator: Marker separating search content from replacement content
472
+ replace_marker: Marker indicating the end of the replacement block
473
+ Returns:
474
+ Tuple of (start_line, end_line, search_content, replace_content)
475
+ Raises:
476
+ ValueError: If diff format is invalid or missing required markers
477
+ ValueError: If start_line or end_line cannot be parsed
478
+ """
479
+ # Find all marker positions
356
480
  search_start_idx = diff.find(search_marker)
357
481
  meta_start_idx = diff.find(meta_marker)
358
482
  separator_idx = diff.find(separator)
359
483
  replace_end_idx = diff.find(replace_marker)
360
- if any(
361
- idx == -1
362
- for idx in [search_start_idx, meta_start_idx, separator_idx, replace_end_idx]
363
- ):
364
- raise ValueError("Invalid diff format - missing markers")
484
+ # Validate all markers are present
485
+ missing_markers = []
486
+ if search_start_idx == -1:
487
+ missing_markers.append("search marker")
488
+ if meta_start_idx == -1:
489
+ missing_markers.append("meta marker")
490
+ if separator_idx == -1:
491
+ missing_markers.append("separator")
492
+ if replace_end_idx == -1:
493
+ missing_markers.append("replace marker")
494
+ if missing_markers:
495
+ raise ValueError(f"Invalid diff format - missing: {', '.join(missing_markers)}")
496
+ # Extract metadata
365
497
  meta_content = diff[search_start_idx + len(search_marker) : meta_start_idx].strip()
366
- start_line = int(re.search(r":start_line:(\d+)", meta_content).group(1))
367
- end_line = int(re.search(r":end_line:(\d+)", meta_content).group(1))
498
+ # Parse line numbers
499
+ start_line_match = re.search(r":start_line:(\d+)", meta_content)
500
+ end_line_match = re.search(r":end_line:(\d+)", meta_content)
501
+ if not start_line_match:
502
+ raise ValueError("Missing start_line in diff metadata")
503
+ if not end_line_match:
504
+ raise ValueError("Missing end_line in diff metadata")
505
+ start_line = int(start_line_match.group(1))
506
+ end_line = int(end_line_match.group(1))
507
+ # Extract content sections
368
508
  search_content = diff[meta_start_idx + len(meta_marker) : separator_idx].strip(
369
509
  "\r\n"
370
510
  )
@@ -4,6 +4,7 @@ import json
4
4
  import os
5
5
  import sys
6
6
  from collections.abc import Callable
7
+ from textwrap import dedent
7
8
 
8
9
  import ulid
9
10
 
@@ -20,6 +21,8 @@ from zrb.util.file import read_file
20
21
 
21
22
 
22
23
  class RAGFileReader:
24
+ """Helper class to define custom file readers based on glob patterns."""
25
+
23
26
  def __init__(self, glob_pattern: str, read: Callable[[str], str]):
24
27
  self.glob_pattern = glob_pattern
25
28
  self.read = read
@@ -47,7 +50,14 @@ def create_rag_from_directory(
47
50
  openai_base_url: str = RAG_EMBEDDING_BASE_URL,
48
51
  openai_embedding_model: str = RAG_EMBEDDING_MODEL,
49
52
  ):
53
+ """Create a RAG retrieval tool function for LLM use.
54
+ This factory configures and returns an async function that takes a query,
55
+ updates a vector database if needed, performs a similarity search,
56
+ and returns relevant document chunks.
57
+ """
58
+
50
59
  async def retrieve(query: str) -> str:
60
+ # Docstring will be set dynamically below
51
61
  from chromadb import PersistentClient
52
62
  from chromadb.config import Settings
53
63
  from openai import OpenAI
@@ -141,7 +151,14 @@ def create_rag_from_directory(
141
151
  return json.dumps(results)
142
152
 
143
153
  retrieve.__name__ = tool_name
144
- retrieve.__doc__ = tool_description
154
+ retrieve.__doc__ = dedent(
155
+ f"""{tool_description}
156
+ Args:
157
+ query (str): The user query to search for in documents.
158
+ Returns:
159
+ str: JSON string with search results: {{"ids": [...], "documents": [...], ...}}
160
+ """
161
+ )
145
162
  return retrieve
146
163
 
147
164