zrb 1.21.29__py3-none-any.whl → 2.0.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of zrb might be problematic.

Files changed (192)
  1. zrb/__init__.py +118 -129
  2. zrb/builtin/__init__.py +54 -2
  3. zrb/builtin/llm/chat.py +147 -0
  4. zrb/callback/callback.py +8 -1
  5. zrb/cmd/cmd_result.py +2 -1
  6. zrb/config/config.py +491 -280
  7. zrb/config/helper.py +84 -0
  8. zrb/config/web_auth_config.py +50 -35
  9. zrb/context/any_shared_context.py +13 -2
  10. zrb/context/context.py +31 -3
  11. zrb/context/print_fn.py +13 -0
  12. zrb/context/shared_context.py +14 -1
  13. zrb/input/option_input.py +30 -2
  14. zrb/llm/agent/__init__.py +9 -0
  15. zrb/llm/agent/agent.py +215 -0
  16. zrb/llm/agent/summarizer.py +20 -0
  17. zrb/llm/app/__init__.py +10 -0
  18. zrb/llm/app/completion.py +281 -0
  19. zrb/llm/app/confirmation/allow_tool.py +66 -0
  20. zrb/llm/app/confirmation/handler.py +178 -0
  21. zrb/llm/app/confirmation/replace_confirmation.py +77 -0
  22. zrb/llm/app/keybinding.py +34 -0
  23. zrb/llm/app/layout.py +117 -0
  24. zrb/llm/app/lexer.py +155 -0
  25. zrb/llm/app/redirection.py +28 -0
  26. zrb/llm/app/style.py +16 -0
  27. zrb/llm/app/ui.py +733 -0
  28. zrb/llm/config/__init__.py +4 -0
  29. zrb/llm/config/config.py +122 -0
  30. zrb/llm/config/limiter.py +247 -0
  31. zrb/llm/history_manager/__init__.py +4 -0
  32. zrb/llm/history_manager/any_history_manager.py +23 -0
  33. zrb/llm/history_manager/file_history_manager.py +91 -0
  34. zrb/llm/history_processor/summarizer.py +108 -0
  35. zrb/llm/note/__init__.py +3 -0
  36. zrb/llm/note/manager.py +122 -0
  37. zrb/llm/prompt/__init__.py +29 -0
  38. zrb/llm/prompt/claude_compatibility.py +92 -0
  39. zrb/llm/prompt/compose.py +55 -0
  40. zrb/llm/prompt/default.py +51 -0
  41. zrb/llm/prompt/markdown/mandate.md +23 -0
  42. zrb/llm/prompt/markdown/persona.md +3 -0
  43. zrb/llm/prompt/markdown/summarizer.md +21 -0
  44. zrb/llm/prompt/note.py +41 -0
  45. zrb/llm/prompt/system_context.py +46 -0
  46. zrb/llm/prompt/zrb.py +41 -0
  47. zrb/llm/skill/__init__.py +3 -0
  48. zrb/llm/skill/manager.py +86 -0
  49. zrb/llm/task/__init__.py +4 -0
  50. zrb/llm/task/llm_chat_task.py +316 -0
  51. zrb/llm/task/llm_task.py +245 -0
  52. zrb/llm/tool/__init__.py +39 -0
  53. zrb/llm/tool/bash.py +75 -0
  54. zrb/llm/tool/code.py +266 -0
  55. zrb/llm/tool/file.py +419 -0
  56. zrb/llm/tool/note.py +70 -0
  57. zrb/{builtin/llm → llm}/tool/rag.py +8 -5
  58. zrb/llm/tool/search/brave.py +53 -0
  59. zrb/llm/tool/search/searxng.py +47 -0
  60. zrb/llm/tool/search/serpapi.py +47 -0
  61. zrb/llm/tool/skill.py +19 -0
  62. zrb/llm/tool/sub_agent.py +70 -0
  63. zrb/llm/tool/web.py +97 -0
  64. zrb/llm/tool/zrb_task.py +66 -0
  65. zrb/llm/util/attachment.py +101 -0
  66. zrb/llm/util/prompt.py +104 -0
  67. zrb/llm/util/stream_response.py +178 -0
  68. zrb/session/any_session.py +0 -3
  69. zrb/session/session.py +1 -1
  70. zrb/task/base/context.py +25 -13
  71. zrb/task/base/execution.py +52 -47
  72. zrb/task/base/lifecycle.py +7 -4
  73. zrb/task/base_task.py +48 -49
  74. zrb/task/base_trigger.py +4 -1
  75. zrb/task/cmd_task.py +6 -0
  76. zrb/task/http_check.py +11 -5
  77. zrb/task/make_task.py +3 -0
  78. zrb/task/rsync_task.py +5 -0
  79. zrb/task/scaffolder.py +7 -4
  80. zrb/task/scheduler.py +3 -0
  81. zrb/task/tcp_check.py +6 -4
  82. zrb/util/ascii_art/art/bee.txt +17 -0
  83. zrb/util/ascii_art/art/cat.txt +9 -0
  84. zrb/util/ascii_art/art/ghost.txt +16 -0
  85. zrb/util/ascii_art/art/panda.txt +17 -0
  86. zrb/util/ascii_art/art/rose.txt +14 -0
  87. zrb/util/ascii_art/art/unicorn.txt +15 -0
  88. zrb/util/ascii_art/banner.py +92 -0
  89. zrb/util/cli/markdown.py +22 -2
  90. zrb/util/cmd/command.py +33 -10
  91. zrb/util/file.py +51 -32
  92. zrb/util/match.py +78 -0
  93. zrb/util/run.py +3 -3
  94. {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/METADATA +9 -15
  95. {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/RECORD +100 -128
  96. zrb/attr/__init__.py +0 -0
  97. zrb/builtin/llm/attachment.py +0 -40
  98. zrb/builtin/llm/chat_completion.py +0 -274
  99. zrb/builtin/llm/chat_session.py +0 -270
  100. zrb/builtin/llm/chat_session_cmd.py +0 -288
  101. zrb/builtin/llm/chat_trigger.py +0 -79
  102. zrb/builtin/llm/history.py +0 -71
  103. zrb/builtin/llm/input.py +0 -27
  104. zrb/builtin/llm/llm_ask.py +0 -269
  105. zrb/builtin/llm/previous-session.js +0 -21
  106. zrb/builtin/llm/tool/__init__.py +0 -0
  107. zrb/builtin/llm/tool/api.py +0 -75
  108. zrb/builtin/llm/tool/cli.py +0 -52
  109. zrb/builtin/llm/tool/code.py +0 -236
  110. zrb/builtin/llm/tool/file.py +0 -560
  111. zrb/builtin/llm/tool/note.py +0 -84
  112. zrb/builtin/llm/tool/sub_agent.py +0 -150
  113. zrb/builtin/llm/tool/web.py +0 -171
  114. zrb/builtin/project/__init__.py +0 -0
  115. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/__init__.py +0 -0
  116. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/module/template/app_template/module/my_module/service/__init__.py +0 -0
  117. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/common/__init__.py +0 -0
  118. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/__init__.py +0 -0
  119. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/__init__.py +0 -0
  120. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/permission/__init__.py +0 -0
  121. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/role/__init__.py +0 -0
  122. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/user/__init__.py +0 -0
  123. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/schema/__init__.py +0 -0
  124. zrb/builtin/project/create/__init__.py +0 -0
  125. zrb/builtin/shell/__init__.py +0 -0
  126. zrb/builtin/shell/autocomplete/__init__.py +0 -0
  127. zrb/callback/__init__.py +0 -0
  128. zrb/cmd/__init__.py +0 -0
  129. zrb/config/default_prompt/interactive_system_prompt.md +0 -29
  130. zrb/config/default_prompt/persona.md +0 -1
  131. zrb/config/default_prompt/summarization_prompt.md +0 -57
  132. zrb/config/default_prompt/system_prompt.md +0 -38
  133. zrb/config/llm_config.py +0 -339
  134. zrb/config/llm_context/config.py +0 -166
  135. zrb/config/llm_context/config_parser.py +0 -40
  136. zrb/config/llm_context/workflow.py +0 -81
  137. zrb/config/llm_rate_limitter.py +0 -190
  138. zrb/content_transformer/__init__.py +0 -0
  139. zrb/context/__init__.py +0 -0
  140. zrb/dot_dict/__init__.py +0 -0
  141. zrb/env/__init__.py +0 -0
  142. zrb/group/__init__.py +0 -0
  143. zrb/input/__init__.py +0 -0
  144. zrb/runner/__init__.py +0 -0
  145. zrb/runner/web_route/__init__.py +0 -0
  146. zrb/runner/web_route/home_page/__init__.py +0 -0
  147. zrb/session/__init__.py +0 -0
  148. zrb/session_state_log/__init__.py +0 -0
  149. zrb/session_state_logger/__init__.py +0 -0
  150. zrb/task/__init__.py +0 -0
  151. zrb/task/base/__init__.py +0 -0
  152. zrb/task/llm/__init__.py +0 -0
  153. zrb/task/llm/agent.py +0 -204
  154. zrb/task/llm/agent_runner.py +0 -152
  155. zrb/task/llm/config.py +0 -122
  156. zrb/task/llm/conversation_history.py +0 -209
  157. zrb/task/llm/conversation_history_model.py +0 -67
  158. zrb/task/llm/default_workflow/coding/workflow.md +0 -41
  159. zrb/task/llm/default_workflow/copywriting/workflow.md +0 -68
  160. zrb/task/llm/default_workflow/git/workflow.md +0 -118
  161. zrb/task/llm/default_workflow/golang/workflow.md +0 -128
  162. zrb/task/llm/default_workflow/html-css/workflow.md +0 -135
  163. zrb/task/llm/default_workflow/java/workflow.md +0 -146
  164. zrb/task/llm/default_workflow/javascript/workflow.md +0 -158
  165. zrb/task/llm/default_workflow/python/workflow.md +0 -160
  166. zrb/task/llm/default_workflow/researching/workflow.md +0 -153
  167. zrb/task/llm/default_workflow/rust/workflow.md +0 -162
  168. zrb/task/llm/default_workflow/shell/workflow.md +0 -299
  169. zrb/task/llm/error.py +0 -95
  170. zrb/task/llm/file_replacement.py +0 -206
  171. zrb/task/llm/file_tool_model.py +0 -57
  172. zrb/task/llm/history_processor.py +0 -206
  173. zrb/task/llm/history_summarization.py +0 -25
  174. zrb/task/llm/print_node.py +0 -221
  175. zrb/task/llm/prompt.py +0 -321
  176. zrb/task/llm/subagent_conversation_history.py +0 -41
  177. zrb/task/llm/tool_wrapper.py +0 -361
  178. zrb/task/llm/typing.py +0 -3
  179. zrb/task/llm/workflow.py +0 -76
  180. zrb/task/llm_task.py +0 -379
  181. zrb/task_status/__init__.py +0 -0
  182. zrb/util/__init__.py +0 -0
  183. zrb/util/cli/__init__.py +0 -0
  184. zrb/util/cmd/__init__.py +0 -0
  185. zrb/util/codemod/__init__.py +0 -0
  186. zrb/util/string/__init__.py +0 -0
  187. zrb/xcom/__init__.py +0 -0
  188. /zrb/{config/default_prompt/file_extractor_system_prompt.md → llm/prompt/markdown/file_extractor.md} +0 -0
  189. /zrb/{config/default_prompt/repo_extractor_system_prompt.md → llm/prompt/markdown/repo_extractor.md} +0 -0
  190. /zrb/{config/default_prompt/repo_summarizer_system_prompt.md → llm/prompt/markdown/repo_summarizer.md} +0 -0
  191. {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/WHEEL +0 -0
  192. {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/entry_points.txt +0 -0
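
Taken together, the listing shows the LLM stack moving out of zrb.builtin.llm (along with zrb/task/llm/* and the zrb/config/llm_* modules) into a new top-level zrb.llm package that bundles the chat app, agents, prompts, and tools. As a rough orientation only, and assuming module paths mirror the file paths listed above (the diff does not show the package's public exports), downstream imports would shift roughly like this:

# Hypothetical sketch of the 1.x -> 2.0.0a4 module move; paths follow the listing above.
# removed:  zrb/builtin/llm/tool/file.py, zrb/task/llm/agent.py, zrb/config/llm_config.py
# added:    zrb/llm/tool/file.py, zrb/llm/agent/agent.py, zrb/llm/config/config.py
from zrb.llm.tool.file import list_files, read_file, write_file  # defined in the added file shown below
from zrb.llm.tool.code import analyze_code  # map-reduce repository analysis, also shown below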
zrb/llm/tool/code.py ADDED
@@ -0,0 +1,266 @@
+ import fnmatch
+ import json
+ import os
+
+ from zrb.config.config import CFG
+ from zrb.llm.agent.agent import create_agent, run_agent
+ from zrb.llm.config.config import llm_config
+ from zrb.llm.config.limiter import llm_limiter
+ from zrb.llm.prompt.default import (
+     get_repo_extractor_system_prompt,
+     get_repo_summarizer_system_prompt,
+ )
+ from zrb.llm.tool.file import DEFAULT_EXCLUDED_PATTERNS
+
+ _DEFAULT_EXTENSIONS = [
+     "py",
+     "go",
+     "java",
+     "ts",
+     "js",
+     "rs",
+     "rb",
+     "php",
+     "sh",
+     "bash",
+     "c",
+     "cpp",
+     "h",
+     "hpp",
+     "cs",
+     "swift",
+     "kt",
+     "scala",
+     "m",
+     "pl",
+     "lua",
+     "sql",
+     "html",
+     "css",
+     "scss",
+     "less",
+     "json",
+     "yaml",
+     "yml",
+     "toml",
+     "ini",
+     "xml",
+     "md",
+     "rst",
+     "txt",
+ ]
+
+
+ async def analyze_code(
+     path: str,
+     query: str,
+     extensions: list[str] | None = None,
+     exclude_patterns: list[str] | None = None,
+ ) -> str:
+     """
+     Performs a deep, semantic analysis of an entire codebase or directory to answer complex architectural or logic-based questions.
+     This tool uses a 'map-reduce' strategy to handle large repositories that exceed single-prompt limits.
+
+     **WHEN TO USE:**
+     - To understand system-wide flows, architectural patterns, or cross-file dependencies.
+     - When you need a summary of how a feature is implemented across multiple files.
+     - To identify potential refactoring opportunities or technical debt.
+
+     **LIMITATIONS:**
+     - It extracts and summarizes information; it does not read every single byte if the repo is massive.
+     - For precise line-by-line reading of a known file, use `read_file` instead.
+
+     Args:
+         path (str): Path to the directory or repository.
+         query (str): A clear, specific question or analysis goal (e.g., "How is authentication handled?").
+         extensions (list[str], optional): File extensions to include (e.g., ["py", "ts"]).
+         exclude_patterns (list[str], optional): Glob patterns to ignore (e.g., ["tests/*"]).
+
+     Returns:
+         str: A comprehensive analytical report.
+     """
+     if extensions is None:
+         extensions = _DEFAULT_EXTENSIONS
+     if exclude_patterns is None:
+         exclude_patterns = DEFAULT_EXCLUDED_PATTERNS
+
+     abs_path = os.path.abspath(os.path.expanduser(path))
+     if not os.path.exists(abs_path):
+         return f"Error: Path not found: {path}"
+
+     # 1. Gather files
+     file_metadatas = _get_file_metadatas(abs_path, extensions, exclude_patterns)
+     if not file_metadatas:
+         return "No files found matching the criteria."
+
+     print(f" 📝 Extraction ({len(file_metadatas)} files)")
+
+     # 2. Extract Info
+     extraction_token_threshold = CFG.LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD
+     extracted_infos = await _extract_info(
+         file_metadatas=file_metadatas,
+         query=query,
+         token_limit=extraction_token_threshold,
+     )
+
+     if not extracted_infos:
+         return "No information could be extracted from the files."
+
+     if len(extracted_infos) == 1:
+         return extracted_infos[0]
+
+     # 3. Summarize Info (Reduce)
+     summarization_token_threshold = CFG.LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD
+     summarized_infos = extracted_infos
+
+     while len(summarized_infos) > 1:
+         print(f" 📝 Summarization ({len(summarized_infos)} chunks)")
+         summarized_infos = await _summarize_info(
+             extracted_infos=summarized_infos,
+             query=query,
+             token_limit=summarization_token_threshold,
+         )
+
+     return summarized_infos[0]
+
+
+ def _get_file_metadatas(
+     dir_path: str,
+     extensions: list[str],
+     exclude_patterns: list[str],
+ ) -> list[dict[str, str]]:
+     metadata_list = []
+     for root, _, files in os.walk(dir_path):
+         files.sort()
+         for file in files:
+             if not any(file.endswith(f".{ext}") for ext in extensions):
+                 continue
+             file_path = os.path.join(root, file)
+             try:
+                 rel_path = os.path.relpath(file_path, dir_path)
+                 if _is_excluded(rel_path, exclude_patterns):
+                     continue
+                 with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+                     metadata_list.append({"path": rel_path, "content": f.read()})
+             except Exception as e:
+                 print(f"Error reading file {file_path}: {e}")
+     metadata_list.sort(key=lambda m: m["path"])
+     return metadata_list
+
+
+ def _is_excluded(name: str, patterns: list[str]) -> bool:
+     for pattern in patterns:
+         if fnmatch.fnmatch(name, pattern):
+             return True
+         parts = name.split(os.path.sep)
+         for part in parts:
+             if fnmatch.fnmatch(part, pattern):
+                 return True
+     return False
+
+
+ async def _extract_info(
+     file_metadatas: list[dict[str, str]],
+     query: str,
+     token_limit: int,
+ ) -> list[str]:
+     agent = create_agent(
+         model=llm_config.model,
+         system_prompt=get_repo_extractor_system_prompt(),
+     )
+
+     extracted_infos = []
+     content_buffer = []
+     current_token_count = 0
+
+     # We estimate token count of the prompt template overhead
+     base_overhead = 100
+
+     for metadata in file_metadatas:
+         path = metadata.get("path", "")
+         content = metadata.get("content", "")
+         file_obj = {"path": path, "content": content}
+         file_str = json.dumps(file_obj)
+         file_tokens = llm_limiter.count_tokens(file_str)
+
+         if current_token_count + file_tokens + base_overhead > token_limit:
+             if content_buffer:
+                 await _run_extraction(agent, query, content_buffer, extracted_infos)
+
+             content_buffer = [file_obj]
+             current_token_count = file_tokens
+         else:
+             content_buffer.append(file_obj)
+             current_token_count += file_tokens
+
+     # Process remaining buffer
+     if content_buffer:
+         await _run_extraction(agent, query, content_buffer, extracted_infos)
+
+     return extracted_infos
+
+
+ async def _run_extraction(agent, query, content_buffer, extracted_infos):
+     prompt_data = {
+         "main_assistant_query": query,
+         "files": content_buffer,
+     }
+     # We serialize to JSON for the prompt
+     message = json.dumps(prompt_data)
+
+     result, _ = await run_agent(
+         agent=agent,
+         message=message,
+         message_history=[],  # Stateless
+         limiter=llm_limiter,
+     )
+     extracted_infos.append(str(result))
+
+
+ async def _summarize_info(
+     extracted_infos: list[str],
+     query: str,
+     token_limit: int,
+ ) -> list[str]:
+     agent = create_agent(
+         model=llm_config.model,
+         system_prompt=get_repo_summarizer_system_prompt(),
+     )
+
+     summarized_infos = []
+     content_buffer = ""
+     # Overhead for prompt structure
+     base_overhead = 100
+
+     for info in extracted_infos:
+         # Check if adding this info exceeds limit
+         if (
+             llm_limiter.count_tokens(content_buffer + info) + base_overhead
+             > token_limit
+         ):
+             if content_buffer:
+                 await _run_summarization(agent, query, content_buffer, summarized_infos)
+             content_buffer = info
+         else:
+             content_buffer += info + "\n"
+
+     if content_buffer:
+         await _run_summarization(agent, query, content_buffer, summarized_infos)
+
+     return summarized_infos
+
+
+ async def _run_summarization(agent, query, content_buffer, summarized_infos):
+     prompt_data = {
+         "main_assistant_query": query,
+         "extracted_info": content_buffer,
+     }
+     message = json.dumps(prompt_data)
+
+     result, _ = await run_agent(
+         agent=agent,
+         message=message,
+         message_history=[],  # Stateless
+         limiter=llm_limiter,
+     )
+     summarized_infos.append(str(result))
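
For orientation, here is a minimal usage sketch of the analyze_code tool added above. It assumes the module is importable as zrb.llm.tool.code (mirroring the file path) and that the model and token-threshold configuration is already provided via zrb's CFG and llm_config; the repository path is hypothetical.

import asyncio

from zrb.llm.tool.code import analyze_code  # import path assumed from the file location above


async def main() -> None:
    # Map-reduce analysis: files are gathered, chunked by token budget,
    # extracted by a repo-extractor agent, then reduced by a summarizer agent.
    report = await analyze_code(
        path="./my_project",                     # hypothetical repository path
        query="How is authentication handled?",  # example query taken from the docstring
        extensions=["py", "ts"],
        exclude_patterns=["tests/*"],
    )
    print(report)


asyncio.run(main())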
zrb/llm/tool/file.py ADDED
@@ -0,0 +1,419 @@
+ import fnmatch
+ import os
+ import re
+ from typing import Any
+
+ DEFAULT_EXCLUDED_PATTERNS = [
+     "__pycache__",
+     "*.pyc",
+     "*.pyo",
+     "*.pyd",
+     ".Python",
+     "build",
+     "dist",
+     ".env",
+     ".venv",
+     "env",
+     "venv",
+     ".idea",
+     ".vscode",
+     ".git",
+     "node_modules",
+     ".pytest_cache",
+     ".coverage",
+     "htmlcov",
+ ]
+
+
+ def list_files(
+     path: str = ".",
+     include_hidden: bool = False,
+     depth: int = 3,
+     excluded_patterns: list[str] | None = None,
+ ) -> dict[str, list[str]]:
+     """
+     Recursively explores and lists files within a directory tree up to a defined depth.
+
+     **WHEN TO USE:**
+     - To discover the project structure or find specific files when the path is unknown.
+     - To verify the existence of files in a directory.
+
+     **EFFICIENCY TIP:**
+     - Do NOT use this tool if you already know the file path. Use `read_file` directly.
+     - Keep `depth` low (default 3) to avoid overwhelming output.
+
+     **ARGS:**
+     - `path`: The root directory to start the search from.
+     - `include_hidden`: If True, includes hidden files and directories (starting with `.`).
+     - `depth`: Maximum levels of directories to descend.
+     - `excluded_patterns`: List of glob patterns to ignore.
+     """
+     all_files: list[str] = []
+     abs_path = os.path.abspath(os.path.expanduser(path))
+     if not os.path.exists(abs_path):
+         raise FileNotFoundError(f"Path does not exist: {path}")
+
+     patterns_to_exclude = (
+         excluded_patterns
+         if excluded_patterns is not None
+         else DEFAULT_EXCLUDED_PATTERNS
+     )
+     if depth <= 0:
+         depth = 1
+
+     initial_depth = abs_path.rstrip(os.sep).count(os.sep)
+     for root, dirs, files in os.walk(abs_path, topdown=True):
+         current_depth = root.rstrip(os.sep).count(os.sep) - initial_depth
+         if current_depth >= depth - 1:
+             del dirs[:]
+
+         dirs[:] = [
+             d
+             for d in dirs
+             if (include_hidden or not d.startswith("."))
+             and not _is_excluded(d, patterns_to_exclude)
+         ]
+
+         for filename in files:
+             if (include_hidden or not filename.startswith(".")) and not _is_excluded(
+                 filename, patterns_to_exclude
+             ):
+                 full_path = os.path.join(root, filename)
+                 rel_full_path = os.path.relpath(full_path, abs_path)
+                 if not _is_excluded(rel_full_path, patterns_to_exclude):
+                     all_files.append(rel_full_path)
+     return {"files": sorted(all_files)}
+
+
+ def read_file(
+     path: str, start_line: int | None = None, end_line: int | None = None
+ ) -> str:
+     """
+     Reads content from a file, optionally specifying a line range.
+
+     **EFFICIENCY TIP:**
+     - Prefer reading the **entire file** at once for full context (imports, class definitions).
+     - Only use `start_line` and `end_line` for extremely large files (e.g., logs).
+
+     **ARGS:**
+     - `path`: Path to the file to read.
+     - `start_line`: The 1-based line number to start reading from.
+     - `end_line`: The 1-based line number to stop reading at (inclusive).
+     """
+     abs_path = os.path.abspath(os.path.expanduser(path))
+     if not os.path.exists(abs_path):
+         return f"Error: File not found: {path}"
+
+     try:
+         with open(abs_path, "r", encoding="utf-8") as f:
+             lines = f.readlines()
+
+         total_lines = len(lines)
+         start_idx = (start_line - 1) if start_line is not None else 0
+         end_idx = end_line if end_line is not None else total_lines
+
+         if start_idx < 0:
+             start_idx = 0
+         if end_idx > total_lines:
+             end_idx = total_lines
+         if start_idx > end_idx:
+             start_idx = end_idx
+
+         selected_lines = lines[start_idx:end_idx]
+         content_result = "".join(selected_lines)
+
+         if start_line is not None or end_line is not None:
+             return f"File: {path} (Lines {start_idx + 1}-{end_idx} of {total_lines})\n{content_result}"
+         return content_result
+
+     except Exception as e:
+         return f"Error reading file {path}: {e}"
+
+
+ def read_files(paths: list[str]) -> dict[str, str]:
+     """
+     Reads content from multiple files simultaneously.
+
+     **USAGE:**
+     - Use this when you need context from several related files (e.g., a class definition and its tests).
+
+     **ARGS:**
+     - `paths`: List of file paths to read.
+     """
+     results = {}
+     for path in paths:
+         results[path] = read_file(path)
+     return results
+
+
+ def write_file(path: str, content: str, mode: str = "w") -> str:
+     """
+     Writes or appends content to a file.
+
+     **CRITICAL - PREVENT ERRORS:**
+     1. **ESCAPING:** Do NOT double-escape quotes in your JSON tool call.
+     2. **SIZE LIMIT:** DO NOT write more than 4000 characters in a single call.
+     3. **CHUNKING:** For large files, use `mode="w"` for the first chunk and `mode="a"` for the rest.
+
+     **ARGS:**
+     - `path`: Target file path.
+     - `content`: Text content to write.
+     - `mode`: File opening mode ("w" to overwrite, "a" to append).
+     """
+     abs_path = os.path.abspath(os.path.expanduser(path))
+     try:
+         os.makedirs(os.path.dirname(abs_path), exist_ok=True)
+         with open(abs_path, mode, encoding="utf-8") as f:
+             f.write(content)
+         return f"Successfully wrote to {path}"
+     except Exception as e:
+         return f"Error writing to file {path}: {e}"
+
+
+ def write_files(files: list[dict[str, str]]) -> dict[str, str]:
+     """
+     Performs batch write operations to multiple files.
+
+     **ARGS:**
+     - `files`: A list of dictionaries, each containing:
+         - `path` (str): Target file path.
+         - `content` (str): Text to write.
+         - `mode` (str, optional): "w" (overwrite, default) or "a" (append).
+     """
+     results = {}
+     for file_info in files:
+         path = file_info.get("path")
+         content = file_info.get("content")
+         mode = file_info.get("mode", "w")
+         if not path or content is None:
+             results[str(path)] = "Error: Missing path or content"
+             continue
+         results[path] = write_file(path, content, mode)
+     return results
+
+
+ def replace_in_file(path: str, old_text: str, new_text: str, count: int = -1) -> str:
+     """
+     Replaces exact text sequences within a file.
+
+     **CRITICAL INSTRUCTIONS:**
+     1. **PRECISION:** `old_text` must match the file content EXACTLY.
+     2. **READ FIRST:** Always `read_file` before replacing.
+     3. **MINIMAL CONTEXT:** Include 2-3 lines of context in `old_text` to ensure uniqueness.
+
+     **ARGS:**
+     - `path`: Path to the file to modify.
+     - `old_text`: The exact literal text to be replaced.
+     - `new_text`: The replacement text.
+     - `count`: Number of occurrences to replace (default -1 for all).
+     """
+     abs_path = os.path.abspath(os.path.expanduser(path))
+     if not os.path.exists(abs_path):
+         return f"Error: File not found: {path}"
+
+     try:
+         with open(abs_path, "r", encoding="utf-8") as f:
+             content = f.read()
+
+         if old_text not in content:
+             return f"Error: '{old_text}' not found in {path}"
+
+         new_content = content.replace(old_text, new_text, count)
+
+         if content == new_content:
+             return f"No changes made to {path}"
+
+         with open(abs_path, "w", encoding="utf-8") as f:
+             f.write(new_content)
+         return f"Successfully updated {path}"
+     except Exception as e:
+         return f"Error replacing text in {path}: {e}"
+
+
+ def search_files(
+     path: str,
+     regex: str,
+     file_pattern: str | None = None,
+     include_hidden: bool = True,
+ ) -> dict[str, Any]:
+     """
+     Searches for a regular expression pattern within files.
+
+     **WHEN TO USE:**
+     - To find usages of a function, variable, or string across the project.
+
+     **ARGS:**
+     - `path`: Root directory to search.
+     - `regex`: A standard Python regular expression.
+     - `file_pattern`: Optional glob (e.g., "*.py") to restrict the search.
+     - `include_hidden`: Whether to search in hidden files/dirs.
+     """
+     try:
+         pattern = re.compile(regex)
+     except re.error as e:
+         return {"error": f"Invalid regex pattern: {e}"}
+
+     search_results = {"summary": "", "results": []}
+     match_count = 0
+     searched_file_count = 0
+     file_match_count = 0
+
+     abs_path = os.path.abspath(os.path.expanduser(path))
+     if not os.path.exists(abs_path):
+         return {"error": f"Path not found: {path}"}
+
+     try:
+         for root, dirs, files in os.walk(abs_path):
+             # Skip hidden directories
+             dirs[:] = [d for d in dirs if include_hidden or not d.startswith(".")]
+             for filename in files:
+                 # Skip hidden files
+                 if not include_hidden and filename.startswith("."):
+                     continue
+                 # Apply file pattern filter if provided
+                 if file_pattern and not fnmatch.fnmatch(filename, file_pattern):
+                     continue
+
+                 file_path = os.path.join(root, filename)
+                 rel_file_path = os.path.relpath(file_path, os.getcwd())
+                 searched_file_count += 1
+
+                 try:
+                     matches = _get_file_matches(file_path, pattern)
+                     if matches:
+                         file_match_count += 1
+                         match_count += len(matches)
+                         search_results["results"].append(
+                             {"file": rel_file_path, "matches": matches}
+                         )
+                 except Exception:
+                     # Ignore read errors for binary files etc
+                     pass
+
+         if match_count == 0:
+             search_results["summary"] = (
+                 f"No matches found for pattern '{regex}' in path '{path}' "
+                 f"(searched {searched_file_count} files)."
+             )
+         else:
+             search_results["summary"] = (
+                 f"Found {match_count} matches in {file_match_count} files "
+                 f"(searched {searched_file_count} files)."
+             )
+         return search_results
+
+     except Exception as e:
+         return {"error": f"Error searching files: {e}"}
+
+
+ def _get_file_matches(
+     file_path: str,
+     pattern: re.Pattern,
+     context_lines: int = 2,
+ ) -> list[dict[str, any]]:
+     """Search for regex matches in a file with context."""
+     with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+         lines = f.readlines()
+     matches = []
+     for line_idx, line in enumerate(lines):
+         if pattern.search(line):
+             line_num = line_idx + 1
+             context_start = max(0, line_idx - context_lines)
+             context_end = min(len(lines), line_idx + context_lines + 1)
+             match_data = {
+                 "line_number": line_num,
+                 "line_content": line.rstrip(),
+                 "context_before": [
+                     lines[j].rstrip() for j in range(context_start, line_idx)
+                 ],
+                 "context_after": [
+                     lines[j].rstrip() for j in range(line_idx + 1, context_end)
+                 ],
+             }
+             matches.append(match_data)
+     return matches
+
+
+ def _is_excluded(name: str, patterns: list[str]) -> bool:
+     for pattern in patterns:
+         if fnmatch.fnmatch(name, pattern):
+             return True
+         parts = name.split(os.path.sep)
+         for part in parts:
+             if fnmatch.fnmatch(part, pattern):
+                 return True
+     return False
+
+
+ async def analyze_file(path: str, query: str) -> str:
+     """
+     Delegates deep analysis of a specific file to a specialized sub-agent.
+
+     **WHEN TO USE:**
+     - For complex questions about a file's logic, structure, or potential bugs.
+     - When you need a summary or specific details that require "understanding" the code.
+
+     **NOTE:** For simple data retrieval, use `read_file`.
+
+     **ARGS:**
+     - `path`: Path to the file to analyze.
+     - `query`: The specific analytical question or instruction.
+     """
+     # Lazy imports to avoid circular dependencies
+     from zrb.config.config import CFG
+     from zrb.llm.agent.agent import create_agent, run_agent
+     from zrb.llm.config.config import llm_config
+     from zrb.llm.config.limiter import llm_limiter
+     from zrb.llm.prompt.default import get_file_extractor_system_prompt
+
+     abs_path = os.path.abspath(os.path.expanduser(path))
+     if not os.path.exists(abs_path):
+         return f"Error: File not found: {path}"
+
+     # Read content
+     content = read_file(abs_path)
+     if content.startswith("Error:"):
+         return content
+
+     # Check token limit and truncate if necessary
+     token_threshold = CFG.LLM_FILE_ANALYSIS_TOKEN_THRESHOLD
+     # Simple character-based approximation (1 token ~ 4 chars)
+     char_limit = token_threshold * 4
+
+     clipped_content = content
+     if len(content) > char_limit:
+         clipped_content = content[:char_limit] + "\n...[TRUNCATED]..."
+
+     system_prompt = get_file_extractor_system_prompt()
+
+     # Create the sub-agent
+     agent = create_agent(
+         model=llm_config.model,
+         system_prompt=system_prompt,
+         tools=[
+             read_file,
+             search_files,
+         ],
+     )
+
+     # Construct the user message
+     user_message = f"""
+ Instruction: {query}
+ File Path: {abs_path}
+ File Content:
+ ```
+ {clipped_content}
+ ```
+ """
+
+     # Run the agent
+     # We pass empty history as this is a fresh sub-task
+     # We use print as the print_fn (which streams to stdout)
+     result, _ = await run_agent(
+         agent=agent,
+         message=user_message,
+         message_history=[],
+         limiter=llm_limiter,
+     )
+
+     return str(result)
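
To close, a small sketch of how the file tools above compose in practice: search for a symbol, read the file before editing (as the replace_in_file docstring instructs), then apply an exact-text replacement. The import path is assumed to mirror zrb/llm/tool/file.py, and the target file app/config.py is hypothetical.

from zrb.llm.tool.file import read_file, replace_in_file, search_files

# Regex search across the project; returns a summary plus per-file matches with context lines.
hits = search_files(path=".", regex=r"DEBUG\s*=", file_pattern="*.py")
print(hits.get("summary", hits))

# Read the target file first so old_text can be copied exactly.
print(read_file("app/config.py"))  # hypothetical path

# Replace the exact text sequence; count=-1 (the default) replaces every occurrence.
print(replace_in_file(path="app/config.py", old_text="DEBUG = True", new_text="DEBUG = False"))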