zrb 1.21.29__py3-none-any.whl → 2.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of zrb might be problematic. Click here for more details.
- zrb/__init__.py +118 -129
- zrb/builtin/__init__.py +54 -2
- zrb/builtin/llm/chat.py +147 -0
- zrb/callback/callback.py +8 -1
- zrb/cmd/cmd_result.py +2 -1
- zrb/config/config.py +491 -280
- zrb/config/helper.py +84 -0
- zrb/config/web_auth_config.py +50 -35
- zrb/context/any_shared_context.py +13 -2
- zrb/context/context.py +31 -3
- zrb/context/print_fn.py +13 -0
- zrb/context/shared_context.py +14 -1
- zrb/input/option_input.py +30 -2
- zrb/llm/agent/__init__.py +9 -0
- zrb/llm/agent/agent.py +215 -0
- zrb/llm/agent/summarizer.py +20 -0
- zrb/llm/app/__init__.py +10 -0
- zrb/llm/app/completion.py +281 -0
- zrb/llm/app/confirmation/allow_tool.py +66 -0
- zrb/llm/app/confirmation/handler.py +178 -0
- zrb/llm/app/confirmation/replace_confirmation.py +77 -0
- zrb/llm/app/keybinding.py +34 -0
- zrb/llm/app/layout.py +117 -0
- zrb/llm/app/lexer.py +155 -0
- zrb/llm/app/redirection.py +28 -0
- zrb/llm/app/style.py +16 -0
- zrb/llm/app/ui.py +733 -0
- zrb/llm/config/__init__.py +4 -0
- zrb/llm/config/config.py +122 -0
- zrb/llm/config/limiter.py +247 -0
- zrb/llm/history_manager/__init__.py +4 -0
- zrb/llm/history_manager/any_history_manager.py +23 -0
- zrb/llm/history_manager/file_history_manager.py +91 -0
- zrb/llm/history_processor/summarizer.py +108 -0
- zrb/llm/note/__init__.py +3 -0
- zrb/llm/note/manager.py +122 -0
- zrb/llm/prompt/__init__.py +29 -0
- zrb/llm/prompt/claude_compatibility.py +92 -0
- zrb/llm/prompt/compose.py +55 -0
- zrb/llm/prompt/default.py +51 -0
- zrb/llm/prompt/markdown/mandate.md +23 -0
- zrb/llm/prompt/markdown/persona.md +3 -0
- zrb/llm/prompt/markdown/summarizer.md +21 -0
- zrb/llm/prompt/note.py +41 -0
- zrb/llm/prompt/system_context.py +46 -0
- zrb/llm/prompt/zrb.py +41 -0
- zrb/llm/skill/__init__.py +3 -0
- zrb/llm/skill/manager.py +86 -0
- zrb/llm/task/__init__.py +4 -0
- zrb/llm/task/llm_chat_task.py +316 -0
- zrb/llm/task/llm_task.py +245 -0
- zrb/llm/tool/__init__.py +39 -0
- zrb/llm/tool/bash.py +75 -0
- zrb/llm/tool/code.py +266 -0
- zrb/llm/tool/file.py +419 -0
- zrb/llm/tool/note.py +70 -0
- zrb/{builtin/llm → llm}/tool/rag.py +8 -5
- zrb/llm/tool/search/brave.py +53 -0
- zrb/llm/tool/search/searxng.py +47 -0
- zrb/llm/tool/search/serpapi.py +47 -0
- zrb/llm/tool/skill.py +19 -0
- zrb/llm/tool/sub_agent.py +70 -0
- zrb/llm/tool/web.py +97 -0
- zrb/llm/tool/zrb_task.py +66 -0
- zrb/llm/util/attachment.py +101 -0
- zrb/llm/util/prompt.py +104 -0
- zrb/llm/util/stream_response.py +178 -0
- zrb/session/any_session.py +0 -3
- zrb/session/session.py +1 -1
- zrb/task/base/context.py +25 -13
- zrb/task/base/execution.py +52 -47
- zrb/task/base/lifecycle.py +7 -4
- zrb/task/base_task.py +48 -49
- zrb/task/base_trigger.py +4 -1
- zrb/task/cmd_task.py +6 -0
- zrb/task/http_check.py +11 -5
- zrb/task/make_task.py +3 -0
- zrb/task/rsync_task.py +5 -0
- zrb/task/scaffolder.py +7 -4
- zrb/task/scheduler.py +3 -0
- zrb/task/tcp_check.py +6 -4
- zrb/util/ascii_art/art/bee.txt +17 -0
- zrb/util/ascii_art/art/cat.txt +9 -0
- zrb/util/ascii_art/art/ghost.txt +16 -0
- zrb/util/ascii_art/art/panda.txt +17 -0
- zrb/util/ascii_art/art/rose.txt +14 -0
- zrb/util/ascii_art/art/unicorn.txt +15 -0
- zrb/util/ascii_art/banner.py +92 -0
- zrb/util/cli/markdown.py +22 -2
- zrb/util/cmd/command.py +33 -10
- zrb/util/file.py +51 -32
- zrb/util/match.py +78 -0
- zrb/util/run.py +3 -3
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/METADATA +9 -15
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/RECORD +100 -128
- zrb/attr/__init__.py +0 -0
- zrb/builtin/llm/attachment.py +0 -40
- zrb/builtin/llm/chat_completion.py +0 -274
- zrb/builtin/llm/chat_session.py +0 -270
- zrb/builtin/llm/chat_session_cmd.py +0 -288
- zrb/builtin/llm/chat_trigger.py +0 -79
- zrb/builtin/llm/history.py +0 -71
- zrb/builtin/llm/input.py +0 -27
- zrb/builtin/llm/llm_ask.py +0 -269
- zrb/builtin/llm/previous-session.js +0 -21
- zrb/builtin/llm/tool/__init__.py +0 -0
- zrb/builtin/llm/tool/api.py +0 -75
- zrb/builtin/llm/tool/cli.py +0 -52
- zrb/builtin/llm/tool/code.py +0 -236
- zrb/builtin/llm/tool/file.py +0 -560
- zrb/builtin/llm/tool/note.py +0 -84
- zrb/builtin/llm/tool/sub_agent.py +0 -150
- zrb/builtin/llm/tool/web.py +0 -171
- zrb/builtin/project/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/module/template/app_template/module/my_module/service/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/common/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/permission/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/role/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/user/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/schema/__init__.py +0 -0
- zrb/builtin/project/create/__init__.py +0 -0
- zrb/builtin/shell/__init__.py +0 -0
- zrb/builtin/shell/autocomplete/__init__.py +0 -0
- zrb/callback/__init__.py +0 -0
- zrb/cmd/__init__.py +0 -0
- zrb/config/default_prompt/interactive_system_prompt.md +0 -29
- zrb/config/default_prompt/persona.md +0 -1
- zrb/config/default_prompt/summarization_prompt.md +0 -57
- zrb/config/default_prompt/system_prompt.md +0 -38
- zrb/config/llm_config.py +0 -339
- zrb/config/llm_context/config.py +0 -166
- zrb/config/llm_context/config_parser.py +0 -40
- zrb/config/llm_context/workflow.py +0 -81
- zrb/config/llm_rate_limitter.py +0 -190
- zrb/content_transformer/__init__.py +0 -0
- zrb/context/__init__.py +0 -0
- zrb/dot_dict/__init__.py +0 -0
- zrb/env/__init__.py +0 -0
- zrb/group/__init__.py +0 -0
- zrb/input/__init__.py +0 -0
- zrb/runner/__init__.py +0 -0
- zrb/runner/web_route/__init__.py +0 -0
- zrb/runner/web_route/home_page/__init__.py +0 -0
- zrb/session/__init__.py +0 -0
- zrb/session_state_log/__init__.py +0 -0
- zrb/session_state_logger/__init__.py +0 -0
- zrb/task/__init__.py +0 -0
- zrb/task/base/__init__.py +0 -0
- zrb/task/llm/__init__.py +0 -0
- zrb/task/llm/agent.py +0 -204
- zrb/task/llm/agent_runner.py +0 -152
- zrb/task/llm/config.py +0 -122
- zrb/task/llm/conversation_history.py +0 -209
- zrb/task/llm/conversation_history_model.py +0 -67
- zrb/task/llm/default_workflow/coding/workflow.md +0 -41
- zrb/task/llm/default_workflow/copywriting/workflow.md +0 -68
- zrb/task/llm/default_workflow/git/workflow.md +0 -118
- zrb/task/llm/default_workflow/golang/workflow.md +0 -128
- zrb/task/llm/default_workflow/html-css/workflow.md +0 -135
- zrb/task/llm/default_workflow/java/workflow.md +0 -146
- zrb/task/llm/default_workflow/javascript/workflow.md +0 -158
- zrb/task/llm/default_workflow/python/workflow.md +0 -160
- zrb/task/llm/default_workflow/researching/workflow.md +0 -153
- zrb/task/llm/default_workflow/rust/workflow.md +0 -162
- zrb/task/llm/default_workflow/shell/workflow.md +0 -299
- zrb/task/llm/error.py +0 -95
- zrb/task/llm/file_replacement.py +0 -206
- zrb/task/llm/file_tool_model.py +0 -57
- zrb/task/llm/history_processor.py +0 -206
- zrb/task/llm/history_summarization.py +0 -25
- zrb/task/llm/print_node.py +0 -221
- zrb/task/llm/prompt.py +0 -321
- zrb/task/llm/subagent_conversation_history.py +0 -41
- zrb/task/llm/tool_wrapper.py +0 -361
- zrb/task/llm/typing.py +0 -3
- zrb/task/llm/workflow.py +0 -76
- zrb/task/llm_task.py +0 -379
- zrb/task_status/__init__.py +0 -0
- zrb/util/__init__.py +0 -0
- zrb/util/cli/__init__.py +0 -0
- zrb/util/cmd/__init__.py +0 -0
- zrb/util/codemod/__init__.py +0 -0
- zrb/util/string/__init__.py +0 -0
- zrb/xcom/__init__.py +0 -0
- /zrb/{config/default_prompt/file_extractor_system_prompt.md → llm/prompt/markdown/file_extractor.md} +0 -0
- /zrb/{config/default_prompt/repo_extractor_system_prompt.md → llm/prompt/markdown/repo_extractor.md} +0 -0
- /zrb/{config/default_prompt/repo_summarizer_system_prompt.md → llm/prompt/markdown/repo_summarizer.md} +0 -0
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/WHEEL +0 -0
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/entry_points.txt +0 -0
zrb/llm/tool/code.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import fnmatch
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from zrb.config.config import CFG
|
|
6
|
+
from zrb.llm.agent.agent import create_agent, run_agent
|
|
7
|
+
from zrb.llm.config.config import llm_config
|
|
8
|
+
from zrb.llm.config.limiter import llm_limiter
|
|
9
|
+
from zrb.llm.prompt.default import (
|
|
10
|
+
get_repo_extractor_system_prompt,
|
|
11
|
+
get_repo_summarizer_system_prompt,
|
|
12
|
+
)
|
|
13
|
+
from zrb.llm.tool.file import DEFAULT_EXCLUDED_PATTERNS
|
|
14
|
+
|
|
15
|
+
_DEFAULT_EXTENSIONS = [
|
|
16
|
+
"py",
|
|
17
|
+
"go",
|
|
18
|
+
"java",
|
|
19
|
+
"ts",
|
|
20
|
+
"js",
|
|
21
|
+
"rs",
|
|
22
|
+
"rb",
|
|
23
|
+
"php",
|
|
24
|
+
"sh",
|
|
25
|
+
"bash",
|
|
26
|
+
"c",
|
|
27
|
+
"cpp",
|
|
28
|
+
"h",
|
|
29
|
+
"hpp",
|
|
30
|
+
"cs",
|
|
31
|
+
"swift",
|
|
32
|
+
"kt",
|
|
33
|
+
"scala",
|
|
34
|
+
"m",
|
|
35
|
+
"pl",
|
|
36
|
+
"lua",
|
|
37
|
+
"sql",
|
|
38
|
+
"html",
|
|
39
|
+
"css",
|
|
40
|
+
"scss",
|
|
41
|
+
"less",
|
|
42
|
+
"json",
|
|
43
|
+
"yaml",
|
|
44
|
+
"yml",
|
|
45
|
+
"toml",
|
|
46
|
+
"ini",
|
|
47
|
+
"xml",
|
|
48
|
+
"md",
|
|
49
|
+
"rst",
|
|
50
|
+
"txt",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def analyze_code(
    path: str,
    query: str,
    extensions: list[str] | None = None,
    exclude_patterns: list[str] | None = None,
) -> str:
    """
    Performs a deep, semantic analysis of an entire codebase or directory to answer complex architectural or logic-based questions.
    This tool uses a 'map-reduce' strategy to handle large repositories that exceed single-prompt limits.

    **WHEN TO USE:**
    - To understand system-wide flows, architectural patterns, or cross-file dependencies.
    - When you need a summary of how a feature is implemented across multiple files.
    - To identify potential refactoring opportunities or technical debt.

    **LIMITATIONS:**
    - It extracts and summarizes information; it does not read every single byte if the repo is massive.
    - For precise line-by-line reading of a known file, use `read_file` instead.

    Args:
        path (str): Path to the directory or repository.
        query (str): A clear, specific question or analysis goal (e.g., "How is authentication handled?").
        extensions (list[str], optional): File extensions to include (e.g., ["py", "ts"]).
        exclude_patterns (list[str], optional): Glob patterns to ignore (e.g., ["tests/*"]).

    Returns:
        str: A comprehensive analytical report, or an error/empty-result message.
    """
    # Fall back to module-level defaults when filters are not supplied.
    if extensions is None:
        extensions = _DEFAULT_EXTENSIONS
    if exclude_patterns is None:
        exclude_patterns = DEFAULT_EXCLUDED_PATTERNS

    abs_path = os.path.abspath(os.path.expanduser(path))
    if not os.path.exists(abs_path):
        # Errors are returned as strings (tool convention), not raised.
        return f"Error: Path not found: {path}"

    # 1. Gather files (path + content dicts, sorted for determinism).
    file_metadatas = _get_file_metadatas(abs_path, extensions, exclude_patterns)
    if not file_metadatas:
        return "No files found matching the criteria."

    print(f" 📝 Extraction ({len(file_metadatas)} files)")

    # 2. Extract Info (the "map" phase): chunk files under the token
    # threshold and run the extractor agent over each chunk.
    extraction_token_threshold = CFG.LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD
    extracted_infos = await _extract_info(
        file_metadatas=file_metadatas,
        query=query,
        token_limit=extraction_token_threshold,
    )

    if not extracted_infos:
        return "No information could be extracted from the files."

    if len(extracted_infos) == 1:
        # Single chunk: no reduce phase needed.
        return extracted_infos[0]

    # 3. Summarize Info (Reduce): repeatedly fold chunk summaries together
    # until a single report remains. Each pass shrinks the chunk count
    # (assuming summaries fit more per token-limited batch than inputs).
    summarization_token_threshold = CFG.LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD
    summarized_infos = extracted_infos

    while len(summarized_infos) > 1:
        print(f" 📝 Summarization ({len(summarized_infos)} chunks)")
        summarized_infos = await _summarize_info(
            extracted_infos=summarized_infos,
            query=query,
            token_limit=summarization_token_threshold,
        )

    return summarized_infos[0]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _get_file_metadatas(
    dir_path: str,
    extensions: list[str],
    exclude_patterns: list[str],
) -> list[dict[str, str]]:
    """
    Walk *dir_path* and collect matching files as {"path", "content"} dicts.

    Only files whose extension is in *extensions* and whose relative path is
    not excluded by *exclude_patterns* are included. Read errors are printed
    and skipped rather than raised. The result is sorted by relative path so
    downstream chunking is deterministic.
    """
    metadata_list = []
    for root, _, files in os.walk(dir_path):
        # Sort in place for a stable traversal order.
        files.sort()
        for file in files:
            # Extension filter: match ".ext" suffix against the allow-list.
            if not any(file.endswith(f".{ext}") for ext in extensions):
                continue
            file_path = os.path.join(root, file)
            try:
                rel_path = os.path.relpath(file_path, dir_path)
                if _is_excluded(rel_path, exclude_patterns):
                    continue
                # errors="ignore" tolerates stray non-UTF-8 bytes.
                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                    metadata_list.append({"path": rel_path, "content": f.read()})
            except Exception as e:
                # Best-effort: report and continue with the remaining files.
                print(f"Error reading file {file_path}: {e}")
    metadata_list.sort(key=lambda m: m["path"])
    return metadata_list
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _is_excluded(name: str, patterns: list[str]) -> bool:
|
|
152
|
+
for pattern in patterns:
|
|
153
|
+
if fnmatch.fnmatch(name, pattern):
|
|
154
|
+
return True
|
|
155
|
+
parts = name.split(os.path.sep)
|
|
156
|
+
for part in parts:
|
|
157
|
+
if fnmatch.fnmatch(part, pattern):
|
|
158
|
+
return True
|
|
159
|
+
return False
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
async def _extract_info(
    file_metadatas: list[dict[str, str]],
    query: str,
    token_limit: int,
) -> list[str]:
    """
    Map phase: run the repo-extractor agent over token-bounded batches of
    files and return one extracted-info string per batch.

    Files are packed greedily in order; a file that would push the current
    batch past *token_limit* starts a new batch (even if the file alone
    exceeds the limit — it still becomes its own batch).
    """
    agent = create_agent(
        model=llm_config.model,
        system_prompt=get_repo_extractor_system_prompt(),
    )

    extracted_infos = []
    content_buffer = []
    current_token_count = 0

    # We estimate token count of the prompt template overhead
    base_overhead = 100

    for metadata in file_metadatas:
        path = metadata.get("path", "")
        content = metadata.get("content", "")
        file_obj = {"path": path, "content": content}
        # Token cost is measured on the JSON form actually sent to the agent.
        file_str = json.dumps(file_obj)
        file_tokens = llm_limiter.count_tokens(file_str)

        if current_token_count + file_tokens + base_overhead > token_limit:
            # Flush the full batch, then start a new one with this file.
            if content_buffer:
                await _run_extraction(agent, query, content_buffer, extracted_infos)

            content_buffer = [file_obj]
            current_token_count = file_tokens
        else:
            content_buffer.append(file_obj)
            current_token_count += file_tokens

    # Process remaining buffer
    if content_buffer:
        await _run_extraction(agent, query, content_buffer, extracted_infos)

    return extracted_infos
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
async def _run_extraction(agent, query, content_buffer, extracted_infos):
    """
    Run one extraction call for a batch of files and append the result
    (as a string) to *extracted_infos*.

    Mutates *extracted_infos* in place; returns None.
    """
    prompt_data = {
        "main_assistant_query": query,
        "files": content_buffer,
    }
    # We serialize to JSON for the prompt
    message = json.dumps(prompt_data)

    result, _ = await run_agent(
        agent=agent,
        message=message,
        message_history=[],  # Stateless: each batch is an independent call
        limiter=llm_limiter,
    )
    extracted_infos.append(str(result))
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
async def _summarize_info(
    extracted_infos: list[str],
    query: str,
    token_limit: int,
) -> list[str]:
    """
    Reduce phase: merge extracted-info strings into fewer summaries.

    Infos are concatenated greedily into a text buffer until adding the
    next one would exceed *token_limit*; each full buffer is summarized by
    the repo-summarizer agent. Returns one summary string per buffer.
    """
    agent = create_agent(
        model=llm_config.model,
        system_prompt=get_repo_summarizer_system_prompt(),
    )

    summarized_infos = []
    content_buffer = ""
    # Overhead for prompt structure
    base_overhead = 100

    for info in extracted_infos:
        # Check if adding this info exceeds limit
        if (
            llm_limiter.count_tokens(content_buffer + info) + base_overhead
            > token_limit
        ):
            # Flush the current buffer, then start a fresh one with this info.
            if content_buffer:
                await _run_summarization(agent, query, content_buffer, summarized_infos)
            content_buffer = info
        else:
            content_buffer += info + "\n"

    # Flush whatever remains after the loop.
    if content_buffer:
        await _run_summarization(agent, query, content_buffer, summarized_infos)

    return summarized_infos
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
async def _run_summarization(agent, query, content_buffer, summarized_infos):
    """
    Run one summarization call over buffered extracted info and append the
    result (as a string) to *summarized_infos*.

    Mutates *summarized_infos* in place; returns None.
    """
    prompt_data = {
        "main_assistant_query": query,
        "extracted_info": content_buffer,
    }
    message = json.dumps(prompt_data)

    result, _ = await run_agent(
        agent=agent,
        message=message,
        message_history=[],  # Stateless: each batch is an independent call
        limiter=llm_limiter,
    )
    summarized_infos.append(str(result))
|
zrb/llm/tool/file.py
ADDED
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
import fnmatch
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
# Glob patterns ignored by default during listing/searching: Python
# bytecode and caches, build artifacts, virtualenvs, editor metadata,
# VCS internals, and test/coverage output.
DEFAULT_EXCLUDED_PATTERNS = (
    "__pycache__ *.pyc *.pyo *.pyd .Python build dist "
    ".env .venv env venv .idea .vscode .git node_modules "
    ".pytest_cache .coverage htmlcov"
).split()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def list_files(
    path: str = ".",
    include_hidden: bool = False,
    depth: int = 3,
    excluded_patterns: list[str] | None = None,
) -> dict[str, list[str]]:
    """
    Recursively explores and lists files within a directory tree up to a defined depth.

    **WHEN TO USE:**
    - To discover the project structure or find specific files when the path is unknown.
    - To verify the existence of files in a directory.

    **EFFICIENCY TIP:**
    - Do NOT use this tool if you already know the file path. Use `read_file` directly.
    - Keep `depth` low (default 3) to avoid overwhelming output.

    **ARGS:**
    - `path`: The root directory to start the search from.
    - `include_hidden`: If True, includes hidden files and directories (starting with `.`).
    - `depth`: Maximum levels of directories to descend.
    - `excluded_patterns`: List of glob patterns to ignore.

    Returns a dict with a single "files" key mapping to the sorted list of
    paths relative to *path*. Raises FileNotFoundError if *path* is missing.
    """
    all_files: list[str] = []
    abs_path = os.path.abspath(os.path.expanduser(path))
    if not os.path.exists(abs_path):
        raise FileNotFoundError(f"Path does not exist: {path}")

    patterns_to_exclude = (
        excluded_patterns
        if excluded_patterns is not None
        else DEFAULT_EXCLUDED_PATTERNS
    )
    # Normalize non-positive depth to 1 (list only the top level).
    if depth <= 0:
        depth = 1

    # Depth is measured by counting path separators relative to the root.
    initial_depth = abs_path.rstrip(os.sep).count(os.sep)
    for root, dirs, files in os.walk(abs_path, topdown=True):
        current_depth = root.rstrip(os.sep).count(os.sep) - initial_depth
        if current_depth >= depth - 1:
            # Prune in place: with topdown=True, clearing `dirs` stops
            # os.walk from descending any further from this directory.
            del dirs[:]

        # Filter out hidden/excluded directories before descent.
        dirs[:] = [
            d
            for d in dirs
            if (include_hidden or not d.startswith("."))
            and not _is_excluded(d, patterns_to_exclude)
        ]

        for filename in files:
            if (include_hidden or not filename.startswith(".")) and not _is_excluded(
                filename, patterns_to_exclude
            ):
                full_path = os.path.join(root, filename)
                rel_full_path = os.path.relpath(full_path, abs_path)
                # Second check: the *relative path* may also hit a pattern
                # (e.g. "tests/*") even when the bare filename did not.
                if not _is_excluded(rel_full_path, patterns_to_exclude):
                    all_files.append(rel_full_path)
    return {"files": sorted(all_files)}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def read_file(
|
|
89
|
+
path: str, start_line: int | None = None, end_line: int | None = None
|
|
90
|
+
) -> str:
|
|
91
|
+
"""
|
|
92
|
+
Reads content from a file, optionally specifying a line range.
|
|
93
|
+
|
|
94
|
+
**EFFICIENCY TIP:**
|
|
95
|
+
- Prefer reading the **entire file** at once for full context (imports, class definitions).
|
|
96
|
+
- Only use `start_line` and `end_line` for extremely large files (e.g., logs).
|
|
97
|
+
|
|
98
|
+
**ARGS:**
|
|
99
|
+
- `path`: Path to the file to read.
|
|
100
|
+
- `start_line`: The 1-based line number to start reading from.
|
|
101
|
+
- `end_line`: The 1-based line number to stop reading at (inclusive).
|
|
102
|
+
"""
|
|
103
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
|
104
|
+
if not os.path.exists(abs_path):
|
|
105
|
+
return f"Error: File not found: {path}"
|
|
106
|
+
|
|
107
|
+
try:
|
|
108
|
+
with open(abs_path, "r", encoding="utf-8") as f:
|
|
109
|
+
lines = f.readlines()
|
|
110
|
+
|
|
111
|
+
total_lines = len(lines)
|
|
112
|
+
start_idx = (start_line - 1) if start_line is not None else 0
|
|
113
|
+
end_idx = end_line if end_line is not None else total_lines
|
|
114
|
+
|
|
115
|
+
if start_idx < 0:
|
|
116
|
+
start_idx = 0
|
|
117
|
+
if end_idx > total_lines:
|
|
118
|
+
end_idx = total_lines
|
|
119
|
+
if start_idx > end_idx:
|
|
120
|
+
start_idx = end_idx
|
|
121
|
+
|
|
122
|
+
selected_lines = lines[start_idx:end_idx]
|
|
123
|
+
content_result = "".join(selected_lines)
|
|
124
|
+
|
|
125
|
+
if start_line is not None or end_line is not None:
|
|
126
|
+
return f"File: {path} (Lines {start_idx + 1}-{end_idx} of {total_lines})\n{content_result}"
|
|
127
|
+
return content_result
|
|
128
|
+
|
|
129
|
+
except Exception as e:
|
|
130
|
+
return f"Error reading file {path}: {e}"
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def read_files(paths: list[str]) -> dict[str, str]:
    """
    Reads content from multiple files simultaneously.

    **USAGE:**
    - Use this when you need context from several related files (e.g., a class definition and its tests).

    **ARGS:**
    - `paths`: List of file paths to read.

    Returns a mapping from each requested path to its content (or the
    per-file "Error: ..." string produced by `read_file`).
    """
    # Delegate each path to read_file; failures surface as error strings.
    return {file_path: read_file(file_path) for file_path in paths}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def write_file(path: str, content: str, mode: str = "w") -> str:
    """
    Writes or appends content to a file.

    **CRITICAL - PREVENT ERRORS:**
    1. **ESCAPING:** Do NOT double-escape quotes in your JSON tool call.
    2. **SIZE LIMIT:** DO NOT write more than 4000 characters in a single call.
    3. **CHUNKING:** For large files, use `mode="w"` for the first chunk and `mode="a"` for the rest.

    **ARGS:**
    - `path`: Target file path.
    - `content`: Text content to write.
    - `mode`: File opening mode ("w" to overwrite, "a" to append).

    Returns a success message, or an "Error: ..." string on failure.
    """
    abs_path = os.path.abspath(os.path.expanduser(path))
    try:
        # Create any missing parent directories before opening the file.
        parent_dir = os.path.dirname(abs_path)
        os.makedirs(parent_dir, exist_ok=True)
        with open(abs_path, mode, encoding="utf-8") as handle:
            handle.write(content)
    except Exception as e:
        return f"Error writing to file {path}: {e}"
    return f"Successfully wrote to {path}"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def write_files(files: list[dict[str, str]]) -> dict[str, str]:
    """
    Performs batch write operations to multiple files.

    **ARGS:**
    - `files`: A list of dictionaries, each containing:
        - `path` (str): Target file path.
        - `content` (str): Text to write.
        - `mode` (str, optional): "w" (overwrite, default) or "a" (append).

    Returns a mapping from each path to its per-file result message.
    """
    results: dict[str, str] = {}
    for entry in files:
        target = entry.get("path")
        text = entry.get("content")
        # Reject entries missing either field; an empty path also fails here.
        if not target or text is None:
            results[str(target)] = "Error: Missing path or content"
        else:
            results[target] = write_file(target, text, entry.get("mode", "w"))
    return results
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def replace_in_file(path: str, old_text: str, new_text: str, count: int = -1) -> str:
    """
    Replaces exact text sequences within a file.

    **CRITICAL INSTRUCTIONS:**
    1. **PRECISION:** `old_text` must match the file content EXACTLY.
    2. **READ FIRST:** Always `read_file` before replacing.
    3. **MINIMAL CONTEXT:** Include 2-3 lines of context in `old_text` to ensure uniqueness.

    **ARGS:**
    - `path`: Path to the file to modify.
    - `old_text`: The exact literal text to be replaced.
    - `new_text`: The replacement text.
    - `count`: Number of occurrences to replace (default -1 for all).

    Returns a status message describing the outcome.
    """
    abs_path = os.path.abspath(os.path.expanduser(path))
    if not os.path.exists(abs_path):
        return f"Error: File not found: {path}"

    try:
        with open(abs_path, "r", encoding="utf-8") as f:
            original = f.read()

        if old_text not in original:
            return f"Error: '{old_text}' not found in {path}"

        updated = original.replace(old_text, new_text, count)

        # A no-op replacement (old == new) is reported, not written.
        if updated == original:
            return f"No changes made to {path}"

        with open(abs_path, "w", encoding="utf-8") as f:
            f.write(updated)
        return f"Successfully updated {path}"
    except Exception as e:
        return f"Error replacing text in {path}: {e}"
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def search_files(
    path: str,
    regex: str,
    file_pattern: str | None = None,
    include_hidden: bool = True,
) -> dict[str, Any]:
    """
    Searches for a regular expression pattern within files.

    **WHEN TO USE:**
    - To find usages of a function, variable, or string across the project.

    **ARGS:**
    - `path`: Root directory to search.
    - `regex`: A standard Python regular expression.
    - `file_pattern`: Optional glob (e.g., "*.py") to restrict the search.
    - `include_hidden`: Whether to search in hidden files/dirs.

    Returns a dict with "summary" and "results" keys on success, or a dict
    with a single "error" key on failure. Result file paths are reported
    relative to the current working directory, not to *path*.
    """
    try:
        pattern = re.compile(regex)
    except re.error as e:
        return {"error": f"Invalid regex pattern: {e}"}

    search_results = {"summary": "", "results": []}
    match_count = 0
    searched_file_count = 0
    file_match_count = 0

    abs_path = os.path.abspath(os.path.expanduser(path))
    if not os.path.exists(abs_path):
        return {"error": f"Path not found: {path}"}

    try:
        for root, dirs, files in os.walk(abs_path):
            # Skip hidden directories (pruned in place so os.walk does
            # not descend into them).
            dirs[:] = [d for d in dirs if include_hidden or not d.startswith(".")]
            for filename in files:
                # Skip hidden files
                if not include_hidden and filename.startswith("."):
                    continue
                # Apply file pattern filter if provided
                if file_pattern and not fnmatch.fnmatch(filename, file_pattern):
                    continue

                file_path = os.path.join(root, filename)
                rel_file_path = os.path.relpath(file_path, os.getcwd())
                searched_file_count += 1

                try:
                    matches = _get_file_matches(file_path, pattern)
                    if matches:
                        file_match_count += 1
                        match_count += len(matches)
                        search_results["results"].append(
                            {"file": rel_file_path, "matches": matches}
                        )
                except Exception:
                    # Ignore read errors for binary files etc
                    pass

        if match_count == 0:
            search_results["summary"] = (
                f"No matches found for pattern '{regex}' in path '{path}' "
                f"(searched {searched_file_count} files)."
            )
        else:
            search_results["summary"] = (
                f"Found {match_count} matches in {file_match_count} files "
                f"(searched {searched_file_count} files)."
            )
        return search_results

    except Exception as e:
        return {"error": f"Error searching files: {e}"}
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _get_file_matches(
|
|
310
|
+
file_path: str,
|
|
311
|
+
pattern: re.Pattern,
|
|
312
|
+
context_lines: int = 2,
|
|
313
|
+
) -> list[dict[str, any]]:
|
|
314
|
+
"""Search for regex matches in a file with context."""
|
|
315
|
+
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
|
|
316
|
+
lines = f.readlines()
|
|
317
|
+
matches = []
|
|
318
|
+
for line_idx, line in enumerate(lines):
|
|
319
|
+
if pattern.search(line):
|
|
320
|
+
line_num = line_idx + 1
|
|
321
|
+
context_start = max(0, line_idx - context_lines)
|
|
322
|
+
context_end = min(len(lines), line_idx + context_lines + 1)
|
|
323
|
+
match_data = {
|
|
324
|
+
"line_number": line_num,
|
|
325
|
+
"line_content": line.rstrip(),
|
|
326
|
+
"context_before": [
|
|
327
|
+
lines[j].rstrip() for j in range(context_start, line_idx)
|
|
328
|
+
],
|
|
329
|
+
"context_after": [
|
|
330
|
+
lines[j].rstrip() for j in range(line_idx + 1, context_end)
|
|
331
|
+
],
|
|
332
|
+
}
|
|
333
|
+
matches.append(match_data)
|
|
334
|
+
return matches
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _is_excluded(name: str, patterns: list[str]) -> bool:
|
|
338
|
+
for pattern in patterns:
|
|
339
|
+
if fnmatch.fnmatch(name, pattern):
|
|
340
|
+
return True
|
|
341
|
+
parts = name.split(os.path.sep)
|
|
342
|
+
for part in parts:
|
|
343
|
+
if fnmatch.fnmatch(part, pattern):
|
|
344
|
+
return True
|
|
345
|
+
return False
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
async def analyze_file(path: str, query: str) -> str:
    """
    Delegates deep analysis of a specific file to a specialized sub-agent.

    **WHEN TO USE:**
    - For complex questions about a file's logic, structure, or potential bugs.
    - When you need a summary or specific details that require "understanding" the code.

    **NOTE:** For simple data retrieval, use `read_file`.

    **ARGS:**
    - `path`: Path to the file to analyze.
    - `query`: The specific analytical question or instruction.

    Returns the sub-agent's answer as a string, or an "Error: ..." string.
    """
    # Lazy imports to avoid circular dependencies
    from zrb.config.config import CFG
    from zrb.llm.agent.agent import create_agent, run_agent
    from zrb.llm.config.config import llm_config
    from zrb.llm.config.limiter import llm_limiter
    from zrb.llm.prompt.default import get_file_extractor_system_prompt

    abs_path = os.path.abspath(os.path.expanduser(path))
    if not os.path.exists(abs_path):
        return f"Error: File not found: {path}"

    # Read content (read_file reports failures as "Error: ..." strings).
    content = read_file(abs_path)
    if content.startswith("Error:"):
        return content

    # Check token limit and truncate if necessary
    token_threshold = CFG.LLM_FILE_ANALYSIS_TOKEN_THRESHOLD
    # Simple character-based approximation (1 token ~ 4 chars)
    char_limit = token_threshold * 4

    clipped_content = content
    if len(content) > char_limit:
        # The sub-agent still gets read_file as a tool, so it can fetch
        # the truncated remainder itself if needed.
        clipped_content = content[:char_limit] + "\n...[TRUNCATED]..."

    system_prompt = get_file_extractor_system_prompt()

    # Create the sub-agent
    agent = create_agent(
        model=llm_config.model,
        system_prompt=system_prompt,
        tools=[
            read_file,
            search_files,
        ],
    )

    # Construct the user message
    user_message = f"""
Instruction: {query}
File Path: {abs_path}
File Content:
```
{clipped_content}
```
"""

    # Run the agent
    # We pass empty history as this is a fresh sub-task
    # We use print as the print_fn (which streams to stdout)
    result, _ = await run_agent(
        agent=agent,
        message=user_message,
        message_history=[],
        limiter=llm_limiter,
    )

    return str(result)
|