zrb 1.21.29__py3-none-any.whl → 2.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- zrb/__init__.py +118 -129
- zrb/builtin/__init__.py +54 -2
- zrb/builtin/llm/chat.py +147 -0
- zrb/callback/callback.py +8 -1
- zrb/cmd/cmd_result.py +2 -1
- zrb/config/config.py +491 -280
- zrb/config/helper.py +84 -0
- zrb/config/web_auth_config.py +50 -35
- zrb/context/any_shared_context.py +13 -2
- zrb/context/context.py +31 -3
- zrb/context/print_fn.py +13 -0
- zrb/context/shared_context.py +14 -1
- zrb/input/option_input.py +30 -2
- zrb/llm/agent/__init__.py +9 -0
- zrb/llm/agent/agent.py +215 -0
- zrb/llm/agent/summarizer.py +20 -0
- zrb/llm/app/__init__.py +10 -0
- zrb/llm/app/completion.py +281 -0
- zrb/llm/app/confirmation/allow_tool.py +66 -0
- zrb/llm/app/confirmation/handler.py +178 -0
- zrb/llm/app/confirmation/replace_confirmation.py +77 -0
- zrb/llm/app/keybinding.py +34 -0
- zrb/llm/app/layout.py +117 -0
- zrb/llm/app/lexer.py +155 -0
- zrb/llm/app/redirection.py +28 -0
- zrb/llm/app/style.py +16 -0
- zrb/llm/app/ui.py +733 -0
- zrb/llm/config/__init__.py +4 -0
- zrb/llm/config/config.py +122 -0
- zrb/llm/config/limiter.py +247 -0
- zrb/llm/history_manager/__init__.py +4 -0
- zrb/llm/history_manager/any_history_manager.py +23 -0
- zrb/llm/history_manager/file_history_manager.py +91 -0
- zrb/llm/history_processor/summarizer.py +108 -0
- zrb/llm/note/__init__.py +3 -0
- zrb/llm/note/manager.py +122 -0
- zrb/llm/prompt/__init__.py +29 -0
- zrb/llm/prompt/claude_compatibility.py +92 -0
- zrb/llm/prompt/compose.py +55 -0
- zrb/llm/prompt/default.py +51 -0
- zrb/llm/prompt/markdown/mandate.md +23 -0
- zrb/llm/prompt/markdown/persona.md +3 -0
- zrb/llm/prompt/markdown/summarizer.md +21 -0
- zrb/llm/prompt/note.py +41 -0
- zrb/llm/prompt/system_context.py +46 -0
- zrb/llm/prompt/zrb.py +41 -0
- zrb/llm/skill/__init__.py +3 -0
- zrb/llm/skill/manager.py +86 -0
- zrb/llm/task/__init__.py +4 -0
- zrb/llm/task/llm_chat_task.py +316 -0
- zrb/llm/task/llm_task.py +245 -0
- zrb/llm/tool/__init__.py +39 -0
- zrb/llm/tool/bash.py +75 -0
- zrb/llm/tool/code.py +266 -0
- zrb/llm/tool/file.py +419 -0
- zrb/llm/tool/note.py +70 -0
- zrb/{builtin/llm → llm}/tool/rag.py +8 -5
- zrb/llm/tool/search/brave.py +53 -0
- zrb/llm/tool/search/searxng.py +47 -0
- zrb/llm/tool/search/serpapi.py +47 -0
- zrb/llm/tool/skill.py +19 -0
- zrb/llm/tool/sub_agent.py +70 -0
- zrb/llm/tool/web.py +97 -0
- zrb/llm/tool/zrb_task.py +66 -0
- zrb/llm/util/attachment.py +101 -0
- zrb/llm/util/prompt.py +104 -0
- zrb/llm/util/stream_response.py +178 -0
- zrb/session/any_session.py +0 -3
- zrb/session/session.py +1 -1
- zrb/task/base/context.py +25 -13
- zrb/task/base/execution.py +52 -47
- zrb/task/base/lifecycle.py +7 -4
- zrb/task/base_task.py +48 -49
- zrb/task/base_trigger.py +4 -1
- zrb/task/cmd_task.py +6 -0
- zrb/task/http_check.py +11 -5
- zrb/task/make_task.py +3 -0
- zrb/task/rsync_task.py +5 -0
- zrb/task/scaffolder.py +7 -4
- zrb/task/scheduler.py +3 -0
- zrb/task/tcp_check.py +6 -4
- zrb/util/ascii_art/art/bee.txt +17 -0
- zrb/util/ascii_art/art/cat.txt +9 -0
- zrb/util/ascii_art/art/ghost.txt +16 -0
- zrb/util/ascii_art/art/panda.txt +17 -0
- zrb/util/ascii_art/art/rose.txt +14 -0
- zrb/util/ascii_art/art/unicorn.txt +15 -0
- zrb/util/ascii_art/banner.py +92 -0
- zrb/util/cli/markdown.py +22 -2
- zrb/util/cmd/command.py +33 -10
- zrb/util/file.py +51 -32
- zrb/util/match.py +78 -0
- zrb/util/run.py +3 -3
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/METADATA +9 -15
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/RECORD +100 -128
- zrb/attr/__init__.py +0 -0
- zrb/builtin/llm/attachment.py +0 -40
- zrb/builtin/llm/chat_completion.py +0 -274
- zrb/builtin/llm/chat_session.py +0 -270
- zrb/builtin/llm/chat_session_cmd.py +0 -288
- zrb/builtin/llm/chat_trigger.py +0 -79
- zrb/builtin/llm/history.py +0 -71
- zrb/builtin/llm/input.py +0 -27
- zrb/builtin/llm/llm_ask.py +0 -269
- zrb/builtin/llm/previous-session.js +0 -21
- zrb/builtin/llm/tool/__init__.py +0 -0
- zrb/builtin/llm/tool/api.py +0 -75
- zrb/builtin/llm/tool/cli.py +0 -52
- zrb/builtin/llm/tool/code.py +0 -236
- zrb/builtin/llm/tool/file.py +0 -560
- zrb/builtin/llm/tool/note.py +0 -84
- zrb/builtin/llm/tool/sub_agent.py +0 -150
- zrb/builtin/llm/tool/web.py +0 -171
- zrb/builtin/project/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/module/template/app_template/module/my_module/service/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/common/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/permission/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/role/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/user/__init__.py +0 -0
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/schema/__init__.py +0 -0
- zrb/builtin/project/create/__init__.py +0 -0
- zrb/builtin/shell/__init__.py +0 -0
- zrb/builtin/shell/autocomplete/__init__.py +0 -0
- zrb/callback/__init__.py +0 -0
- zrb/cmd/__init__.py +0 -0
- zrb/config/default_prompt/interactive_system_prompt.md +0 -29
- zrb/config/default_prompt/persona.md +0 -1
- zrb/config/default_prompt/summarization_prompt.md +0 -57
- zrb/config/default_prompt/system_prompt.md +0 -38
- zrb/config/llm_config.py +0 -339
- zrb/config/llm_context/config.py +0 -166
- zrb/config/llm_context/config_parser.py +0 -40
- zrb/config/llm_context/workflow.py +0 -81
- zrb/config/llm_rate_limitter.py +0 -190
- zrb/content_transformer/__init__.py +0 -0
- zrb/context/__init__.py +0 -0
- zrb/dot_dict/__init__.py +0 -0
- zrb/env/__init__.py +0 -0
- zrb/group/__init__.py +0 -0
- zrb/input/__init__.py +0 -0
- zrb/runner/__init__.py +0 -0
- zrb/runner/web_route/__init__.py +0 -0
- zrb/runner/web_route/home_page/__init__.py +0 -0
- zrb/session/__init__.py +0 -0
- zrb/session_state_log/__init__.py +0 -0
- zrb/session_state_logger/__init__.py +0 -0
- zrb/task/__init__.py +0 -0
- zrb/task/base/__init__.py +0 -0
- zrb/task/llm/__init__.py +0 -0
- zrb/task/llm/agent.py +0 -204
- zrb/task/llm/agent_runner.py +0 -152
- zrb/task/llm/config.py +0 -122
- zrb/task/llm/conversation_history.py +0 -209
- zrb/task/llm/conversation_history_model.py +0 -67
- zrb/task/llm/default_workflow/coding/workflow.md +0 -41
- zrb/task/llm/default_workflow/copywriting/workflow.md +0 -68
- zrb/task/llm/default_workflow/git/workflow.md +0 -118
- zrb/task/llm/default_workflow/golang/workflow.md +0 -128
- zrb/task/llm/default_workflow/html-css/workflow.md +0 -135
- zrb/task/llm/default_workflow/java/workflow.md +0 -146
- zrb/task/llm/default_workflow/javascript/workflow.md +0 -158
- zrb/task/llm/default_workflow/python/workflow.md +0 -160
- zrb/task/llm/default_workflow/researching/workflow.md +0 -153
- zrb/task/llm/default_workflow/rust/workflow.md +0 -162
- zrb/task/llm/default_workflow/shell/workflow.md +0 -299
- zrb/task/llm/error.py +0 -95
- zrb/task/llm/file_replacement.py +0 -206
- zrb/task/llm/file_tool_model.py +0 -57
- zrb/task/llm/history_processor.py +0 -206
- zrb/task/llm/history_summarization.py +0 -25
- zrb/task/llm/print_node.py +0 -221
- zrb/task/llm/prompt.py +0 -321
- zrb/task/llm/subagent_conversation_history.py +0 -41
- zrb/task/llm/tool_wrapper.py +0 -361
- zrb/task/llm/typing.py +0 -3
- zrb/task/llm/workflow.py +0 -76
- zrb/task/llm_task.py +0 -379
- zrb/task_status/__init__.py +0 -0
- zrb/util/__init__.py +0 -0
- zrb/util/cli/__init__.py +0 -0
- zrb/util/cmd/__init__.py +0 -0
- zrb/util/codemod/__init__.py +0 -0
- zrb/util/string/__init__.py +0 -0
- zrb/xcom/__init__.py +0 -0
- /zrb/{config/default_prompt/file_extractor_system_prompt.md → llm/prompt/markdown/file_extractor.md} +0 -0
- /zrb/{config/default_prompt/repo_extractor_system_prompt.md → llm/prompt/markdown/repo_extractor.md} +0 -0
- /zrb/{config/default_prompt/repo_summarizer_system_prompt.md → llm/prompt/markdown/repo_summarizer.md} +0 -0
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/WHEEL +0 -0
- {zrb-1.21.29.dist-info → zrb-2.0.0a4.dist-info}/entry_points.txt +0 -0
zrb/config/llm_rate_limitter.py
DELETED
@@ -1,190 +0,0 @@
-import asyncio
-import json
-import time
-from collections import deque
-from typing import Any, Callable
-
-from zrb.config.config import CFG
-
-
-class LLMRateLimitter:
-    """
-    Helper class to enforce LLM API rate limits and throttling.
-    Tracks requests and tokens in a rolling 60-second window.
-    """
-
-    def __init__(
-        self,
-        max_requests_per_minute: int | None = None,
-        max_tokens_per_minute: int | None = None,
-        max_tokens_per_request: int | None = None,
-        max_tokens_per_tool_call_result: int | None = None,
-        throttle_sleep: float | None = None,
-        use_tiktoken: bool | None = None,
-        tiktoken_encoding_name: str | None = None,
-    ):
-        self._max_requests_per_minute = max_requests_per_minute
-        self._max_tokens_per_minute = max_tokens_per_minute
-        self._max_tokens_per_request = max_tokens_per_request
-        self._max_tokens_per_tool_call_result = max_tokens_per_tool_call_result
-        self._throttle_sleep = throttle_sleep
-        self._use_tiktoken = use_tiktoken
-        self._tiktoken_encoding_name = tiktoken_encoding_name
-        self.request_times = deque()
-        self.token_times = deque()
-
-    @property
-    def max_requests_per_minute(self) -> int:
-        if self._max_requests_per_minute is not None:
-            return self._max_requests_per_minute
-        return CFG.LLM_MAX_REQUESTS_PER_MINUTE
-
-    @property
-    def max_tokens_per_minute(self) -> int:
-        if self._max_tokens_per_minute is not None:
-            return self._max_tokens_per_minute
-        return CFG.LLM_MAX_TOKENS_PER_MINUTE
-
-    @property
-    def max_tokens_per_request(self) -> int:
-        if self._max_tokens_per_request is not None:
-            return self._max_tokens_per_request
-        return CFG.LLM_MAX_TOKENS_PER_REQUEST
-
-    @property
-    def max_tokens_per_tool_call_result(self) -> int:
-        if self._max_tokens_per_tool_call_result is not None:
-            return self._max_tokens_per_tool_call_result
-        return CFG.LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT
-
-    @property
-    def throttle_sleep(self) -> float:
-        if self._throttle_sleep is not None:
-            return self._throttle_sleep
-        return CFG.LLM_THROTTLE_SLEEP
-
-    @property
-    def use_tiktoken(self) -> bool:
-        if self._use_tiktoken is not None:
-            return self._use_tiktoken
-        return CFG.USE_TIKTOKEN
-
-    @property
-    def tiktoken_encoding_name(self) -> str:
-        if self._tiktoken_encoding_name is not None:
-            return self._tiktoken_encoding_name
-        return CFG.TIKTOKEN_ENCODING_NAME
-
-    def set_max_requests_per_minute(self, value: int):
-        self._max_requests_per_minute = value
-
-    def set_max_tokens_per_minute(self, value: int):
-        self._max_tokens_per_minute = value
-
-    def set_max_tokens_per_request(self, value: int):
-        self._max_tokens_per_request = value
-
-    def set_max_tokens_per_tool_call_result(self, value: int):
-        self._max_tokens_per_tool_call_result = value
-
-    def set_throttle_sleep(self, value: float):
-        self._throttle_sleep = value
-
-    def count_token(self, prompt: Any) -> int:
-        str_prompt = self._prompt_to_str(prompt)
-        if not self.use_tiktoken:
-            return self._fallback_count_token(str_prompt)
-        try:
-            import tiktoken
-
-            enc = tiktoken.get_encoding(self.tiktoken_encoding_name)
-            return len(enc.encode(str_prompt))
-        except Exception:
-            return self._fallback_count_token(str_prompt)
-
-    def _fallback_count_token(self, str_prompt: str) -> int:
-        return len(str_prompt) // 4
-
-    def clip_prompt(self, prompt: Any, limit: int) -> str:
-        str_prompt = self._prompt_to_str(prompt)
-        if not self.use_tiktoken:
-            return self._fallback_clip_prompt(str_prompt, limit)
-        try:
-            import tiktoken
-
-            enc = tiktoken.get_encoding(self.tiktoken_encoding_name)
-            tokens = enc.encode(str_prompt)
-            if len(tokens) <= limit:
-                return str_prompt
-            truncated = tokens[: limit - 3]
-            clipped_text = enc.decode(truncated)
-            return clipped_text + "..."
-        except Exception:
-            return self._fallback_clip_prompt(str_prompt, limit)
-
-    def _fallback_clip_prompt(self, str_prompt: str, limit: int) -> str:
-        char_limit = limit * 4 if limit * 4 <= 10 else limit * 4 - 10
-        return str_prompt[:char_limit] + "..."
-
-    async def throttle(
-        self,
-        prompt: Any,
-        throttle_notif_callback: Callable[[str], Any] | None = None,
-    ):
-        now = time.time()
-        str_prompt = self._prompt_to_str(prompt)
-        tokens = self.count_token(str_prompt)
-        # Clean up old entries
-        while self.request_times and now - self.request_times[0] > 60:
-            self.request_times.popleft()
-        while self.token_times and now - self.token_times[0][0] > 60:
-            self.token_times.popleft()
-        # Check per-request token limit
-        if tokens > self.max_tokens_per_request:
-            raise ValueError(
-                (
-                    "Request exceeds max_tokens_per_request "
-                    f"({tokens} > {self.max_tokens_per_request})."
-                )
-            )
-        if tokens > self.max_tokens_per_minute:
-            raise ValueError(
-                (
-                    "Request exceeds max_tokens_per_minute "
-                    f"({tokens} > {self.max_tokens_per_minute})."
-                )
-            )
-        # Wait if over per-minute request or token limit
-        while (
-            len(self.request_times) >= self.max_requests_per_minute
-            or sum(t for _, t in self.token_times) + tokens > self.max_tokens_per_minute
-        ):
-            if throttle_notif_callback is not None:
-                if len(self.request_times) >= self.max_requests_per_minute:
-                    rpm = len(self.request_times)
-                    throttle_notif_callback(
-                        f"Max request per minute exceeded: {rpm} of {self.max_requests_per_minute}"
-                    )
-                else:
-                    tpm = sum(t for _, t in self.token_times) + tokens
-                    throttle_notif_callback(
-                        f"Max token per minute exceeded: {tpm} of {self.max_tokens_per_minute}"
-                    )
-            await asyncio.sleep(self.throttle_sleep)
-            now = time.time()
-            while self.request_times and now - self.request_times[0] > 60:
-                self.request_times.popleft()
-            while self.token_times and now - self.token_times[0][0] > 60:
-                self.token_times.popleft()
-        # Record this request
-        self.request_times.append(now)
-        self.token_times.append((now, tokens))
-
-    def _prompt_to_str(self, prompt: Any) -> str:
-        try:
-            return json.dumps(prompt)
-        except Exception:
-            return f"{prompt}"
-
-
-llm_rate_limitter = LLMRateLimitter()
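The deleted limiter above is a self-contained admission gate: every request is token-counted, checked against per-request caps, and held back until the rolling 60-second window has room. A minimal sketch of driving it directly, assuming a zrb 1.x install; the tiny limits are illustrative, chosen only to make the throttling observable:

import asyncio

from zrb.config.llm_rate_limitter import LLMRateLimitter


async def main():
    limiter = LLMRateLimitter(max_requests_per_minute=2, max_tokens_per_minute=200)
    for i in range(3):
        prompt = f"question {i}"
        # Blocks (sleeping throttle_sleep at a time) once the rolling
        # 60-second window is saturated; the callback reports why.
        await limiter.throttle(prompt, lambda reason: print(f"throttled: {reason}"))
        print(f"request {i} admitted (~{limiter.count_token(prompt)} tokens)")


asyncio.run(main())

The module-level `llm_rate_limitter` singleton served the same role with every limit read from `CFG`.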
zrb/content_transformer/__init__.py
DELETED
File without changes

zrb/context/__init__.py
DELETED
File without changes

zrb/dot_dict/__init__.py
DELETED
File without changes

zrb/env/__init__.py
DELETED
File without changes

zrb/group/__init__.py
DELETED
File without changes

zrb/input/__init__.py
DELETED
File without changes

zrb/runner/__init__.py
DELETED
File without changes

zrb/runner/web_route/__init__.py
DELETED
File without changes

zrb/runner/web_route/home_page/__init__.py
DELETED
File without changes

zrb/session/__init__.py
DELETED
File without changes

zrb/session_state_log/__init__.py
DELETED
File without changes

zrb/session_state_logger/__init__.py
DELETED
File without changes

zrb/task/__init__.py
DELETED
File without changes

zrb/task/base/__init__.py
DELETED
File without changes

zrb/task/llm/__init__.py
DELETED
File without changes
zrb/task/llm/agent.py
DELETED
@@ -1,204 +0,0 @@
-import inspect
-from collections.abc import Callable
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
-
-from zrb.config.llm_rate_limitter import LLMRateLimitter
-from zrb.context.any_context import AnyContext
-from zrb.task.llm.history_processor import create_summarize_history_processor
-from zrb.task.llm.tool_wrapper import wrap_func, wrap_tool
-
-if TYPE_CHECKING:
-    from pydantic_ai import Agent, Tool
-    from pydantic_ai._agent_graph import HistoryProcessor
-    from pydantic_ai.models import Model
-    from pydantic_ai.output import OutputDataT, OutputSpec
-    from pydantic_ai.settings import ModelSettings
-    from pydantic_ai.toolsets import AbstractToolset
-
-    ToolOrCallable = Tool | Callable
-
-
-def create_agent_instance(
-    ctx: AnyContext,
-    model: "str | Model",
-    rate_limitter: LLMRateLimitter | None = None,
-    output_type: "OutputSpec[OutputDataT]" = str,
-    system_prompt: str = "",
-    model_settings: "ModelSettings | None" = None,
-    tools: list["ToolOrCallable"] = [],
-    toolsets: list["AbstractToolset[None]"] = [],
-    retries: int = 3,
-    yolo_mode: bool | list[str] | None = None,
-    summarization_model: "Model | str | None" = None,
-    summarization_model_settings: "ModelSettings | None" = None,
-    summarization_system_prompt: str | None = None,
-    summarization_retries: int = 2,
-    summarization_token_threshold: int | None = None,
-    history_processors: list["HistoryProcessor"] | None = None,
-    auto_summarize: bool = True,
-) -> "Agent[None, Any]":
-    """Creates a new Agent instance with configured tools and servers."""
-    from pydantic_ai import Agent, RunContext, Tool
-    from pydantic_ai.tools import GenerateToolJsonSchema
-    from pydantic_ai.toolsets import ToolsetTool, WrapperToolset
-
-    @dataclass
-    class ConfirmationWrapperToolset(WrapperToolset):
-        ctx: AnyContext
-        yolo_mode: bool | list[str]
-
-        async def call_tool(
-            self, name: str, tool_args: dict, ctx: RunContext, tool: ToolsetTool[None]
-        ) -> Any:
-            # The `tool` object is passed in. Use it for inspection.
-            # Define a temporary function that performs the actual tool call.
-            async def execute_delegated_tool_call(**params):
-                # Pass all arguments down the chain.
-                return await self.wrapped.call_tool(name, tool_args, ctx, tool)
-
-            # For the confirmation UI, make our temporary function look like the real one.
-            try:
-                execute_delegated_tool_call.__name__ = name
-                execute_delegated_tool_call.__doc__ = tool.function.__doc__
-                execute_delegated_tool_call.__signature__ = inspect.signature(
-                    tool.function
-                )
-            except (AttributeError, TypeError):
-                pass  # Ignore if we can't inspect the original function
-            # Use the existing wrap_func to get the confirmation logic
-            wrapped_executor = wrap_func(
-                execute_delegated_tool_call, self.ctx, self.yolo_mode
-            )
-            # Call the wrapped executor. This will trigger the confirmation prompt.
-            return await wrapped_executor(**tool_args)
-
-    if yolo_mode is None:
-        yolo_mode = False
-    # Normalize tools
-    tool_list = []
-    for tool_or_callable in tools:
-        if isinstance(tool_or_callable, Tool):
-            tool_list.append(tool_or_callable)
-            # Update tool's function
-            tool = tool_or_callable
-            tool_list.append(
-                Tool(
-                    function=wrap_func(tool.function, ctx, yolo_mode),
-                    takes_ctx=tool.takes_ctx,
-                    max_retries=tool.max_retries,
-                    name=tool.name,
-                    description=tool.description,
-                    prepare=tool.prepare,
-                    docstring_format=tool.docstring_format,
-                    require_parameter_descriptions=tool.require_parameter_descriptions,
-                    schema_generator=GenerateToolJsonSchema,
-                    strict=tool.strict,
-                )
-            )
-        else:
-            # Turn function into tool
-            tool_list.append(wrap_tool(tool_or_callable, ctx, yolo_mode))
-    # Wrap toolsets
-    wrapped_toolsets = [
-        ConfirmationWrapperToolset(wrapped=toolset, ctx=ctx, yolo_mode=yolo_mode)
-        for toolset in toolsets
-    ]
-    # Create History processor with summarizer
-    history_processors = [] if history_processors is None else history_processors
-    if auto_summarize:
-        history_processors += [
-            create_summarize_history_processor(
-                ctx=ctx,
-                system_prompt=system_prompt,
-                rate_limitter=rate_limitter,
-                summarization_model=summarization_model,
-                summarization_model_settings=summarization_model_settings,
-                summarization_system_prompt=summarization_system_prompt,
-                summarization_token_threshold=summarization_token_threshold,
-                summarization_retries=summarization_retries,
-            )
-        ]
-    # Return Agent
-    return Agent[None, Any](
-        model=model,
-        output_type=output_type,
-        instructions=system_prompt,
-        tools=tool_list,
-        toolsets=wrapped_toolsets,
-        model_settings=model_settings,
-        retries=retries,
-        history_processors=history_processors,
-    )
-
-
-def get_agent(
-    ctx: AnyContext,
-    model: "str | Model",
-    rate_limitter: LLMRateLimitter | None = None,
-    output_type: "OutputSpec[OutputDataT]" = str,
-    system_prompt: str = "",
-    model_settings: "ModelSettings | None" = None,
-    tools_attr: (
-        "list[ToolOrCallable] | Callable[[AnyContext], list[ToolOrCallable]]"
-    ) = [],
-    additional_tools: "list[ToolOrCallable]" = [],
-    toolsets_attr: "list[AbstractToolset[None] | str] | Callable[[AnyContext], list[AbstractToolset[None] | str]]" = [],  # noqa
-    additional_toolsets: "list[AbstractToolset[None] | str]" = [],
-    retries: int = 3,
-    yolo_mode: bool | list[str] | None = None,
-    summarization_model: "Model | str | None" = None,
-    summarization_model_settings: "ModelSettings | None" = None,
-    summarization_system_prompt: str | None = None,
-    summarization_retries: int = 2,
-    summarization_token_threshold: int | None = None,
-    history_processors: list["HistoryProcessor"] | None = None,
-) -> "Agent":
-    """Retrieves the configured Agent instance or creates one if necessary."""
-    # Get tools for agent
-    tools = list(tools_attr(ctx) if callable(tools_attr) else tools_attr)
-    tools.extend(additional_tools)
-    # Get Toolsets for agent
-    toolset_or_str_list = list(
-        toolsets_attr(ctx) if callable(toolsets_attr) else toolsets_attr
-    )
-    toolset_or_str_list.extend(additional_toolsets)
-    toolsets = _render_toolset_or_str_list(ctx, toolset_or_str_list)
-    # If no agent provided, create one using the configuration
-    return create_agent_instance(
-        ctx=ctx,
-        model=model,
-        rate_limitter=rate_limitter,
-        output_type=output_type,
-        system_prompt=system_prompt,
-        tools=tools,
-        toolsets=toolsets,
-        model_settings=model_settings,
-        retries=retries,
-        yolo_mode=yolo_mode,
-        summarization_model=summarization_model,
-        summarization_model_settings=summarization_model_settings,
-        summarization_system_prompt=summarization_system_prompt,
-        summarization_retries=summarization_retries,
-        summarization_token_threshold=summarization_token_threshold,
-        history_processors=history_processors,
-    )
-
-
-def _render_toolset_or_str_list(
-    ctx: AnyContext, toolset_or_str_list: list["AbstractToolset[None] | str"]
-) -> list["AbstractToolset[None]"]:
-    from pydantic_ai.mcp import load_mcp_servers
-
-    toolsets = []
-    for toolset_or_str in toolset_or_str_list:
-        if isinstance(toolset_or_str, str):
-            try:
-                servers = load_mcp_servers(toolset_or_str)
-                for server in servers:
-                    toolsets.append(server)
-            except Exception as e:
-                ctx.log_error(f"Invalid MCP Config {toolset_or_str}: {e}")
-            continue
-        toolsets.append(toolset_or_str)
-    return toolsets
zrb/task/llm/agent_runner.py
DELETED
@@ -1,152 +0,0 @@
-import json
-from collections.abc import Callable
-from typing import TYPE_CHECKING, Any
-
-from zrb.config.llm_rate_limitter import LLMRateLimitter, llm_rate_limitter
-from zrb.context.any_context import AnyContext
-from zrb.task.llm.error import extract_api_error_details
-from zrb.task.llm.print_node import print_node
-from zrb.task.llm.typing import ListOfDict
-from zrb.util.cli.style import stylize_faint
-
-if TYPE_CHECKING:
-    from pydantic_ai import Agent, Tool
-    from pydantic_ai.agent import AgentRun
-    from pydantic_ai.messages import UserContent
-
-    ToolOrCallable = Tool | Callable
-
-
-async def run_agent_iteration(
-    ctx: AnyContext,
-    agent: "Agent[None, Any]",
-    user_prompt: str,
-    attachments: "list[UserContent] | None" = None,
-    history_list: ListOfDict | None = None,
-    rate_limitter: LLMRateLimitter | None = None,
-    max_retry: int = 2,
-    log_indent_level: int = 0,
-) -> "AgentRun":
-    """
-    Runs a single iteration of the agent execution loop.
-
-    Args:
-        ctx: The task context.
-        agent: The Pydantic AI agent instance.
-        user_prompt: The user's input prompt.
-        history_list: The current conversation history.
-
-    Returns:
-        The agent run result object.
-
-    Raises:
-        Exception: If any error occurs during agent execution.
-    """
-    if max_retry < 0:
-        raise ValueError("Max retry cannot be less than 0")
-    attempt = 0
-    while attempt < max_retry:
-        try:
-            return await _run_single_agent_iteration(
-                ctx=ctx,
-                agent=agent,
-                user_prompt=user_prompt,
-                attachments=[] if attachments is None else attachments,
-                history_list=[] if history_list is None else history_list,
-                rate_limitter=(
-                    llm_rate_limitter if rate_limitter is None else rate_limitter
-                ),
-                log_indent_level=log_indent_level,
-            )
-        except BaseException:
-            attempt += 1
-            if attempt == max_retry:
-                raise
-    raise Exception("Max retry exceeded")
-
-
-async def _run_single_agent_iteration(
-    ctx: AnyContext,
-    agent: "Agent",
-    user_prompt: str,
-    attachments: "list[UserContent]",
-    history_list: ListOfDict,
-    rate_limitter: LLMRateLimitter,
-    log_indent_level: int,
-) -> "AgentRun":
-    from openai import APIError
-    from pydantic_ai import UsageLimits
-    from pydantic_ai.messages import ModelMessagesTypeAdapter
-
-    agent_payload = _estimate_request_payload(
-        agent, user_prompt, attachments, history_list
-    )
-    callback = _create_print_throttle_notif(ctx)
-    if rate_limitter:
-        await rate_limitter.throttle(agent_payload, callback)
-    else:
-        await llm_rate_limitter.throttle(agent_payload, callback)
-    user_prompt_with_attachments = [user_prompt] + attachments
-    async with agent:
-        async with agent.iter(
-            user_prompt=user_prompt_with_attachments,
-            message_history=ModelMessagesTypeAdapter.validate_python(history_list),
-            usage_limits=UsageLimits(request_limit=None),  # We don't want limit
-        ) as agent_run:
-            async for node in agent_run:
-                # Each node represents a step in the agent's execution
-                try:
-                    await print_node(
-                        _get_plain_printer(ctx), agent_run, node, log_indent_level
-                    )
-                except APIError as e:
-                    # Extract detailed error information from the response
-                    error_details = extract_api_error_details(e)
-                    ctx.log_error(f"API Error: {error_details}")
-                    raise
-                except Exception as e:
-                    ctx.log_error(f"Error processing node: {str(e)}")
-                    ctx.log_error(f"Error type: {type(e).__name__}")
-                    raise
-    return agent_run
-
-
-def _create_print_throttle_notif(ctx: AnyContext) -> Callable[[str], None]:
-    def _print_throttle_notif(reason: str):
-        ctx.print(stylize_faint(f" ⌛>> Request Throttled: {reason}"), plain=True)
-
-    return _print_throttle_notif
-
-
-def _estimate_request_payload(
-    agent: "Agent",
-    user_prompt: str,
-    attachments: "list[UserContent]",
-    history_list: ListOfDict,
-) -> str:
-    system_prompts = agent._system_prompts if hasattr(agent, "_system_prompts") else ()
-    return json.dumps(
-        [
-            {"role": "system", "content": "\n".join(system_prompts)},
-            *history_list,
-            {"role": "user", "content": user_prompt},
-            *[_estimate_attachment_payload(attachment) for attachment in attachments],
-        ]
-    )
-
-
-def _estimate_attachment_payload(attachment: "UserContent") -> Any:
-    if hasattr(attachment, "url"):
-        return {"role": "user", "content": attachment.url}
-    if hasattr(attachment, "data"):
-        return {"role": "user", "content": "x" * len(attachment.data)}
-    return ""
-
-
-def _get_plain_printer(ctx: AnyContext):
-    def printer(*args, **kwargs):
-        if "plain" not in kwargs:
-            kwargs["plain"] = True
-        return ctx.print(*args, **kwargs)
-
-    return printer
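`run_agent_iteration` was the 1.x entry point that tied the deleted pieces together: it estimates the request payload, throttles through the rate limiter, streams each execution node to the context printer, and retries up to `max_retry` times on failure. A sketch of its use, again assuming the 1.x API; the prompt text is illustrative, and `agent` comes from `get_agent` as sketched earlier:

from zrb.context.any_context import AnyContext
from zrb.task.llm.agent import get_agent
from zrb.task.llm.agent_runner import run_agent_iteration


async def action(ctx: AnyContext):
    agent = get_agent(ctx=ctx, model="openai:gpt-4o")
    # Throttles via llm_rate_limitter, prints every execution node,
    # and retries the whole iteration on failure.
    agent_run = await run_agent_iteration(
        ctx=ctx,
        agent=agent,
        user_prompt="Summarize the repository layout.",
        history_list=[],
        max_retry=2,
    )
    return agent_run

Judging by the file list above, this responsibility appears to move to the new `zrb/llm/agent/agent.py` and `zrb/llm/task/llm_task.py` modules in 2.0.0a4.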