zrb 1.21.9 (py3-none-any.whl) → 1.21.31 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of zrb might be problematic.
- zrb/attr/type.py +10 -7
- zrb/builtin/git.py +12 -1
- zrb/builtin/llm/chat_completion.py +287 -0
- zrb/builtin/llm/chat_session_cmd.py +90 -28
- zrb/builtin/llm/chat_trigger.py +6 -1
- zrb/builtin/llm/history.py +4 -4
- zrb/builtin/llm/tool/cli.py +25 -13
- zrb/builtin/llm/tool/code.py +9 -2
- zrb/builtin/llm/tool/file.py +42 -81
- zrb/builtin/llm/tool/note.py +36 -16
- zrb/builtin/llm/tool/search/__init__.py +1 -0
- zrb/builtin/llm/tool/search/brave.py +60 -0
- zrb/builtin/llm/tool/search/searxng.py +55 -0
- zrb/builtin/llm/tool/search/serpapi.py +55 -0
- zrb/builtin/llm/tool/sub_agent.py +30 -10
- zrb/builtin/llm/tool/web.py +12 -72
- zrb/config/config.py +108 -13
- zrb/config/default_prompt/interactive_system_prompt.md +1 -1
- zrb/config/default_prompt/summarization_prompt.md +54 -8
- zrb/config/default_prompt/system_prompt.md +1 -1
- zrb/config/llm_rate_limitter.py +24 -5
- zrb/input/option_input.py +13 -1
- zrb/task/llm/agent.py +42 -144
- zrb/task/llm/agent_runner.py +152 -0
- zrb/task/llm/config.py +7 -5
- zrb/task/llm/conversation_history.py +35 -24
- zrb/task/llm/conversation_history_model.py +4 -11
- zrb/task/llm/default_workflow/coding/workflow.md +2 -3
- zrb/task/llm/file_replacement.py +206 -0
- zrb/task/llm/file_tool_model.py +57 -0
- zrb/task/llm/history_processor.py +206 -0
- zrb/task/llm/history_summarization.py +2 -179
- zrb/task/llm/print_node.py +14 -5
- zrb/task/llm/prompt.py +7 -18
- zrb/task/llm/subagent_conversation_history.py +41 -0
- zrb/task/llm/tool_confirmation_completer.py +41 -0
- zrb/task/llm/tool_wrapper.py +26 -12
- zrb/task/llm_task.py +55 -47
- zrb/util/attr.py +17 -10
- zrb/util/cli/text.py +6 -4
- zrb/util/git.py +2 -2
- zrb/util/yaml.py +1 -0
- zrb/xcom/xcom.py +10 -0
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/METADATA +5 -5
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/RECORD +47 -37
- zrb/task/llm/history_summarization_tool.py +0 -24
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/WHEEL +0 -0
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/entry_points.txt +0 -0
zrb/builtin/llm/tool/web.py
CHANGED

```diff
@@ -2,6 +2,7 @@ from collections.abc import Callable
 from typing import Any
 from urllib.parse import urljoin
+from zrb.builtin.llm.tool.search import brave, searxng, serpapi
 from zrb.config.config import CFG
 from zrb.config.llm_config import llm_config
 
@@ -30,78 +31,17 @@ async def open_web_page(url: str) -> dict[str, Any]:
 def create_search_internet_tool() -> Callable:
     if llm_config.default_search_internet_tool is not None:
         return llm_config.default_search_internet_tool
-
-
-        """
-
-
-
-
-
-
-
-
-            page (int, optional): Search result page number. Defaults to 1.
-
-        Returns:
-            dict: Summary of search results (titles, links, snippets).
-        """
-        import requests
-
-        if (
-            CFG.SEARCH_INTERNET_METHOD.strip().lower() == "serpapi"
-            and CFG.SERPAPI_KEY != ""
-        ):
-            response = requests.get(
-                "https://serpapi.com/search",
-                headers={"User-Agent": _DEFAULT_USER_AGENT},
-                params={
-                    "q": query,
-                    "start": (page - 1) * 10,
-                    "hl": CFG.SERPAPI_LANG,
-                    "safe": CFG.SERPAPI_SAFE,
-                    "api_key": CFG.SERPAPI_KEY,
-                },
-            )
-        elif (
-            CFG.SEARCH_INTERNET_METHOD.strip().lower() == "brave"
-            and CFG.BRAVE_API_KEY != ""
-        ):
-            response = requests.get(
-                "https://api.search.brave.com/res/v1/web/search",
-                headers={
-                    "User-Agent": _DEFAULT_USER_AGENT,
-                    "Accept": "application/json",
-                    "x-subscription-token": CFG.BRAVE_API_KEY,
-                },
-                params={
-                    "q": query,
-                    "count": "10",
-                    "offset": (page - 1) * 10,
-                    "safesearch": CFG.BRAVE_API_SAFE,
-                    "search_lang": CFG.BRAVE_API_LANG,
-                    "summary": "true",
-                },
-            )
-        else:
-            response = requests.get(
-                url=f"{CFG.SEARXNG_BASE_URL}/search",
-                headers={"User-Agent": _DEFAULT_USER_AGENT},
-                params={
-                    "q": query,
-                    "format": "json",
-                    "pageno": page,
-                    "safesearch": CFG.SEARXNG_SAFE,
-                    "language": CFG.SEARXNG_LANG,
-                },
-            )
-        if response.status_code != 200:
-            raise Exception(
-                f"Error: Unable to retrieve search results (status code: {response.status_code})"  # noqa
-            )
-        return response.json()
-
-    return search_internet
+    if (
+        CFG.SEARCH_INTERNET_METHOD.strip().lower() == "serpapi"
+        and CFG.SERPAPI_KEY != ""
+    ):
+        return serpapi.search_internet
+    if (
+        CFG.SEARCH_INTERNET_METHOD.strip().lower() == "brave"
+        and CFG.BRAVE_API_KEY != ""
+    ):
+        return brave.search_internet
+    return searxng.search_internet
 
 
 async def _fetch_page_content(url: str) -> tuple[str, list[str]]:
```
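The inline `requests` implementation above moves into the new `search` submodules, leaving `create_search_internet_tool` as a plain dispatcher. A minimal sketch of steering that dispatch, assuming env var names follow the `ZRB_`-prefixed pattern that `Config._getenv` applies (see the next diff):

```python
import os

# Assumed names: Config._getenv prefixes each setting with ENV_PREFIX ("ZRB").
os.environ["ZRB_SEARCH_INTERNET_METHOD"] = "brave"
os.environ["ZRB_BRAVE_API_KEY"] = "<api-key>"  # placeholder

from zrb.builtin.llm.tool.web import create_search_internet_tool

# Returns brave.search_internet here; with no method/key configured,
# the dispatcher falls back to searxng.search_internet.
search_internet = create_search_internet_tool()
```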
zrb/config/config.py
CHANGED

```diff
@@ -28,8 +28,13 @@ class Config:
     def ENV_PREFIX(self) -> str:
         return os.getenv("_ZRB_ENV_PREFIX", "ZRB")
 
-    def _getenv(self, env_name: str, default: str = "") -> str:
-        return os.getenv(f"{self.ENV_PREFIX}_{env_name}", default)
+    def _getenv(self, env_name: str | list[str], default: str = "") -> str:
+        env_name_list = env_name if isinstance(env_name, list) else [env_name]
+        for env_name in env_name_list:
+            value = os.getenv(f"{self.ENV_PREFIX}_{env_name}", None)
+            if value is not None:
+                return value
+        return default
 
     def _get_internal_default_prompt(self, name: str) -> str:
         if name not in self.__internal_default_prompt:
@@ -60,6 +65,38 @@
     def DEFAULT_EDITOR(self) -> str:
         return self._getenv("EDITOR", "nano")
 
+    @property
+    def DEFAULT_DIFF_EDIT_COMMAND_TPL(self) -> str:
+        return self._getenv("DIFF_EDIT_COMMAND", self._get_default_diff_edit_command())
+
+    def _get_default_diff_edit_command(self) -> str:
+        editor = self.DEFAULT_EDITOR
+        if editor in [
+            "code",
+            "vscode",
+            "vscodium",
+            "windsurf",
+            "cursor",
+            "zed",
+            "zeditor",
+            "agy",
+        ]:
+            return f"{editor} --wait --diff {{old}} {{new}}"
+        if editor == "emacs":
+            return 'emacs --eval \'(ediff-files "{old}" "{new}")\''
+        if editor in ["nvim", "vim"]:
+            return (
+                f"{editor} -d {{old}} {{new}} "
+                "-i NONE "
+                '-c "wincmd h | set readonly | wincmd l" '
+                '-c "highlight DiffAdd cterm=bold ctermbg=22 guibg=#005f00 | highlight DiffChange cterm=bold ctermbg=24 guibg=#005f87 | highlight DiffText ctermbg=21 guibg=#0000af | highlight DiffDelete ctermbg=52 guibg=#5f0000" '  # noqa
+                '-c "set showtabline=2 | set tabline=[Instructions]\\ :wqa(save\\ &\\ quit)\\ \\|\\ i/esc(toggle\\ edit\\ mode)" '  # noqa
+                '-c "wincmd h | setlocal statusline=OLD\\ FILE" '
+                '-c "wincmd l | setlocal statusline=%#StatusBold#NEW\\ FILE\\ :wqa(save\\ &\\ quit)\\ \\|\\ i/esc(toggle\\ edit\\ mode)" '  # noqa
+                '-c "autocmd BufWritePost * wqa"'
+            )
+        return 'vimdiff {old} {new} +"setlocal ro" +"wincmd l" +"autocmd BufWritePost <buffer> qa"'  # noqa
+
     @property
     def INIT_MODULES(self) -> list[str]:
         init_modules_str = self._getenv("INIT_MODULES", "")
@@ -287,7 +324,9 @@
     @property
     def LLM_BUILTIN_WORKFLOW_PATHS(self) -> list[str]:
         """Get a list of additional builtin workflow paths from environment variables."""
-        builtin_workflow_paths_str = self._getenv(
+        builtin_workflow_paths_str = self._getenv(
+            ["LLM_BUILTIN_WORFKLOW_PATH", "LLM_BUILTIN_WORKFLOW_PATHS"], ""
+        )
         if builtin_workflow_paths_str != "":
             return [
                 path.strip()
@@ -306,13 +345,21 @@
         value = self._getenv("LLM_SUMMARIZATION_PROMPT")
         return None if value == "" else value
 
+    @property
+    def LLM_SHOW_TOOL_CALL_RESULT(self) -> bool:
+        return to_boolean(self._getenv("LLM_SHOW_TOOL_CALL_RESULT", "false"))
+
     @property
     def LLM_MAX_REQUESTS_PER_MINUTE(self) -> int:
         """
         Maximum number of LLM requests allowed per minute.
         Default is conservative to accommodate free-tier LLM providers.
         """
-        return int(
+        return int(
+            self._getenv(
+                ["LLM_MAX_REQUEST_PER_MINUTE", "LLM_MAX_REQUESTS_PER_MINUTE"], "60"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_MINUTE(self) -> int:
@@ -320,22 +367,38 @@
         Maximum number of LLM tokens allowed per minute.
         Default is conservative to accommodate free-tier LLM providers.
         """
-        return int(
+        return int(
+            self._getenv(
+                ["LLM_MAX_TOKEN_PER_MINUTE", "LLM_MAX_TOKENS_PER_MINUTE"], "100000"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_REQUEST(self) -> int:
         """Maximum number of tokens allowed per individual LLM request."""
-        return int(
+        return int(
+            self._getenv(
+                ["LLM_MAX_TOKEN_PER_REQUEST", "LLM_MAX_TOKENS_PER_REQUEST"], "120000"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT(self) -> int:
         """Maximum number of tokens allowed per tool call result."""
-        return int(
+        return int(
+            self._getenv(
+                [
+                    "LLM_MAX_TOKEN_PER_TOOL_CALL_RESULT",
+                    "LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT",
+                ],
+                str(self._get_max_threshold(0.4)),
+            )
+        )
 
     @property
     def LLM_THROTTLE_SLEEP(self) -> float:
         """Number of seconds to sleep when throttling is required."""
-        return float(self._getenv("LLM_THROTTLE_SLEEP", "
+        return float(self._getenv("LLM_THROTTLE_SLEEP", "5.0"))
 
     @property
     def LLM_YOLO_MODE(self) -> bool | list[str]:
@@ -351,19 +414,51 @@
 
     @property
     def LLM_HISTORY_SUMMARIZATION_TOKEN_THRESHOLD(self) -> int:
-
+        threshold = int(
+            self._getenv(
+                "LLM_HISTORY_SUMMARIZATION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.6)),
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.6)
 
     @property
     def LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD(self) -> int:
-
+        threshold = int(
+            self._getenv(
+                "LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.4)),
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.4)
 
     @property
     def LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD(self) -> int:
-
+        threshold = int(
+            self._getenv(
+                "LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.4)),
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.4)
 
     @property
-    def
-
+    def LLM_FILE_ANALYSIS_TOKEN_THRESHOLD(self) -> int:
+        threshold = int(
+            self._getenv(
+                "LLM_FILE_ANALYSIS_TOKEN_THRESHOLD", str(self._get_max_threshold(0.4))
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.4)
+
+    def _limit_token_threshold(self, threshold: int, factor: float) -> int:
+        return min(threshold, self._get_max_threshold(factor))
+
+    def _get_max_threshold(self, factor: float) -> int:
+        return round(
+            factor
+            * min(self.LLM_MAX_TOKENS_PER_MINUTE, self.LLM_MAX_TOKENS_PER_REQUEST)
+        )
 
     @property
     def LLM_FILE_EXTRACTOR_SYSTEM_PROMPT(self) -> str:
```
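The widened `_getenv` signature is what makes the renamed settings above backward compatible: a property can list the old and new env var spellings, and the first one that is set wins. A standalone sketch of the same lookup, for illustration only (not the zrb source):

```python
import os


def getenv_with_aliases(
    names: str | list[str], default: str = "", prefix: str = "ZRB"
) -> str:
    """Return the first set alias, mirroring Config._getenv's fallback order."""
    name_list = names if isinstance(names, list) else [names]
    for name in name_list:
        value = os.getenv(f"{prefix}_{name}")
        if value is not None:
            return value
    return default


# The old singular spelling keeps working alongside the new plural one:
os.environ["ZRB_LLM_MAX_REQUEST_PER_MINUTE"] = "30"
print(
    getenv_with_aliases(
        ["LLM_MAX_REQUEST_PER_MINUTE", "LLM_MAX_REQUESTS_PER_MINUTE"], "60"
    )
)  # -> "30"
```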
zrb/config/default_prompt/interactive_system_prompt.md
CHANGED

```diff
@@ -1,4 +1,4 @@
-
+This is an interactive session. Your primary goal is to help users effectively and efficiently.
 
 # Core Principles
 - **Tool-Centric:** Describe what you are about to do, then call the appropriate tool.
```
zrb/config/default_prompt/summarization_prompt.md
CHANGED

````diff
@@ -1,11 +1,57 @@
-You are a memory management AI. Your
+You are a smart memory management AI. Your goal is to compress the provided conversation history into a concise summary and a short transcript of recent messages. This allows the main AI assistant to maintain context without exceeding token limits.
 
-
+You will receive a JSON string representing the full conversation history. This JSON contains a list of message objects.
 
-
-2. **Transcript:** Extract ONLY the last 4 (four) turns of the `Recent Conversation` to serve as the new transcript.
-    * **Do not change or shorten the content of these turns, with one exception:** If a tool call returns a very long output, do not include the full output. Instead, briefly summarize the result of the tool call.
-    * Ensure the timestamp format is `[YYYY-MM-DD HH:MM:SS UTC+Z] Role: Message/Tool name being called`.
-3. **Update Memory:** Call the `final_result` tool with all the information you consolidated.
+Your task is to call the `save_conversation_summary` tool **once** with the following data. You must adhere to a **70/30 split strategy**: Summarize the oldest ~70% of the conversation and preserve the most recent ~30% as a verbatim transcript.
 
-
+1. **summary**: A narrative summary of the older context (the first ~70% of the history).
+    * **Length:** Comprehensive but concise.
+    * **Content - YOU MUST USE THESE SECTIONS:**
+        * **[Completed Actions]:** detailed list of files created, modified, or bugs fixed. **Do not omit file paths.**
+        * **[Active Context]:** What is the current high-level goal?
+        * **[Pending Steps]:** What specifically remains to be done?
+        * **[Constraints]:** Key user preferences or technical constraints.
+    * **Critical Logic:**
+        * **Anti-Looping:** If a task is listed in **[Completed Actions]**, do NOT list it in **[Pending Steps]**.
+        * **Context Merging:** If the input history already contains a summary, merge it intelligently. Updates to files supersede older descriptions.
+
+2. **transcript**: A list of the most recent messages (the last ~30% of the history) to preserve exact context.
+    * **Format:** A list of objects with `role`, `time`, and `content`.
+    * **Time Format:** Use "yyyy-mm-ddTHH:MM:SSZ" (e.g., "2023-10-27T10:00:00Z").
+    * **Content Rules:**
+        * **Preserve Verbatim:** Do not summarize user instructions or code in this section. The main AI needs the exact recent commands to function correctly.
+        * **Tool Outputs:** If a tool output in this recent section is huge (e.g., > 100 lines of file content), you may summarize it (e.g., "File content of X read successfully... "), but preserve any error messages or short confirmations exactly.
+
+**Input Structure Hint:**
+The input JSON is a list of Pydantic AI messages.
+- `kind="request"` -> usually User.
+- `kind="response"` -> usually Model.
+- Tool Results -> `part_kind="tool-return"`.
+
+**Example:**
+
+**Input (Abstract Representation of ~6 turns):**
+```json
+[
+  { "role": "user", "content": "Previous Summary: \n[Completed Actions]: Created `src/app.py`.\n[Active Context]: Fixing login bug.\n[Pending Steps]: Verify fix." },
+  { "role": "model", "content": "I see the bug. I will fix `src/app.py` now." },
+  { "role": "tool_call", "content": "write_file('src/app.py', '...fixed code...')" },
+  { "role": "tool_result", "content": "Success" },
+  { "role": "user", "content": "Great. Now add a test for it." },
+  { "role": "model", "content": "Okay, I will create `tests/test_login.py`." }
+]
+```
+
+**Output (Tool Call `save_conversation_summary`):**
+```json
+{
+  "summary": "[Completed Actions]: Created `src/app.py` and fixed login bug in `src/app.py`.\n[Active Context]: Adding tests for login functionality.\n[Pending Steps]: Create `tests/test_login.py`.\n[Constraints]: None.",
+  "transcript": [
+    { "role": "user", "time": "2023-10-27T10:05:00Z", "content": "Great. Now add a test for it." },
+    { "role": "model", "time": "2023-10-27T10:05:05Z", "content": "Okay, I will create `tests/test_login.py`." }
+  ]
+}
+```
+
+**Final Note:**
+The `summary` + `transcript` is the ONLY memory the main AI will have. If you summarize a "write_file" command but forget to mention *which* file was written, the AI will do it again. **Be specific.**
````
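The 70/30 split the new prompt mandates amounts to an index cut over the message list; a hypothetical illustration of the intended partition (not zrb code):

```python
def split_history(
    messages: list[dict], keep_ratio: float = 0.3
) -> tuple[list[dict], list[dict]]:
    """Return (older ~70% to summarize, recent ~30% to keep verbatim)."""
    cut = int(len(messages) * (1 - keep_ratio))
    return messages[:cut], messages[cut:]


older, transcript = split_history(
    [{"role": "user", "content": f"msg {i}"} for i in range(10)]
)
assert len(transcript) == 3  # the most recent ~30% stays verbatim
```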
zrb/config/default_prompt/system_prompt.md
CHANGED

```diff
@@ -1,4 +1,4 @@
-
+This is a single request session. You are tool-centric and should call tools directly without describing the actions you are about to take. Only communicate to report the final result.
 
 # Core Principles
 
```
zrb/config/llm_rate_limitter.py
CHANGED

```diff
@@ -7,7 +7,7 @@ from typing import Any, Callable
 from zrb.config.config import CFG
 
 
-class LLMRateLimiter:
+class LLMRateLimitter:
     """
     Helper class to enforce LLM API rate limits and throttling.
     Tracks requests and tokens in a rolling 60-second window.
@@ -129,7 +129,7 @@ class LLMRateLimiter
     async def throttle(
         self,
         prompt: Any,
-        throttle_notif_callback: Callable | None = None,
+        throttle_notif_callback: Callable[[str], Any] | None = None,
     ):
         now = time.time()
         str_prompt = self._prompt_to_str(prompt)
@@ -142,7 +142,17 @@ class LLMRateLimiter
         # Check per-request token limit
         if tokens > self.max_tokens_per_request:
             raise ValueError(
-
+                (
+                    "Request exceeds max_tokens_per_request "
+                    f"({tokens} > {self.max_tokens_per_request})."
+                )
+            )
+        if tokens > self.max_tokens_per_minute:
+            raise ValueError(
+                (
+                    "Request exceeds max_tokens_per_minute "
+                    f"({tokens} > {self.max_tokens_per_minute})."
+                )
             )
         # Wait if over per-minute request or token limit
         while (
@@ -150,7 +160,16 @@ class LLMRateLimiter
             or sum(t for _, t in self.token_times) + tokens > self.max_tokens_per_minute
         ):
             if throttle_notif_callback is not None:
-
+                if len(self.request_times) >= self.max_requests_per_minute:
+                    rpm = len(self.request_times)
+                    throttle_notif_callback(
+                        f"Max request per minute exceeded: {rpm} of {self.max_requests_per_minute}"
+                    )
+                else:
+                    tpm = sum(t for _, t in self.token_times) + tokens
+                    throttle_notif_callback(
+                        f"Max token per minute exceeded: {tpm} of {self.max_tokens_per_minute}"
+                    )
             await asyncio.sleep(self.throttle_sleep)
             now = time.time()
             while self.request_times and now - self.request_times[0] > 60:
@@ -168,4 +187,4 @@ class LLMRateLimiter
         return f"{prompt}"
 
 
-llm_rate_limitter = LLMRateLimiter()
+llm_rate_limitter = LLMRateLimitter()
```
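With the callback type narrowed to `Callable[[str], Any]`, the throttle loop now passes a human-readable reason string instead of calling the callback blind. A hedged usage sketch based on the signature shown above:

```python
import asyncio

from zrb.config.llm_rate_limitter import llm_rate_limitter


async def main() -> None:
    # The callback receives messages such as
    # "Max request per minute exceeded: 60 of 60".
    await llm_rate_limitter.throttle(
        "example prompt",
        throttle_notif_callback=lambda reason: print(f"[throttle] {reason}"),
    )


asyncio.run(main())
```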
zrb/input/option_input.py
CHANGED

```diff
@@ -47,9 +47,21 @@ class OptionInput(BaseInput):
         option_str = ", ".join(options)
         if default_value != "":
             prompt_message = f"{prompt_message} ({option_str}) [{default_value}]"
-        value =
+        value = self._get_value_from_user_input(shared_ctx, prompt_message, options)
         if value.strip() != "" and value.strip() not in options:
             value = self._prompt_cli_str(shared_ctx)
         if value.strip() == "":
             value = default_value
         return value
+
+    def _get_value_from_user_input(
+        self, shared_ctx: AnySharedContext, prompt_message: str, options: list[str]
+    ) -> str:
+        from prompt_toolkit import PromptSession
+        from prompt_toolkit.completion import WordCompleter
+
+        if shared_ctx.is_tty:
+            reader = PromptSession()
+            option_completer = WordCompleter(options, ignore_case=True)
+            return reader.prompt(f"{prompt_message}: ", completer=option_completer)
+        return input(f"{prompt_message}: ")
```