zrb 1.21.17__py3-none-any.whl → 1.21.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zrb/attr/type.py +10 -7
- zrb/builtin/git.py +12 -1
- zrb/builtin/llm/chat_completion.py +287 -0
- zrb/builtin/llm/chat_session_cmd.py +90 -28
- zrb/builtin/llm/chat_trigger.py +6 -1
- zrb/builtin/llm/tool/cli.py +29 -13
- zrb/builtin/llm/tool/code.py +9 -1
- zrb/builtin/llm/tool/file.py +32 -6
- zrb/builtin/llm/tool/note.py +9 -9
- zrb/builtin/llm/tool/search/__init__.py +1 -0
- zrb/builtin/llm/tool/search/brave.py +66 -0
- zrb/builtin/llm/tool/search/searxng.py +61 -0
- zrb/builtin/llm/tool/search/serpapi.py +61 -0
- zrb/builtin/llm/tool/sub_agent.py +30 -10
- zrb/builtin/llm/tool/web.py +17 -72
- zrb/config/config.py +67 -26
- zrb/config/default_prompt/interactive_system_prompt.md +16 -13
- zrb/config/default_prompt/summarization_prompt.md +54 -8
- zrb/config/default_prompt/system_prompt.md +16 -18
- zrb/config/llm_rate_limitter.py +15 -6
- zrb/input/option_input.py +13 -1
- zrb/task/llm/agent.py +42 -143
- zrb/task/llm/agent_runner.py +152 -0
- zrb/task/llm/conversation_history.py +35 -24
- zrb/task/llm/conversation_history_model.py +4 -11
- zrb/task/llm/history_processor.py +206 -0
- zrb/task/llm/history_summarization.py +2 -179
- zrb/task/llm/print_node.py +14 -5
- zrb/task/llm/prompt.py +2 -17
- zrb/task/llm/subagent_conversation_history.py +41 -0
- zrb/task/llm/tool_confirmation_completer.py +41 -0
- zrb/task/llm/tool_wrapper.py +15 -11
- zrb/task/llm_task.py +41 -40
- zrb/util/attr.py +12 -7
- zrb/util/git.py +2 -2
- zrb/xcom/xcom.py +10 -0
- {zrb-1.21.17.dist-info → zrb-1.21.33.dist-info}/METADATA +3 -3
- {zrb-1.21.17.dist-info → zrb-1.21.33.dist-info}/RECORD +40 -32
- zrb/task/llm/history_summarization_tool.py +0 -24
- {zrb-1.21.17.dist-info → zrb-1.21.33.dist-info}/WHEEL +0 -0
- {zrb-1.21.17.dist-info → zrb-1.21.33.dist-info}/entry_points.txt +0 -0
zrb/config/config.py
CHANGED

@@ -28,8 +28,13 @@ class Config:
     def ENV_PREFIX(self) -> str:
         return os.getenv("_ZRB_ENV_PREFIX", "ZRB")
 
-    def _getenv(self, env_name: str, default: str = "") -> str:
-
+    def _getenv(self, env_name: str | list[str], default: str = "") -> str:
+        env_name_list = env_name if isinstance(env_name, list) else [env_name]
+        for env_name in env_name_list:
+            value = os.getenv(f"{self.ENV_PREFIX}_{env_name}", None)
+            if value is not None:
+                return value
+        return default
 
     def _get_internal_default_prompt(self, name: str) -> str:
         if name not in self.__internal_default_prompt:

@@ -319,7 +324,9 @@ class Config:
     @property
     def LLM_BUILTIN_WORKFLOW_PATHS(self) -> list[str]:
         """Get a list of additional builtin workflow paths from environment variables."""
-        builtin_workflow_paths_str = self._getenv(
+        builtin_workflow_paths_str = self._getenv(
+            ["LLM_BUILTIN_WORFKLOW_PATH", "LLM_BUILTIN_WORKFLOW_PATHS"], ""
+        )
         if builtin_workflow_paths_str != "":
             return [
                 path.strip()

@@ -338,13 +345,21 @@ class Config:
         value = self._getenv("LLM_SUMMARIZATION_PROMPT")
         return None if value == "" else value
 
+    @property
+    def LLM_SHOW_TOOL_CALL_RESULT(self) -> bool:
+        return to_boolean(self._getenv("LLM_SHOW_TOOL_CALL_RESULT", "false"))
+
     @property
     def LLM_MAX_REQUESTS_PER_MINUTE(self) -> int:
         """
         Maximum number of LLM requests allowed per minute.
         Default is conservative to accommodate free-tier LLM providers.
         """
-        return int(
+        return int(
+            self._getenv(
+                ["LLM_MAX_REQUEST_PER_MINUTE", "LLM_MAX_REQUESTS_PER_MINUTE"], "60"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_MINUTE(self) -> int:

@@ -352,17 +367,33 @@ class Config:
         Maximum number of LLM tokens allowed per minute.
         Default is conservative to accommodate free-tier LLM providers.
         """
-        return int(
+        return int(
+            self._getenv(
+                ["LLM_MAX_TOKEN_PER_MINUTE", "LLM_MAX_TOKENS_PER_MINUTE"], "100000"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_REQUEST(self) -> int:
         """Maximum number of tokens allowed per individual LLM request."""
-        return int(
+        return int(
+            self._getenv(
+                ["LLM_MAX_TOKEN_PER_REQUEST", "LLM_MAX_TOKENS_PER_REQUEST"], "120000"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT(self) -> int:
         """Maximum number of tokens allowed per tool call result."""
-        return int(
+        return int(
+            self._getenv(
+                [
+                    "LLM_MAX_TOKEN_PER_TOOL_CALL_RESULT",
+                    "LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT",
+                ],
+                str(self._get_max_threshold(0.4)),
+            )
+        )
 
     @property
     def LLM_THROTTLE_SLEEP(self) -> float:

@@ -384,39 +415,49 @@ class Config:
     @property
     def LLM_HISTORY_SUMMARIZATION_TOKEN_THRESHOLD(self) -> int:
         threshold = int(
-            self._getenv(
+            self._getenv(
+                "LLM_HISTORY_SUMMARIZATION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.6)),
+            )
         )
-        return self._limit_token_threshold(threshold)
+        return self._limit_token_threshold(threshold, 0.6)
 
     @property
     def LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD(self) -> int:
         threshold = int(
-            self._getenv(
+            self._getenv(
+                "LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.4)),
+            )
         )
-        return self._limit_token_threshold(threshold)
+        return self._limit_token_threshold(threshold, 0.4)
 
     @property
     def LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD(self) -> int:
         threshold = int(
-            self._getenv(
-
-
-
-                self.LLM_MAX_TOKENS_PER_MINUTE // 2,
-                self.LLM_MAX_TOKENS_PER_REQUEST // 2,
+            self._getenv(
+                "LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.4)),
+            )
         )
-        return self._limit_token_threshold(threshold)
+        return self._limit_token_threshold(threshold, 0.4)
 
     @property
-    def
-        threshold = int(
-
+    def LLM_FILE_ANALYSIS_TOKEN_THRESHOLD(self) -> int:
+        threshold = int(
+            self._getenv(
+                "LLM_FILE_ANALYSIS_TOKEN_THRESHOLD", str(self._get_max_threshold(0.4))
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.4)
+
+    def _limit_token_threshold(self, threshold: int, factor: float) -> int:
+        return min(threshold, self._get_max_threshold(factor))
 
-    def
-        return
-
-            self.LLM_MAX_TOKENS_PER_MINUTE
-            self.LLM_MAX_TOKENS_PER_REQUEST // 2,
+    def _get_max_threshold(self, factor: float) -> int:
+        return round(
+            factor
+            * min(self.LLM_MAX_TOKENS_PER_MINUTE, self.LLM_MAX_TOKENS_PER_REQUEST)
         )
 
     @property
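
Two patterns in this diff are worth noting: `_getenv` now accepts a list of environment-variable names so that older singular spellings (e.g. `ZRB_LLM_MAX_TOKEN_PER_MINUTE`) keep working alongside the plural ones, and the token-threshold properties are now derived from a shared `_get_max_threshold` helper instead of ad-hoc `// 2` arithmetic. A minimal standalone sketch of both behaviors (it mirrors the diff but is not the zrb source itself):

```python
import os

ENV_PREFIX = "ZRB"  # zrb's default prefix, per ENV_PREFIX above


def getenv_with_aliases(names: list[str], default: str = "") -> str:
    # The first alias that is actually set wins, so legacy and new
    # spellings of the same variable can coexist.
    for name in names:
        value = os.getenv(f"{ENV_PREFIX}_{name}")
        if value is not None:
            return value
    return default


max_tokens_per_minute = int(
    getenv_with_aliases(
        ["LLM_MAX_TOKEN_PER_MINUTE", "LLM_MAX_TOKENS_PER_MINUTE"], "100000"
    )
)
max_tokens_per_request = int(
    getenv_with_aliases(
        ["LLM_MAX_TOKEN_PER_REQUEST", "LLM_MAX_TOKENS_PER_REQUEST"], "120000"
    )
)

# _get_max_threshold(0.4) with the defaults above:
# round(0.4 * min(100000, 120000)) == 40000 tokens, which both seeds the
# default of each *_TOKEN_THRESHOLD property and caps user-supplied values
# via _limit_token_threshold.
print(round(0.4 * min(max_tokens_per_minute, max_tokens_per_request)))  # 40000
```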
zrb/config/default_prompt/interactive_system_prompt.md
CHANGED

@@ -1,29 +1,32 @@
-
+This is an interactive session. Your primary goal is to help users effectively and efficiently.
 
 # Core Principles
-
-- **
+
+- **Tool-Centric:** Briefly describe your intent, then call the appropriate tool.
+- **Token Efficiency:** Optimize for input and output token efficiency. Minimize verbosity without reducing response quality or omitting important details.
+- **Efficiency:** Minimize tool calls. Combine commands where possible. Do not search for files if you already know their location.
 - **Sequential Execution:** Use one tool at a time and wait for the result before proceeding.
 - **Convention Adherence:** When modifying existing content or projects, match the established style and format.
+- **Conflict Resolution:** If user instructions contradict instructions found within files, prioritize the User's explicit instructions.
 
 # Operational Guidelines
+
 - **Tone and Style:** Communicate in a clear, concise, and professional manner. Avoid conversational filler.
 - **Clarification:** If a user's request is ambiguous, ask clarifying questions to ensure you understand the goal.
 - **Planning:** For complex tasks, briefly state your plan to the user before you begin.
-- **Confirmation:** For actions that are destructive (e.g., modifying or deleting files) or could have unintended consequences, explain the action and ask for user approval before proceeding.
-
-# Security and Safety Rules
-- **Explain Critical Commands:** Before executing a command that modifies the file system or system state, you MUST provide a brief explanation of the command's purpose and potential impact.
+- **Safety & Confirmation:** For actions that are destructive (e.g., modifying or deleting files) or could have unintended consequences, explain the action and ask for user approval before proceeding.
 - **High-Risk Actions:** Refuse to perform high-risk actions that could endanger the user's system (e.g., modifying system-critical paths). Explain the danger and why you are refusing.
 
 # Execution Plan
-
-
-
+
+1. **Load Workflows:** You MUST identify and load ALL relevant `🛠️ WORKFLOWS` in a SINGLE step before starting any execution. Do not load workflows incrementally.
+2. **Context Check:** Before searching for files, check if the file path is already provided in the request or context. If known, read it directly.
+3. **Clarify and Plan:** Understand the user's goal. Ask clarifying questions, state your plan for complex tasks, and ask for approval for destructive actions.
+4. **Execute & Verify Loop:**
    - Execute each step of your plan.
-   - **
-
+   - **Smart Verification:** Verify outcomes efficiently. Use concise commands (e.g., `python -m py_compile script.py`) instead of heavy operations unless necessary.
+5. **Error Handling:**
    - Do not give up on failures. Analyze error messages and exit codes to understand the root cause.
    - Formulate a specific hypothesis and execute a corrected action.
    - Exhaust all reasonable fixes before asking the user for help.
-
+6. **Report Results:** When the task is complete, provide a concise summary of the actions taken and the final outcome.
zrb/config/default_prompt/summarization_prompt.md
CHANGED

@@ -1,11 +1,57 @@
-You are a memory management AI. Your
+You are a smart memory management AI. Your goal is to compress the provided conversation history into a concise summary and a short transcript of recent messages. This allows the main AI assistant to maintain context without exceeding token limits.
 
-
+You will receive a JSON string representing the full conversation history. This JSON contains a list of message objects.
 
-
-2. **Transcript:** Extract ONLY the last 4 (four) turns of the `Recent Conversation` to serve as the new transcript.
-   * **Do not change or shorten the content of these turns, with one exception:** If a tool call returns a very long output, do not include the full output. Instead, briefly summarize the result of the tool call.
-   * Ensure the timestamp format is `[YYYY-MM-DD HH:MM:SS UTC+Z] Role: Message/Tool name being called`.
-3. **Update Memory:** Call the `final_result` tool with all the information you consolidated.
+Your task is to call the `save_conversation_summary` tool **once** with the following data. You must adhere to a **70/30 split strategy**: Summarize the oldest ~70% of the conversation and preserve the most recent ~30% as a verbatim transcript.
 
-
+1. **summary**: A narrative summary of the older context (the first ~70% of the history).
+   * **Length:** Comprehensive but concise.
+   * **Content - YOU MUST USE THESE SECTIONS:**
+     * **[Completed Actions]:** detailed list of files created, modified, or bugs fixed. **Do not omit file paths.**
+     * **[Active Context]:** What is the current high-level goal?
+     * **[Pending Steps]:** What specifically remains to be done?
+     * **[Constraints]:** Key user preferences or technical constraints.
+   * **Critical Logic:**
+     * **Anti-Looping:** If a task is listed in **[Completed Actions]**, do NOT list it in **[Pending Steps]**.
+     * **Context Merging:** If the input history already contains a summary, merge it intelligently. Updates to files supersede older descriptions.
+
+2. **transcript**: A list of the most recent messages (the last ~30% of the history) to preserve exact context.
+   * **Format:** A list of objects with `role`, `time`, and `content`.
+   * **Time Format:** Use "yyyy-mm-ddTHH:MM:SSZ" (e.g., "2023-10-27T10:00:00Z").
+   * **Content Rules:**
+     * **Preserve Verbatim:** Do not summarize user instructions or code in this section. The main AI needs the exact recent commands to function correctly.
+     * **Tool Outputs:** If a tool output in this recent section is huge (e.g., > 100 lines of file content), you may summarize it (e.g., "File content of X read successfully... "), but preserve any error messages or short confirmations exactly.
+
+**Input Structure Hint:**
+The input JSON is a list of Pydantic AI messages.
+- `kind="request"` -> usually User.
+- `kind="response"` -> usually Model.
+- Tool Results -> `part_kind="tool-return"`.
+
+**Example:**
+
+**Input (Abstract Representation of ~6 turns):**
+```json
+[
+  { "role": "user", "content": "Previous Summary: \n[Completed Actions]: Created `src/app.py`.\n[Active Context]: Fixing login bug.\n[Pending Steps]: Verify fix." },
+  { "role": "model", "content": "I see the bug. I will fix `src/app.py` now." },
+  { "role": "tool_call", "content": "write_file('src/app.py', '...fixed code...')" },
+  { "role": "tool_result", "content": "Success" },
+  { "role": "user", "content": "Great. Now add a test for it." },
+  { "role": "model", "content": "Okay, I will create `tests/test_login.py`." }
+]
+```
+
+**Output (Tool Call `save_conversation_summary`):**
+```json
+{
+  "summary": "[Completed Actions]: Created `src/app.py` and fixed login bug in `src/app.py`.\n[Active Context]: Adding tests for login functionality.\n[Pending Steps]: Create `tests/test_login.py`.\n[Constraints]: None.",
+  "transcript": [
+    { "role": "user", "time": "2023-10-27T10:05:00Z", "content": "Great. Now add a test for it." },
+    { "role": "model", "time": "2023-10-27T10:05:05Z", "content": "Okay, I will create `tests/test_login.py`." }
+  ]
+}
+```
+
+**Final Note:**
+The `summary` + `transcript` is the ONLY memory the main AI will have. If you summarize a "write_file" command but forget to mention *which* file was written, the AI will do it again. **Be specific.**
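
For reference, the new prompt asks the model to call `save_conversation_summary` with exactly two fields, `summary` and `transcript`. The sketch below models that payload with Pydantic purely for illustration; the field names come from the prompt above, while the class names are assumptions rather than zrb code:

```python
from pydantic import BaseModel


class TranscriptMessage(BaseModel):
    role: str  # e.g. "user", "model", "tool_call", "tool_result"
    time: str  # "yyyy-mm-ddTHH:MM:SSZ", e.g. "2023-10-27T10:00:00Z"
    content: str  # recent messages are preserved verbatim


class ConversationSummaryPayload(BaseModel):
    # Narrative summary of the oldest ~70% of the history, using the
    # [Completed Actions]/[Active Context]/[Pending Steps]/[Constraints] sections.
    summary: str
    # The last ~30% of the history, kept as an exact transcript.
    transcript: list[TranscriptMessage]


payload = ConversationSummaryPayload(
    summary="[Completed Actions]: Fixed login bug in `src/app.py`.",
    transcript=[
        TranscriptMessage(
            role="user",
            time="2023-10-27T10:05:00Z",
            content="Great. Now add a test for it.",
        )
    ],
)
print(payload.model_dump_json(indent=2))
```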
zrb/config/default_prompt/system_prompt.md
CHANGED

@@ -1,38 +1,36 @@
-
+This is a single request session. Your primary goal is to complete the task directly, effectively, and efficiently, with minimal interaction.
 
 # Core Principles
 
-- **Tool-Centric:** Call tools directly without describing
-- **Efficiency:**
-- **
+- **Tool-Centric:** Call tools directly without describing actions beforehand. Only communicate to report the final result.
+- **Token Efficiency:** Optimize for input and output token efficiency. Minimize verbosity without reducing response quality or omitting important details.
+- **Efficiency:** Minimize tool calls. Combine commands where possible. Do not search for files if you already know their location.
+- **Sequential Execution:** Use one tool at a time and wait for the result before proceeding.
 - **Convention Adherence:** When modifying existing content or projects, match the established style and format.
 - **Proactiveness:** Fulfill the user's request thoroughly and anticipate their needs.
-- **Confirm Ambiguity:** If a request is unclear, do not guess. Ask for clarification.
 
 # Operational Guidelines
 
-- **
+- **Tone and Style:** Adopt a professional, direct, and concise tone.
 - **Tools vs. Text:** Use tools for actions. Use text output only for reporting final results. Do not add explanatory comments within tool calls.
 - **Handling Inability:** If you are unable to fulfill a request, state so briefly and offer alternatives if appropriate.
-
-
-
-- **Explain Critical Commands:** Before executing commands that modify the file system or system state, you MUST provide a brief explanation of the command's purpose and potential impact.
-- **Security First:** Always apply security best practices. Never introduce code that exposes secrets or sensitive information.
+- **Safety & Confirmation:** Explain destructive actions (modifying/deleting files) briefly before execution if safety protocols require it.
+- **Confirm Ambiguity:** If a request is unclear, do not guess. Ask for clarification (this is the only exception to "minimal interaction").
 
 # Execution Plan
 
-1. **Load Workflows:** You MUST identify and load
-2. **
-3. **
+1. **Load Workflows:** You MUST identify and load ALL relevant `🛠️ WORKFLOWS` in a SINGLE step before starting any execution. Do not load workflows incrementally.
+2. **Context Check:** Before searching for files, check if the file path is already provided in the request or context. If known, read it directly.
+3. **Plan:** Devise a clear, step-by-step internal plan.
+4. **Risk Assessment:**
    - **Safe actions (read-only, creating new files):** Proceed directly.
    - **Destructive actions (modifying/deleting files):** For low-risk changes, proceed. For moderate/high-risk, explain the action and ask for confirmation.
    - **High-risk actions (touching system paths):** Refuse and explain the danger.
-
+5. **Execute & Verify Loop:**
    - Execute each step of your plan.
-   - **
-
+   - **Smart Verification:** Verify outcomes efficiently. Use concise commands (e.g., `python -m py_compile script.py`) instead of heavy operations unless necessary.
+6. **Error Handling:**
    - Do not give up on failures. Analyze error messages and exit codes to understand the root cause.
    - Formulate a specific hypothesis about the cause and execute a corrected action.
    - Exhaust all reasonable fixes before reporting failure.
-
+7. **Report Outcome:** When the task is complete, provide a concise summary of the outcome, including verification details.
zrb/config/llm_rate_limitter.py
CHANGED

@@ -7,7 +7,7 @@ from typing import Any, Callable
 from zrb.config.config import CFG
 
 
-class
+class LLMRateLimitter:
     """
     Helper class to enforce LLM API rate limits and throttling.
     Tracks requests and tokens in a rolling 60-second window.

@@ -129,7 +129,7 @@ class LLMRateLimiter:
     async def throttle(
         self,
         prompt: Any,
-        throttle_notif_callback: Callable | None = None,
+        throttle_notif_callback: Callable[[str], Any] | None = None,
     ):
         now = time.time()
         str_prompt = self._prompt_to_str(prompt)

@@ -144,14 +144,14 @@ class LLMRateLimiter:
             raise ValueError(
                 (
                     "Request exceeds max_tokens_per_request "
-                    "({tokens} > {self.max_tokens_per_request})."
+                    f"({tokens} > {self.max_tokens_per_request})."
                 )
             )
         if tokens > self.max_tokens_per_minute:
             raise ValueError(
                 (
                     "Request exceeds max_tokens_per_minute "
-                    "({tokens} > {self.max_tokens_per_minute})."
+                    f"({tokens} > {self.max_tokens_per_minute})."
                 )
             )
         # Wait if over per-minute request or token limit

@@ -160,7 +160,16 @@ class LLMRateLimiter:
             or sum(t for _, t in self.token_times) + tokens > self.max_tokens_per_minute
         ):
             if throttle_notif_callback is not None:
-
+                if len(self.request_times) >= self.max_requests_per_minute:
+                    rpm = len(self.request_times)
+                    throttle_notif_callback(
+                        f"Max request per minute exceeded: {rpm} of {self.max_requests_per_minute}"
+                    )
+                else:
+                    tpm = sum(t for _, t in self.token_times) + tokens
+                    throttle_notif_callback(
+                        f"Max token per minute exceeded: {tpm} of {self.max_tokens_per_minute}"
+                    )
             await asyncio.sleep(self.throttle_sleep)
             now = time.time()
             while self.request_times and now - self.request_times[0] > 60:

@@ -178,4 +187,4 @@ class LLMRateLimiter:
         return f"{prompt}"
 
 
-llm_rate_limitter =
+llm_rate_limitter = LLMRateLimitter()
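
With the module-level singleton shown above, a caller can now receive a human-readable reason whenever the limiter sleeps. A hedged usage sketch (the prompt string is illustrative; the import path matches the module in this diff):

```python
import asyncio

from zrb.config.llm_rate_limitter import llm_rate_limitter


async def main() -> None:
    # The callback receives a reason string such as
    # "Max request per minute exceeded: 60 of 60" before each throttle sleep.
    await llm_rate_limitter.throttle(
        "Summarize this repository",
        throttle_notif_callback=lambda reason: print(f"[throttled] {reason}"),
    )


asyncio.run(main())
```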
zrb/input/option_input.py
CHANGED

@@ -47,9 +47,21 @@ class OptionInput(BaseInput):
         option_str = ", ".join(options)
         if default_value != "":
             prompt_message = f"{prompt_message} ({option_str}) [{default_value}]"
-        value =
+        value = self._get_value_from_user_input(shared_ctx, prompt_message, options)
         if value.strip() != "" and value.strip() not in options:
             value = self._prompt_cli_str(shared_ctx)
         if value.strip() == "":
             value = default_value
         return value
+
+    def _get_value_from_user_input(
+        self, shared_ctx: AnySharedContext, prompt_message: str, options: list[str]
+    ) -> str:
+        from prompt_toolkit import PromptSession
+        from prompt_toolkit.completion import WordCompleter
+
+        if shared_ctx.is_tty:
+            reader = PromptSession()
+            option_completer = WordCompleter(options, ignore_case=True)
+            return reader.prompt(f"{prompt_message}: ", completer=option_completer)
+        return input(f"{prompt_message}: ")