zrb 1.21.6__py3-none-any.whl → 1.21.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of zrb might be problematic.

Files changed (47)
  1. zrb/attr/type.py +10 -7
  2. zrb/builtin/git.py +12 -1
  3. zrb/builtin/llm/chat_completion.py +274 -0
  4. zrb/builtin/llm/chat_session_cmd.py +90 -28
  5. zrb/builtin/llm/chat_trigger.py +7 -1
  6. zrb/builtin/llm/history.py +4 -4
  7. zrb/builtin/llm/tool/api.py +3 -1
  8. zrb/builtin/llm/tool/cli.py +2 -1
  9. zrb/builtin/llm/tool/code.py +11 -3
  10. zrb/builtin/llm/tool/file.py +112 -142
  11. zrb/builtin/llm/tool/note.py +36 -16
  12. zrb/builtin/llm/tool/rag.py +17 -8
  13. zrb/builtin/llm/tool/sub_agent.py +41 -15
  14. zrb/config/config.py +108 -13
  15. zrb/config/default_prompt/file_extractor_system_prompt.md +16 -16
  16. zrb/config/default_prompt/interactive_system_prompt.md +11 -11
  17. zrb/config/default_prompt/repo_extractor_system_prompt.md +16 -16
  18. zrb/config/default_prompt/repo_summarizer_system_prompt.md +3 -3
  19. zrb/config/default_prompt/summarization_prompt.md +54 -8
  20. zrb/config/default_prompt/system_prompt.md +15 -15
  21. zrb/config/llm_rate_limitter.py +24 -5
  22. zrb/input/option_input.py +13 -1
  23. zrb/task/llm/agent.py +42 -144
  24. zrb/task/llm/agent_runner.py +152 -0
  25. zrb/task/llm/config.py +8 -7
  26. zrb/task/llm/conversation_history.py +35 -24
  27. zrb/task/llm/conversation_history_model.py +4 -11
  28. zrb/task/llm/default_workflow/coding/workflow.md +2 -3
  29. zrb/task/llm/file_replacement.py +206 -0
  30. zrb/task/llm/file_tool_model.py +57 -0
  31. zrb/task/llm/history_processor.py +206 -0
  32. zrb/task/llm/history_summarization.py +2 -179
  33. zrb/task/llm/print_node.py +14 -5
  34. zrb/task/llm/prompt.py +8 -19
  35. zrb/task/llm/subagent_conversation_history.py +41 -0
  36. zrb/task/llm/tool_wrapper.py +27 -12
  37. zrb/task/llm_task.py +55 -47
  38. zrb/util/attr.py +17 -10
  39. zrb/util/cli/text.py +6 -4
  40. zrb/util/git.py +2 -2
  41. zrb/util/yaml.py +1 -0
  42. zrb/xcom/xcom.py +10 -0
  43. {zrb-1.21.6.dist-info → zrb-1.21.28.dist-info}/METADATA +5 -5
  44. {zrb-1.21.6.dist-info → zrb-1.21.28.dist-info}/RECORD +46 -41
  45. zrb/task/llm/history_summarization_tool.py +0 -24
  46. {zrb-1.21.6.dist-info → zrb-1.21.28.dist-info}/WHEEL +0 -0
  47. {zrb-1.21.6.dist-info → zrb-1.21.28.dist-info}/entry_points.txt +0 -0
zrb/config/config.py CHANGED
@@ -28,8 +28,13 @@ class Config:
     def ENV_PREFIX(self) -> str:
         return os.getenv("_ZRB_ENV_PREFIX", "ZRB")
 
-    def _getenv(self, env_name: str, default: str = "") -> str:
-        return os.getenv(f"{self.ENV_PREFIX}_{env_name}", default)
+    def _getenv(self, env_name: str | list[str], default: str = "") -> str:
+        env_name_list = env_name if isinstance(env_name, list) else [env_name]
+        for env_name in env_name_list:
+            value = os.getenv(f"{self.ENV_PREFIX}_{env_name}", None)
+            if value is not None:
+                return value
+        return default
 
     def _get_internal_default_prompt(self, name: str) -> str:
         if name not in self.__internal_default_prompt:
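The rewritten `_getenv` accepts either a single name or an ordered list of candidate names and returns the first environment variable that is set, falling back to the default only when none match. A minimal standalone sketch of that lookup order, assuming the default `ZRB` prefix shown above (the variable names below are illustrative):

```python
import os


def getenv_first(env_name: str | list[str], default: str = "") -> str:
    # Mirror of the new _getenv: try each candidate in order, first hit wins.
    env_name_list = env_name if isinstance(env_name, list) else [env_name]
    for name in env_name_list:
        value = os.getenv(f"ZRB_{name}")
        if value is not None:
            return value
    return default


# Either spelling now resolves to the same setting:
os.environ["ZRB_LLM_MAX_REQUEST_PER_MINUTE"] = "30"  # singular variant
print(getenv_first(["LLM_MAX_REQUEST_PER_MINUTE", "LLM_MAX_REQUESTS_PER_MINUTE"]))  # 30
```

This is the mechanism the later hunks use to accept both singular and plural spellings of the same variable.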
@@ -60,6 +65,38 @@ class Config:
     def DEFAULT_EDITOR(self) -> str:
         return self._getenv("EDITOR", "nano")
 
+    @property
+    def DEFAULT_DIFF_EDIT_COMMAND_TPL(self) -> str:
+        return self._getenv("DIFF_EDIT_COMMAND", self._get_default_diff_edit_command())
+
+    def _get_default_diff_edit_command(self) -> str:
+        editor = self.DEFAULT_EDITOR
+        if editor in [
+            "code",
+            "vscode",
+            "vscodium",
+            "windsurf",
+            "cursor",
+            "zed",
+            "zeditor",
+            "agy",
+        ]:
+            return f"{editor} --wait --diff {{old}} {{new}}"
+        if editor == "emacs":
+            return 'emacs --eval \'(ediff-files "{old}" "{new}")\''
+        if editor in ["nvim", "vim"]:
+            return (
+                f"{editor} -d {{old}} {{new}} "
+                "-i NONE "
+                '-c "wincmd h | set readonly | wincmd l" '
+                '-c "highlight DiffAdd cterm=bold ctermbg=22 guibg=#005f00 | highlight DiffChange cterm=bold ctermbg=24 guibg=#005f87 | highlight DiffText ctermbg=21 guibg=#0000af | highlight DiffDelete ctermbg=52 guibg=#5f0000" '  # noqa
+                '-c "set showtabline=2 | set tabline=[Instructions]\\ :wqa(save\\ &\\ quit)\\ \\|\\ i/esc(toggle\\ edit\\ mode)" '  # noqa
+                '-c "wincmd h | setlocal statusline=OLD\\ FILE" '
+                '-c "wincmd l | setlocal statusline=%#StatusBold#NEW\\ FILE\\ :wqa(save\\ &\\ quit)\\ \\|\\ i/esc(toggle\\ edit\\ mode)" '  # noqa
+                '-c "autocmd BufWritePost * wqa"'
+            )
+        return 'vimdiff {old} {new} +"setlocal ro" +"wincmd l" +"autocmd BufWritePost <buffer> qa"'  # noqa
+
     @property
     def INIT_MODULES(self) -> list[str]:
         init_modules_str = self._getenv("INIT_MODULES", "")
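`DEFAULT_DIFF_EDIT_COMMAND_TPL` returns a shell command template with `{old}` and `{new}` placeholders for the two file paths being diffed. Presumably the new `zrb/task/llm/file_replacement.py` renders it with `str.format` before execution; that rendering step is an assumption, only the template strings themselves come from the code above:

```python
# Hypothetical rendering of the template; format() is an assumed consumer
# of DEFAULT_DIFF_EDIT_COMMAND_TPL, not code taken from the diff itself.
template = "code --wait --diff {old} {new}"  # the VS Code-family template above
command = template.format(old="/tmp/config.py.old", new="/tmp/config.py.new")
print(command)  # -> code --wait --diff /tmp/config.py.old /tmp/config.py.new
```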
@@ -287,7 +324,9 @@ class Config:
     @property
     def LLM_BUILTIN_WORKFLOW_PATHS(self) -> list[str]:
         """Get a list of additional builtin workflow paths from environment variables."""
-        builtin_workflow_paths_str = self._getenv("LLM_BUILTIN_WORKFLOW_PATHS", "")
+        builtin_workflow_paths_str = self._getenv(
+            ["LLM_BUILTIN_WORFKLOW_PATH", "LLM_BUILTIN_WORKFLOW_PATHS"], ""
+        )
         if builtin_workflow_paths_str != "":
             return [
                 path.strip()
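Note that the first candidate, `LLM_BUILTIN_WORFKLOW_PATH`, carries that misspelling in the released code itself, and `_getenv` checks it before the correctly spelled plural. A hedged sketch of the effect (a single path is used to sidestep the list separator, which the truncated hunk does not show):

```python
import os

# The misspelled name is checked first; "WORFKLOW" is the spelling that
# actually ships in 1.21.28, not a typo introduced by this diff.
os.environ["ZRB_LLM_BUILTIN_WORFKLOW_PATH"] = "/tmp/my-workflow.md"

from zrb.config.config import CFG  # module-level instance used elsewhere in this diff

print(CFG.LLM_BUILTIN_WORKFLOW_PATHS)  # expected: ["/tmp/my-workflow.md"]
```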
@@ -306,13 +345,21 @@ class Config:
         value = self._getenv("LLM_SUMMARIZATION_PROMPT")
         return None if value == "" else value
 
+    @property
+    def LLM_SHOW_TOOL_CALL_RESULT(self) -> bool:
+        return to_boolean(self._getenv("LLM_SHOW_TOOL_CALL_RESULT", "false"))
+
     @property
     def LLM_MAX_REQUESTS_PER_MINUTE(self) -> int:
         """
         Maximum number of LLM requests allowed per minute.
         Default is conservative to accommodate free-tier LLM providers.
         """
-        return int(self._getenv("LLM_MAX_REQUESTS_PER_MINUTE", "60"))
+        return int(
+            self._getenv(
+                ["LLM_MAX_REQUEST_PER_MINUTE", "LLM_MAX_REQUESTS_PER_MINUTE"], "60"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_MINUTE(self) -> int:
@@ -320,22 +367,38 @@ class Config:
         Maximum number of LLM tokens allowed per minute.
         Default is conservative to accommodate free-tier LLM providers.
         """
-        return int(self._getenv("LLM_MAX_TOKENS_PER_MINUTE", "100000"))
+        return int(
+            self._getenv(
+                ["LLM_MAX_TOKEN_PER_MINUTE", "LLM_MAX_TOKENS_PER_MINUTE"], "100000"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_REQUEST(self) -> int:
         """Maximum number of tokens allowed per individual LLM request."""
-        return int(self._getenv("LLM_MAX_TOKENS_PER_REQUEST", "100000"))
+        return int(
+            self._getenv(
+                ["LLM_MAX_TOKEN_PER_REQUEST", "LLM_MAX_TOKENS_PER_REQUEST"], "120000"
+            )
+        )
 
     @property
     def LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT(self) -> int:
         """Maximum number of tokens allowed per tool call result."""
-        return int(self._getenv("LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT", "75000"))
+        return int(
+            self._getenv(
+                [
+                    "LLM_MAX_TOKEN_PER_TOOL_CALL_RESULT",
+                    "LLM_MAX_TOKENS_PER_TOOL_CALL_RESULT",
+                ],
+                str(self._get_max_threshold(0.4)),
+            )
+        )
 
     @property
     def LLM_THROTTLE_SLEEP(self) -> float:
         """Number of seconds to sleep when throttling is required."""
-        return float(self._getenv("LLM_THROTTLE_SLEEP", "1.0"))
+        return float(self._getenv("LLM_THROTTLE_SLEEP", "5.0"))
 
     @property
     def LLM_YOLO_MODE(self) -> bool | list[str]:
@@ -351,19 +414,51 @@ class Config:
 
     @property
     def LLM_HISTORY_SUMMARIZATION_TOKEN_THRESHOLD(self) -> int:
-        return int(self._getenv("LLM_HISTORY_SUMMARIZATION_TOKEN_THRESHOLD", "20000"))
+        threshold = int(
+            self._getenv(
+                "LLM_HISTORY_SUMMARIZATION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.6)),
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.6)
 
     @property
     def LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD(self) -> int:
-        return int(self._getenv("LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_LIMIT", "100000"))
+        threshold = int(
+            self._getenv(
+                "LLM_REPO_ANALYSIS_EXTRACTION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.4)),
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.4)
 
     @property
     def LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD(self) -> int:
-        return int(self._getenv("LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_LIMIT", "20000"))
+        threshold = int(
+            self._getenv(
+                "LLM_REPO_ANALYSIS_SUMMARIZATION_TOKEN_THRESHOLD",
+                str(self._get_max_threshold(0.4)),
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.4)
 
     @property
-    def LLM_FILE_ANALYSIS_TOKEN_LIMIT(self) -> int:
-        return int(self._getenv("LLM_FILE_ANALYSIS_TOKEN_LIMIT", "100000"))
+    def LLM_FILE_ANALYSIS_TOKEN_THRESHOLD(self) -> int:
+        threshold = int(
+            self._getenv(
+                "LLM_FILE_ANALYSIS_TOKEN_THRESHOLD", str(self._get_max_threshold(0.4))
+            )
+        )
+        return self._limit_token_threshold(threshold, 0.4)
+
+    def _limit_token_threshold(self, threshold: int, factor: float) -> int:
+        return min(threshold, self._get_max_threshold(factor))
+
+    def _get_max_threshold(self, factor: float) -> int:
+        return round(
+            factor
+            * min(self.LLM_MAX_TOKENS_PER_MINUTE, self.LLM_MAX_TOKENS_PER_REQUEST)
+        )
 
     @property
     def LLM_FILE_EXTRACTOR_SYSTEM_PROMPT(self) -> str:
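`_get_max_threshold` now derives every default token ceiling from the two rate limits, and `_limit_token_threshold` clamps even explicit overrides back to that ceiling. With the defaults above (100000 tokens per minute, 120000 tokens per request), the numbers work out as follows:

```python
# Reproducing the threshold arithmetic with the default limits from this file.
max_tokens_per_minute = 100_000
max_tokens_per_request = 120_000


def get_max_threshold(factor: float) -> int:
    return round(factor * min(max_tokens_per_minute, max_tokens_per_request))


print(get_max_threshold(0.6))  # 60000 -> history summarization default
print(get_max_threshold(0.4))  # 40000 -> file/repo analysis and tool-call-result defaults
# An env override larger than the ceiling is clamped back down:
print(min(95_000, get_max_threshold(0.6)))  # 60000, as in _limit_token_threshold
```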
zrb/config/default_prompt/file_extractor_system_prompt.md CHANGED
@@ -4,10 +4,10 @@ You are an expert code and configuration analysis agent. Your purpose is to anal
 
 1. **Analyze File Content**: Determine the file's type (e.g., Python, Dockerfile, YAML, Markdown).
 2. **Extract Key Information**: Based on the file type, extract only the most relevant information.
-   * **Source Code** (`.py`, `.js`, `.go`): Extract classes, functions, key variables, and their purpose.
-   * **Configuration** (`.yaml`, `.toml`, `.json`): Extract main sections, keys, and values.
-   * **Infrastructure** (`Dockerfile`, `.tf`): Extract resources, settings, and commands.
-   * **Documentation** (`.md`): Extract headings, summaries, and code blocks.
+   * **Source Code** (`.py`, `.js`, `.go`): Extract classes, functions, key variables, and their purpose.
+   * **Configuration** (`.yaml`, `.toml`, `.json`): Extract main sections, keys, and values.
+   * **Infrastructure** (`Dockerfile`, `.tf`): Extract resources, settings, and commands.
+   * **Documentation** (`.md`): Extract headings, summaries, and code blocks.
 3. **Format Output**: Present the summary in structured markdown.
 
 ### Guiding Principles
@@ -61,17 +61,17 @@ This file sets up the database connection and defines the `User` model using SQL
 **Key Components:**
 
 * **Configuration:**
-  * `DATABASE_URL`: Determined by the `DATABASE_URL` environment variable, defaulting to a local SQLite database.
+  * `DATABASE_URL`: Determined by the `DATABASE_URL` environment variable, defaulting to a local SQLite database.
 * **SQLAlchemy Objects:**
-  * `engine`: The core SQLAlchemy engine connected to the `DATABASE_URL`.
-  * `SessionLocal`: A factory for creating new database sessions.
-  * `Base`: The declarative base for ORM models.
+  * `engine`: The core SQLAlchemy engine connected to the `DATABASE_URL`.
+  * `SessionLocal`: A factory for creating new database sessions.
+  * `Base`: The declarative base for ORM models.
 * **ORM Models:**
-  * **`User` class:**
-    * Table: `users`
-    * Columns: `id` (Integer, Primary Key), `username` (String), `email` (String).
+  * **`User` class:**
+    * Table: `users`
+    * Columns: `id` (Integer, Primary Key), `username` (String), `email` (String).
 * **Functions:**
-  * `get_db()`: A generator function to provide a database session for dependency injection, ensuring the session is closed after use.
+  * `get_db()`: A generator function to provide a database session for dependency injection, ensuring the session is closed after use.
 ```
 
 #### Example 2: Infrastructure File (`Dockerfile`)
@@ -101,12 +101,12 @@ This Dockerfile defines a container for a Python 3.9 application.
 * **Base Image:** `python:3.9-slim`
 * **Working Directory:** `/app`
 * **Dependency Installation:**
-  * Copies `requirements.txt` into the container.
-  * Installs the dependencies using `pip`.
+  * Copies `requirements.txt` into the container.
+  * Installs the dependencies using `pip`.
 * **Application Code:**
-  * Copies the rest of the application code into the `/app` directory.
+  * Copies the rest of the application code into the `/app` directory.
 * **Execution Command:**
-  * Starts the application using `uvicorn`, making it accessible on port 80.
+  * Starts the application using `uvicorn`, making it accessible on port 80.
 ```
 ---
 Produce only the markdown summary for the files provided. Do not add any conversational text or introductory phrases.
zrb/config/default_prompt/interactive_system_prompt.md CHANGED
@@ -1,4 +1,4 @@
-You are an expert interactive AI agent. Your primary goal is to help users safely and efficiently.
+This is an interactive session. Your primary goal is to help users effectively and efficiently.
 
 # Core Principles
 - **Tool-Centric:** Describe what you are about to do, then call the appropriate tool.
@@ -17,13 +17,13 @@ You are an expert interactive AI agent. Your primary goal is to help users safel
 - **High-Risk Actions:** Refuse to perform high-risk actions that could endanger the user's system (e.g., modifying system-critical paths). Explain the danger and why you are refusing.
 
 # Execution Plan
-1. **Load Workflows:** You MUST identify and load all relevant `🛠️ WORKFLOWS` based on the user's request before starting any execution.
-2. **Clarify and Plan:** Understand the user's goal. Ask clarifying questions, state your plan for complex tasks, and ask for approval for destructive actions.
-3. **Execute & Verify Loop:**
-   - Execute each step of your plan.
-   - **CRITICAL:** Verify the outcome of each action (e.g., check exit codes, confirm file modifications) before proceeding.
-4. **Error Handling:**
-   - Do not give up on failures. Analyze error messages and exit codes to understand the root cause.
-   - Formulate a specific hypothesis and execute a corrected action.
-   - Exhaust all reasonable fixes before asking the user for help.
-5. **Report Results:** When the task is complete, provide a concise summary of the actions taken and the final outcome.
+1. **Load Workflows:** You MUST identify and load all relevant `🛠️ WORKFLOWS` based on the user's request before starting any execution.
+2. **Clarify and Plan:** Understand the user's goal. Ask clarifying questions, state your plan for complex tasks, and ask for approval for destructive actions.
+3. **Execute & Verify Loop:**
+   - Execute each step of your plan.
+   - **CRITICAL:** Verify the outcome of each action (e.g., check exit codes, confirm file modifications) before proceeding.
+4. **Error Handling:**
+   - Do not give up on failures. Analyze error messages and exit codes to understand the root cause.
+   - Formulate a specific hypothesis and execute a corrected action.
+   - Exhaust all reasonable fixes before asking the user for help.
+5. **Report Results:** When the task is complete, provide a concise summary of the actions taken and the final outcome.
zrb/config/default_prompt/repo_extractor_system_prompt.md CHANGED
@@ -4,10 +4,10 @@ You are an expert code and configuration analysis agent. Your purpose is to anal
 
 1. **Analyze File Content**: Determine the file's type (e.g., Python, Dockerfile, YAML, Markdown).
 2. **Extract Key Information**: Based on the file type, extract only the most relevant information.
-   * **Source Code** (`.py`, `.js`, `.go`): Extract classes, functions, key variables, and their purpose.
-   * **Configuration** (`.yaml`, `.toml`, `.json`): Extract main sections, keys, and values.
-   * **Infrastructure** (`Dockerfile`, `.tf`): Extract resources, settings, and commands.
-   * **Documentation** (`.md`): Extract headings, summaries, and code blocks.
+   * **Source Code** (`.py`, `.js`, `.go`): Extract classes, functions, key variables, and their purpose.
+   * **Configuration** (`.yaml`, `.toml`, `.json`): Extract main sections, keys, and values.
+   * **Infrastructure** (`Dockerfile`, `.tf`): Extract resources, settings, and commands.
+   * **Documentation** (`.md`): Extract headings, summaries, and code blocks.
 3. **Format Output**: Present the summary in structured markdown.
 
 ### Guiding Principles
@@ -61,17 +61,17 @@ This file sets up the database connection and defines the `User` model using SQL
 **Key Components:**
 
 * **Configuration:**
-  * `DATABASE_URL`: Determined by the `DATABASE_URL` environment variable, defaulting to a local SQLite database.
+  * `DATABASE_URL`: Determined by the `DATABASE_URL` environment variable, defaulting to a local SQLite database.
 * **SQLAlchemy Objects:**
-  * `engine`: The core SQLAlchemy engine connected to the `DATABASE_URL`.
-  * `SessionLocal`: A factory for creating new database sessions.
-  * `Base`: The declarative base for ORM models.
+  * `engine`: The core SQLAlchemy engine connected to the `DATABASE_URL`.
+  * `SessionLocal`: A factory for creating new database sessions.
+  * `Base`: The declarative base for ORM models.
 * **ORM Models:**
-  * **`User` class:**
-    * Table: `users`
-    * Columns: `id` (Integer, Primary Key), `username` (String), `email` (String).
+  * **`User` class:**
+    * Table: `users`
+    * Columns: `id` (Integer, Primary Key), `username` (String), `email` (String).
 * **Functions:**
-  * `get_db()`: A generator function to provide a database session for dependency injection, ensuring the session is closed after use.
+  * `get_db()`: A generator function to provide a database session for dependency injection, ensuring the session is closed after use.
 ```
 
 #### Example 2: Infrastructure File (`Dockerfile`)
@@ -101,12 +101,12 @@ This Dockerfile defines a container for a Python 3.9 application.
 * **Base Image:** `python:3.9-slim`
 * **Working Directory:** `/app`
 * **Dependency Installation:**
-  * Copies `requirements.txt` into the container.
-  * Installs the dependencies using `pip`.
+  * Copies `requirements.txt` into the container.
+  * Installs the dependencies using `pip`.
 * **Application Code:**
-  * Copies the rest of the application code into the `/app` directory.
+  * Copies the rest of the application code into the `/app` directory.
 * **Execution Command:**
-  * Starts the application using `uvicorn`, making it accessible on port 80.
+  * Starts the application using `uvicorn`, making it accessible on port 80.
 ```
 ---
 Produce only the markdown summary for the files provided. Do not add any conversational text or introductory phrases.
zrb/config/default_prompt/repo_summarizer_system_prompt.md CHANGED
@@ -5,9 +5,9 @@ You are an expert synthesis agent. Your goal is to consolidate multiple file sum
 1. **Synthesize, Don't List**: Do not simply concatenate the summaries. Weave the information together into a unified narrative.
 2. **Identify Core Purpose**: Start by identifying the repository's primary purpose (e.g., "This is a Python web service using FastAPI and SQLAlchemy").
 3. **Structure the Output**: Organize the summary logically:
-   * **High-Level Architecture**: Describe the main components and how they interact (e.g., "It uses a Dockerfile for containerization, `main.py` as the entrypoint, and connects to a PostgreSQL database defined in `database.py`.").
-   * **Key Files**: Briefly explain the role of the most important files.
-   * **Configuration**: Summarize the key configuration points (e.g., "Configuration is handled in `config.py` and sourced from environment variables.").
+   * **High-Level Architecture**: Describe the main components and how they interact (e.g., "It uses a Dockerfile for containerization, `main.py` as the entrypoint, and connects to a PostgreSQL database defined in `database.py`.").
+   * **Key Files**: Briefly explain the role of the most important files.
+   * **Configuration**: Summarize the key configuration points (e.g., "Configuration is handled in `config.py` and sourced from environment variables.").
 4. **Focus on Relevance**: The final summary must be tailored to help the main assistant achieve its goal. Omit trivial details.
 
 ### Example
zrb/config/default_prompt/summarization_prompt.md CHANGED
@@ -1,11 +1,57 @@
-You are a memory management AI. Your only task is to process the provided conversation history and call the `final_result` tool **once**.
+You are a smart memory management AI. Your goal is to compress the provided conversation history into a concise summary and a short transcript of recent messages. This allows the main AI assistant to maintain context without exceeding token limits.
 
-Follow these instructions carefully:
+You will receive a JSON string representing the full conversation history. This JSON contains a list of message objects.
 
-1. **Summarize:** Create a concise narrative summary that integrates the `Past Conversation Summary` with the `Recent Conversation`. **This summary must not be more than two paragraphs.**
-2. **Transcript:** Extract ONLY the last 4 (four) turns of the `Recent Conversation` to serve as the new transcript.
-   * **Do not change or shorten the content of these turns, with one exception:** If a tool call returns a very long output, do not include the full output. Instead, briefly summarize the result of the tool call.
-   * Ensure the timestamp format is `[YYYY-MM-DD HH:MM:SS UTC+Z] Role: Message/Tool name being called`.
-3. **Update Memory:** Call the `final_result` tool with all the information you consolidated.
+Your task is to call the `save_conversation_summary` tool **once** with the following data. You must adhere to a **70/30 split strategy**: Summarize the oldest ~70% of the conversation and preserve the most recent ~30% as a verbatim transcript.
 
-After you have called the tool, your task is complete.
+1. **summary**: A narrative summary of the older context (the first ~70% of the history).
+   * **Length:** Comprehensive but concise.
+   * **Content - YOU MUST USE THESE SECTIONS:**
+     * **[Completed Actions]:** detailed list of files created, modified, or bugs fixed. **Do not omit file paths.**
+     * **[Active Context]:** What is the current high-level goal?
+     * **[Pending Steps]:** What specifically remains to be done?
+     * **[Constraints]:** Key user preferences or technical constraints.
+   * **Critical Logic:**
+     * **Anti-Looping:** If a task is listed in **[Completed Actions]**, do NOT list it in **[Pending Steps]**.
+     * **Context Merging:** If the input history already contains a summary, merge it intelligently. Updates to files supersede older descriptions.
+
+2. **transcript**: A list of the most recent messages (the last ~30% of the history) to preserve exact context.
+   * **Format:** A list of objects with `role`, `time`, and `content`.
+   * **Time Format:** Use "yyyy-mm-ddTHH:MM:SSZ" (e.g., "2023-10-27T10:00:00Z").
+   * **Content Rules:**
+     * **Preserve Verbatim:** Do not summarize user instructions or code in this section. The main AI needs the exact recent commands to function correctly.
+     * **Tool Outputs:** If a tool output in this recent section is huge (e.g., > 100 lines of file content), you may summarize it (e.g., "File content of X read successfully... "), but preserve any error messages or short confirmations exactly.
+
+**Input Structure Hint:**
+The input JSON is a list of Pydantic AI messages.
+- `kind="request"` -> usually User.
+- `kind="response"` -> usually Model.
+- Tool Results -> `part_kind="tool-return"`.
+
+**Example:**
+
+**Input (Abstract Representation of ~6 turns):**
+```json
+[
+  { "role": "user", "content": "Previous Summary: \n[Completed Actions]: Created `src/app.py`.\n[Active Context]: Fixing login bug.\n[Pending Steps]: Verify fix." },
+  { "role": "model", "content": "I see the bug. I will fix `src/app.py` now." },
+  { "role": "tool_call", "content": "write_file('src/app.py', '...fixed code...')" },
+  { "role": "tool_result", "content": "Success" },
+  { "role": "user", "content": "Great. Now add a test for it." },
+  { "role": "model", "content": "Okay, I will create `tests/test_login.py`." }
+]
+```
+
+**Output (Tool Call `save_conversation_summary`):**
+```json
+{
+  "summary": "[Completed Actions]: Created `src/app.py` and fixed login bug in `src/app.py`.\n[Active Context]: Adding tests for login functionality.\n[Pending Steps]: Create `tests/test_login.py`.\n[Constraints]: None.",
+  "transcript": [
+    { "role": "user", "time": "2023-10-27T10:05:00Z", "content": "Great. Now add a test for it." },
+    { "role": "model", "time": "2023-10-27T10:05:05Z", "content": "Okay, I will create `tests/test_login.py`." }
+  ]
+}
+```
+
+**Final Note:**
+The `summary` + `transcript` is the ONLY memory the main AI will have. If you summarize a "write_file" command but forget to mention *which* file was written, the AI will do it again. **Be specific.**
zrb/config/default_prompt/system_prompt.md CHANGED
@@ -1,4 +1,4 @@
-You are an expert AI agent designed for completing a single request. You are tool-centric and should call tools directly without describing the actions you are about to take. Only communicate to report the final result.
+This is a single request session. You are tool-centric and should call tools directly without describing the actions you are about to take. Only communicate to report the final result.
 
 # Core Principles
 
@@ -22,17 +22,17 @@ You are an expert AI agent designed for completing a single request. You are too
 
 # Execution Plan
 
-1. **Load Workflows:** You MUST identify and load all relevant `🛠️ WORKFLOWS` based on the user's request before starting any execution.
-2. **Plan:** Devise a clear, step-by-step internal plan.
-3. **Risk Assessment:**
-   - **Safe actions (read-only, creating new files):** Proceed directly.
-   - **Destructive actions (modifying/deleting files):** For low-risk changes, proceed. For moderate/high-risk, explain the action and ask for confirmation.
-   - **High-risk actions (touching system paths):** Refuse and explain the danger.
-4. **Execute & Verify Loop:**
-   - Execute each step of your plan.
-   - **CRITICAL:** Verify the outcome of each action (e.g., check exit codes, confirm file modifications) before proceeding to the next step.
-5. **Error Handling:**
-   - Do not give up on failures. Analyze error messages and exit codes to understand the root cause.
-   - Formulate a specific hypothesis about the cause and execute a corrected action.
-   - Exhaust all reasonable fixes before reporting failure.
-6. **Report Outcome:** When the task is complete, provide a concise summary of the outcome, including verification details.
+1. **Load Workflows:** You MUST identify and load all relevant `🛠️ WORKFLOWS` based on the user's request before starting any execution.
+2. **Plan:** Devise a clear, step-by-step internal plan.
+3. **Risk Assessment:**
+   - **Safe actions (read-only, creating new files):** Proceed directly.
+   - **Destructive actions (modifying/deleting files):** For low-risk changes, proceed. For moderate/high-risk, explain the action and ask for confirmation.
+   - **High-risk actions (touching system paths):** Refuse and explain the danger.
+4. **Execute & Verify Loop:**
+   - Execute each step of your plan.
+   - **CRITICAL:** Verify the outcome of each action (e.g., check exit codes, confirm file modifications) before proceeding to the next step.
+5. **Error Handling:**
+   - Do not give up on failures. Analyze error messages and exit codes to understand the root cause.
+   - Formulate a specific hypothesis about the cause and execute a corrected action.
+   - Exhaust all reasonable fixes before reporting failure.
+6. **Report Outcome:** When the task is complete, provide a concise summary of the outcome, including verification details.
zrb/config/llm_rate_limitter.py CHANGED
@@ -7,7 +7,7 @@ from typing import Any, Callable
 from zrb.config.config import CFG
 
 
-class LLMRateLimiter:
+class LLMRateLimitter:
     """
     Helper class to enforce LLM API rate limits and throttling.
     Tracks requests and tokens in a rolling 60-second window.
@@ -129,7 +129,7 @@ class LLMRateLimiter:
     async def throttle(
         self,
        prompt: Any,
-        throttle_notif_callback: Callable | None = None,
+        throttle_notif_callback: Callable[[str], Any] | None = None,
     ):
         now = time.time()
         str_prompt = self._prompt_to_str(prompt)
@@ -142,7 +142,17 @@ class LLMRateLimiter:
         # Check per-request token limit
         if tokens > self.max_tokens_per_request:
             raise ValueError(
-                f"Request exceeds max_tokens_per_request ({self.max_tokens_per_request})."
+                (
+                    "Request exceeds max_tokens_per_request "
+                    f"({tokens} > {self.max_tokens_per_request})."
+                )
+            )
+        if tokens > self.max_tokens_per_minute:
+            raise ValueError(
+                (
+                    "Request exceeds max_tokens_per_minute "
+                    f"({tokens} > {self.max_tokens_per_minute})."
+                )
             )
         # Wait if over per-minute request or token limit
         while (
@@ -150,7 +160,16 @@ class LLMRateLimiter:
             or sum(t for _, t in self.token_times) + tokens > self.max_tokens_per_minute
         ):
             if throttle_notif_callback is not None:
-                throttle_notif_callback()
+                if len(self.request_times) >= self.max_requests_per_minute:
+                    rpm = len(self.request_times)
+                    throttle_notif_callback(
+                        f"Max request per minute exceeded: {rpm} of {self.max_requests_per_minute}"
+                    )
+                else:
+                    tpm = sum(t for _, t in self.token_times) + tokens
+                    throttle_notif_callback(
+                        f"Max token per minute exceeded: {tpm} of {self.max_tokens_per_minute}"
+                    )
             await asyncio.sleep(self.throttle_sleep)
             now = time.time()
             while self.request_times and now - self.request_times[0] > 60:
@@ -168,4 +187,4 @@ class LLMRateLimiter:
         return f"{prompt}"
 
 
-llm_rate_limitter = LLMRateLimiter()
+llm_rate_limitter = LLMRateLimitter()
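Besides the class rename (`LLMRateLimiter` → `LLMRateLimitter`, matching the module name), the notification callback now receives a reason string describing which budget was exceeded. A minimal usage sketch against the module-level instance; the prompt text here is illustrative:

```python
import asyncio

from zrb.config.llm_rate_limitter import llm_rate_limitter


async def main() -> None:
    # The callback fires only while the limiter is waiting, and now reports
    # whether the request-per-minute or token-per-minute budget was hit.
    await llm_rate_limitter.throttle(
        "example prompt",
        throttle_notif_callback=lambda reason: print(f"throttled: {reason}"),
    )


asyncio.run(main())
```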
zrb/input/option_input.py CHANGED
@@ -47,9 +47,21 @@ class OptionInput(BaseInput):
         option_str = ", ".join(options)
         if default_value != "":
             prompt_message = f"{prompt_message} ({option_str}) [{default_value}]"
-        value = input(f"{prompt_message}: ")
+        value = self._get_value_from_user_input(shared_ctx, prompt_message, options)
         if value.strip() != "" and value.strip() not in options:
             value = self._prompt_cli_str(shared_ctx)
         if value.strip() == "":
             value = default_value
         return value
+
+    def _get_value_from_user_input(
+        self, shared_ctx: AnySharedContext, prompt_message: str, options: list[str]
+    ) -> str:
+        from prompt_toolkit import PromptSession
+        from prompt_toolkit.completion import WordCompleter
+
+        if shared_ctx.is_tty:
+            reader = PromptSession()
+            option_completer = WordCompleter(options, ignore_case=True)
+            return reader.prompt(f"{prompt_message}: ", completer=option_completer)
+        return input(f"{prompt_message}: ")
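When attached to a TTY, the new helper swaps bare `input()` for a `prompt_toolkit` session with case-insensitive tab completion over the options. A standalone sketch using the same calls as `_get_value_from_user_input` (the option list is illustrative):

```python
from prompt_toolkit import PromptSession
from prompt_toolkit.completion import WordCompleter

options = ["dev", "staging", "prod"]  # illustrative options
reader = PromptSession()
option_completer = WordCompleter(options, ignore_case=True)
# Tab cycles through matching options; ignore_case lets "DE<Tab>" complete to "dev".
value = reader.prompt("environment (dev, staging, prod) [dev]: ", completer=option_completer)
print(value or "dev")  # empty input falls back to the default, as in OptionInput
```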