zrb 1.21.9__py3-none-any.whl → 1.21.31__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of zrb might be problematic.
- zrb/attr/type.py +10 -7
- zrb/builtin/git.py +12 -1
- zrb/builtin/llm/chat_completion.py +287 -0
- zrb/builtin/llm/chat_session_cmd.py +90 -28
- zrb/builtin/llm/chat_trigger.py +6 -1
- zrb/builtin/llm/history.py +4 -4
- zrb/builtin/llm/tool/cli.py +25 -13
- zrb/builtin/llm/tool/code.py +9 -2
- zrb/builtin/llm/tool/file.py +42 -81
- zrb/builtin/llm/tool/note.py +36 -16
- zrb/builtin/llm/tool/search/__init__.py +1 -0
- zrb/builtin/llm/tool/search/brave.py +60 -0
- zrb/builtin/llm/tool/search/searxng.py +55 -0
- zrb/builtin/llm/tool/search/serpapi.py +55 -0
- zrb/builtin/llm/tool/sub_agent.py +30 -10
- zrb/builtin/llm/tool/web.py +12 -72
- zrb/config/config.py +108 -13
- zrb/config/default_prompt/interactive_system_prompt.md +1 -1
- zrb/config/default_prompt/summarization_prompt.md +54 -8
- zrb/config/default_prompt/system_prompt.md +1 -1
- zrb/config/llm_rate_limitter.py +24 -5
- zrb/input/option_input.py +13 -1
- zrb/task/llm/agent.py +42 -144
- zrb/task/llm/agent_runner.py +152 -0
- zrb/task/llm/config.py +7 -5
- zrb/task/llm/conversation_history.py +35 -24
- zrb/task/llm/conversation_history_model.py +4 -11
- zrb/task/llm/default_workflow/coding/workflow.md +2 -3
- zrb/task/llm/file_replacement.py +206 -0
- zrb/task/llm/file_tool_model.py +57 -0
- zrb/task/llm/history_processor.py +206 -0
- zrb/task/llm/history_summarization.py +2 -179
- zrb/task/llm/print_node.py +14 -5
- zrb/task/llm/prompt.py +7 -18
- zrb/task/llm/subagent_conversation_history.py +41 -0
- zrb/task/llm/tool_confirmation_completer.py +41 -0
- zrb/task/llm/tool_wrapper.py +26 -12
- zrb/task/llm_task.py +55 -47
- zrb/util/attr.py +17 -10
- zrb/util/cli/text.py +6 -4
- zrb/util/git.py +2 -2
- zrb/util/yaml.py +1 -0
- zrb/xcom/xcom.py +10 -0
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/METADATA +5 -5
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/RECORD +47 -37
- zrb/task/llm/history_summarization_tool.py +0 -24
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/WHEEL +0 -0
- {zrb-1.21.9.dist-info → zrb-1.21.31.dist-info}/entry_points.txt +0 -0
zrb/task/llm/file_replacement.py ADDED
@@ -0,0 +1,206 @@
+import difflib
+import os
+import shlex
+import subprocess
+import tempfile
+from typing import Any
+
+from zrb.config.config import CFG
+from zrb.task.llm.file_tool_model import FileReplacement
+from zrb.util.file import read_file
+
+
+def is_single_path_replacement(param: Any):
+    if isinstance(param, dict):
+        return _dict_has_exact_keys(
+            param, {"path", "old_text", "new_text"}
+        ) or _dict_has_exact_keys(param, {"path", "old_text", "new_text", "count"})
+    if isinstance(param, list):
+        current_path = None
+        for single_replacement in param:
+            if not is_single_path_replacement(single_replacement):
+                return False
+            if current_path is not None and current_path != single_replacement["path"]:
+                return False
+            current_path = single_replacement["path"]
+        return True
+    return False
+
+
+def _dict_has_exact_keys(dictionary: dict, required_keys: set) -> bool:
+    """
+    Check if a dictionary contains exactly the specified keys.
+    More efficient for large dictionaries.
+    """
+    if len(dictionary) != len(required_keys):
+        return False
+    return all(key in dictionary for key in required_keys)
+
+
+def edit_replacement(
+    replacement: list[FileReplacement] | FileReplacement,
+    diff_edit_command_tpl: str | None = None,
+) -> tuple[list[FileReplacement] | FileReplacement, bool]:
+    # Normalize input to list
+    replacement_list = [replacement] if isinstance(replacement, dict) else replacement
+    if not replacement_list:
+        return replacement, False
+    path = replacement_list[0]["path"]
+    original_content = read_file(path)
+    # Calculate initial proposed content based on AI's suggestion
+    proposed_content = _apply_initial_replacements(original_content, replacement_list)
+    # Open external editor for user modification
+    edited_content = _open_diff_editor(
+        path, original_content, proposed_content, diff_edit_command_tpl
+    )
+    # If content hasn't changed from proposal, return original replacement
+    if edited_content == proposed_content:
+        return replacement, False
+    # Calculate optimized replacements based on user's final edit
+    optimized_replacements = _generate_optimized_replacements(
+        path, original_content, edited_content
+    )
+    return optimized_replacements, True
+
+
+def _apply_initial_replacements(
+    content: str, replacement_list: list[FileReplacement]
+) -> str:
+    new_content = content
+    for single_replacement in replacement_list:
+        old_text = single_replacement["old_text"]
+        new_text = single_replacement["new_text"]
+        count = single_replacement.get("count", -1)
+        new_content = new_content.replace(old_text, new_text, count)
+    return new_content
+
+
+def _open_diff_editor(
+    original_path: str,
+    original_content: str,
+    proposed_content: str,
+    diff_edit_command_tpl: str | None,
+) -> str:
+    if diff_edit_command_tpl is None:
+        diff_edit_command_tpl = CFG.DEFAULT_DIFF_EDIT_COMMAND_TPL
+    _, extension = os.path.splitext(original_path)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as old_file:
+        old_file_name = old_file.name
+        old_file.write(original_content.encode())
+        old_file.flush()
+    with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as new_file:
+        new_file_name = new_file.name
+        new_file.write(proposed_content.encode())
+        new_file.flush()
+    diff_edit_command = diff_edit_command_tpl.format(
+        old=old_file_name, new=new_file_name
+    )
+    subprocess.call(shlex.split(diff_edit_command))
+    edited_content = read_file(new_file_name)
+    if os.path.exists(old_file_name):
+        os.remove(old_file_name)
+    if os.path.exists(new_file_name):
+        os.remove(new_file_name)
+    return edited_content
+
+
+def _generate_optimized_replacements(
+    path: str, original_content: str, edited_content: str
+) -> list[FileReplacement]:
+    matcher = difflib.SequenceMatcher(None, original_content, edited_content)
+    hunks = _group_opcodes_into_hunks(matcher.get_opcodes())
+    replacements = []
+    for hunk in hunks:
+        replacement = _create_replacement_from_hunk(
+            path, original_content, edited_content, hunk
+        )
+        if replacement:
+            replacements.append(replacement)
+    return replacements
+
+
+def _group_opcodes_into_hunks(opcodes, merge_threshold=200):
+    """
+    Groups opcodes into hunks.
+    'equal' blocks smaller than merge_threshold are treated as context (glue) within a hunk.
+    """
+    hunks = []
+    current_hunk = []
+    for tag, i1, i2, j1, j2 in opcodes:
+        if tag == "equal":
+            if i2 - i1 < merge_threshold:
+                if current_hunk:
+                    current_hunk.append((tag, i1, i2, j1, j2))
+            else:
+                if current_hunk:
+                    hunks.append(current_hunk)
+                    current_hunk = []
+        else:
+            current_hunk.append((tag, i1, i2, j1, j2))
+    if current_hunk:
+        hunks.append(current_hunk)
+    return hunks
+
+
+def _create_replacement_from_hunk(
+    path: str, original_content: str, edited_content: str, hunk: list
+) -> FileReplacement | None:
+    # Trim leading/trailing 'equal' opcodes
+    while hunk and hunk[0][0] == "equal":
+        hunk.pop(0)
+    while hunk and hunk[-1][0] == "equal":
+        hunk.pop()
+    if not hunk:
+        return None
+    # Determine range of modification
+    i_start = hunk[0][1]
+    i_end = hunk[-1][2]
+    j_start = hunk[0][3]
+    j_end = hunk[-1][4]
+    base_old_text = original_content[i_start:i_end]
+    base_new_text = edited_content[j_start:j_end]
+    if base_old_text == base_new_text:
+        return None
+    # Expand context
+    start, end = _expand_context_for_uniqueness(original_content, i_start, i_end)
+    start, end = _expand_to_word_boundary(original_content, start, end)
+    final_old_text = original_content[start:end]
+    # Reconstruct new text
+    prefix = original_content[start:i_start]
+    suffix = original_content[i_end:end]
+    final_new_text = prefix + base_new_text + suffix
+    if final_old_text == final_new_text:
+        return None
+    return {
+        "path": path,
+        "old_text": final_old_text,
+        "new_text": final_new_text,
+        "count": 1,
+    }
+
+
+def _expand_context_for_uniqueness(
+    content: str, start: int, end: int
+) -> tuple[int, int]:
+    """Expands the range [start, end] until the substring content[start:end] is unique."""
+    while content.count(content[start:end]) > 1:
+        if start == 0 and end == len(content):
+            break
+        if start > 0:
+            start -= 1
+        if end < len(content):
+            end += 1
+    return start, end
+
+
+def _expand_to_word_boundary(content: str, start: int, end: int) -> tuple[int, int]:
+    """Expands the range [start, end] outwards to the nearest whitespace boundaries."""

+    def is_boundary(char):
+        return char.isspace()
+
+    while start > 0 and not is_boundary(content[start - 1]):
+        start -= 1
+    while end < len(content) and not is_boundary(content[end]):
+        end += 1
+    return start, end
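For orientation: is_single_path_replacement gates whether a tool payload can be routed through the interactive diff editor. It accepts a single replacement dict with exactly the documented keys, or a list of such dicts that all target the same file. A quick illustration of those rules (paths and values are hypothetical):

from zrb.task.llm.file_replacement import is_single_path_replacement

# Exactly the required keys -> qualifies as a single-path replacement.
print(is_single_path_replacement({"path": "a.py", "old_text": "x", "new_text": "y"}))  # True
# An unknown extra key disqualifies the payload (exact-key check).
print(is_single_path_replacement({"path": "a.py", "old_text": "x", "new_text": "y", "extra": 1}))  # False
# A list mixing paths also fails, since edit_replacement edits one file at a time.
print(is_single_path_replacement([
    {"path": "a.py", "old_text": "x", "new_text": "y"},
    {"path": "b.py", "old_text": "x", "new_text": "y"},
]))  # False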
zrb/task/llm/file_tool_model.py ADDED
@@ -0,0 +1,57 @@
+import sys
+from typing import Literal
+
+if sys.version_info >= (3, 12):
+    from typing import NotRequired, TypedDict
+else:
+    from typing_extensions import NotRequired, TypedDict
+
+
+class FileToRead(TypedDict):
+    """
+    Configuration for reading a file or file section.
+
+    Attributes:
+        path (str): Absolute or relative path to the file
+        start_line (int | None): Starting line number (1-based, inclusive).
+            If None, reads from beginning.
+        end_line (int | None): Ending line number (1-based, exclusive). If None, reads to end.
+    """
+
+    path: str
+    start_line: NotRequired[int | None]
+    end_line: NotRequired[int | None]
+
+
+class FileToWrite(TypedDict):
+    """
+    Configuration for writing content to a file.
+
+    Attributes:
+        path (str): Absolute or relative path where file will be written.
+        content (str): Content to write. CRITICAL: For JSON, ensure all special characters
+            in this string are properly escaped.
+        mode (str): Mode for writing:
+            'w' (overwrite, default), 'a' (append), 'x' (create exclusively).
+    """
+
+    path: str
+    content: str
+    mode: NotRequired[Literal["w", "wt", "tw", "a", "at", "ta", "x", "xt", "tx"]]
+
+
+class FileReplacement(TypedDict):
+    """
+    Configuration for a single text replacement operation in a file.
+
+    Attributes:
+        path (str): Absolute or relative path to the file
+        old_text (str): Exact text to find and replace (must match file content exactly)
+        new_text (str): New text to replace with
+        count (int): Optional. Number of occurrences to replace. Defaults to -1 (all).
+    """
+
+    path: str
+    old_text: str
+    new_text: str
+    count: NotRequired[int]
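Because these are TypedDicts rather than runtime models, tool arguments stay plain dicts that a type checker can still validate against the declared keys. A small sketch of a payload (file and values hypothetical):

from zrb.task.llm.file_tool_model import FileReplacement

# Plain dict literal; keys are checked statically against the TypedDict.
replacement: FileReplacement = {
    "path": "src/app.py",  # hypothetical file
    "old_text": "DEBUG = True",
    "new_text": "DEBUG = False",
    "count": 1,  # optional (NotRequired); omitting it means -1, i.e. replace all
}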
zrb/task/llm/history_processor.py ADDED
@@ -0,0 +1,206 @@
+import json
+import sys
+import traceback
+from typing import TYPE_CHECKING, Any, Callable, Coroutine
+
+from zrb.config.llm_config import llm_config
+from zrb.config.llm_rate_limitter import LLMRateLimitter
+from zrb.config.llm_rate_limitter import llm_rate_limitter as default_llm_rate_limitter
+from zrb.context.any_context import AnyContext
+from zrb.task.llm.agent_runner import run_agent_iteration
+from zrb.util.cli.style import stylize_faint
+from zrb.util.markdown import make_markdown_section
+
+if sys.version_info >= (3, 12):
+    from typing import TypedDict
+else:
+    from typing_extensions import TypedDict
+
+
+if TYPE_CHECKING:
+    from pydantic_ai import ModelMessage
+    from pydantic_ai.models import Model
+    from pydantic_ai.settings import ModelSettings
+
+
+class SingleMessage(TypedDict):
+    """
+    Single conversation message.
+
+    Attributes:
+        role: Either AI, User, Tool Call, or Tool Result
+        time: yyyy-mm-ddTHH:MM:SSZ
+        content: The content of the message (summarized if too long)
+    """
+
+    role: str
+    time: str
+    content: str
+
+
+class ConversationSummary(TypedDict):
+    """
+    Conversation history.
+
+    Attributes:
+        transcript: The last several messages of the conversation
+        summary: Descriptive conversation summary
+    """
+
+    transcript: list[SingleMessage]
+    summary: str
+
+
+def save_conversation_summary(conversation_summary: ConversationSummary):
+    """
+    Write the conversation summary so the main assistant can continue the conversation.
+    """
+    return conversation_summary
+
+
+def create_summarize_history_processor(
+    ctx: AnyContext,
+    system_prompt: str,
+    rate_limitter: LLMRateLimitter | None = None,
+    summarization_model: "Model | str | None" = None,
+    summarization_model_settings: "ModelSettings | None" = None,
+    summarization_system_prompt: str | None = None,
+    summarization_token_threshold: int | None = None,
+    summarization_retries: int = 2,
+) -> Callable[[list["ModelMessage"]], Coroutine[None, None, list["ModelMessage"]]]:
+    from pydantic_ai import Agent, ModelMessage, ModelRequest
+    from pydantic_ai.messages import ModelMessagesTypeAdapter, UserPromptPart
+
+    if rate_limitter is None:
+        rate_limitter = default_llm_rate_limitter
+    if summarization_model is None:
+        summarization_model = llm_config.default_small_model
+    if summarization_model_settings is None:
+        summarization_model_settings = llm_config.default_small_model_settings
+    if summarization_system_prompt is None:
+        summarization_system_prompt = llm_config.default_summarization_prompt
+    if summarization_token_threshold is None:
+        summarization_token_threshold = (
+            llm_config.default_history_summarization_token_threshold
+        )
+
+    async def maybe_summarize_history(
+        messages: list[ModelMessage],
+    ) -> list[ModelMessage]:
+        history_list = json.loads(ModelMessagesTypeAdapter.dump_json(messages))
+        history_json_str = json.dumps(history_list)
+        # Estimate token usage.
+        # Note: Pydantic AI has a run-context parameter
+        # (https://ai.pydantic.dev/message-history/#runcontext-parameter),
+        # but we cannot use run_ctx.usage.total_tokens because the total keeps
+        # increasing even after summarization.
+        estimated_token_usage = rate_limitter.count_token(history_json_str)
+        _print_request_info(
+            ctx, estimated_token_usage, summarization_token_threshold, messages
+        )
+        if estimated_token_usage < summarization_token_threshold or len(messages) == 1:
+            return messages
+        history_list_without_instruction = [
+            {
+                key: obj[key]
+                for key in obj
+                if index == len(history_list) - 1 or key != "instructions"
+            }
+            for index, obj in enumerate(history_list)
+        ]
+        history_json_str_without_instruction = json.dumps(
+            history_list_without_instruction
+        )
+        summarization_message = f"Summarize the following conversation: {history_json_str_without_instruction}"
+        summarization_agent = Agent[None, ConversationSummary](
+            model=summarization_model,
+            output_type=save_conversation_summary,
+            instructions=summarization_system_prompt,
+            model_settings=summarization_model_settings,
+            retries=summarization_retries,
+        )
+        try:
+            _print_info(ctx, "📝 Rollup Conversation", 2)
+            summary_run = await run_agent_iteration(
+                ctx=ctx,
+                agent=summarization_agent,
+                user_prompt=summarization_message,
+                attachments=[],
+                history_list=[],
+                rate_limitter=rate_limitter,
+                log_indent_level=2,
+            )
+            if summary_run and summary_run.result and summary_run.result.output:
+                usage = summary_run.result.usage()
+                _print_info(ctx, f"📝 Rollup Conversation Token: {usage}", 2)
+                ctx.print(plain=True)
+                ctx.log_info("History summarized and updated.")
+                condensed_message = make_markdown_section(
+                    header="Past Conversation",
+                    content="\n".join(
+                        [
+                            make_markdown_section(
+                                "Summary", _extract_summary(summary_run.result.output)
+                            ),
+                            make_markdown_section(
+                                "Past Transcript",
+                                _extract_transcript(summary_run.result.output),
+                            ),
+                        ]
+                    ),
+                )
+                return [
+                    ModelRequest(
+                        instructions=system_prompt,
+                        parts=[UserPromptPart(condensed_message)],
+                    )
+                ]
+            ctx.log_warning("History summarization failed or returned no data.")
+        except BaseException as e:
+            ctx.log_warning(f"Error during history summarization: {e}")
+            traceback.print_exc()
+        return messages
+
+    return maybe_summarize_history
+
+
+def _print_request_info(
+    ctx: AnyContext,
+    estimated_token_usage: int,
+    summarization_token_threshold: int,
+    messages: list["ModelMessage"],
+):
+    _print_info(ctx, f"Current request token (estimated): {estimated_token_usage}")
+    _print_info(ctx, f"Summarization token threshold: {summarization_token_threshold}")
+    _print_info(ctx, f"History length: {len(messages)}")
+
+
+def _print_info(ctx: AnyContext, text: str, log_indent_level: int = 0):
+    log_prefix = (2 * (log_indent_level + 1)) * " "
+    ctx.print(stylize_faint(f"{log_prefix}{text}"), plain=True)
+
+
+def _extract_summary(summary_result_output: dict[str, Any] | str) -> str:
+    summary = (
+        summary_result_output.get("summary", "")
+        if isinstance(summary_result_output, dict)
+        else ""
+    )
+    return summary
+
+
+def _extract_transcript(summary_result_output: dict[str, Any] | str) -> str:
+    transcript_list = (
+        summary_result_output.get("transcript", [])
+        if isinstance(summary_result_output, dict)
+        else []
+    )
+    transcript_list = [] if not isinstance(transcript_list, list) else transcript_list
+    return "\n".join(_format_transcript_message(message) for message in transcript_list)
+
+
+def _format_transcript_message(message: dict[str, str]) -> str:
+    role = message.get("role", "Message")
+    time = message.get("time", "<unknown>")
+    content = message.get("content", "<empty>")
+    return f"{role} ({time}): {content}"
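The factory returns an async callable over list[ModelMessage], which is the shape pydantic-ai expects from a history processor. A hedged wiring sketch, assuming pydantic-ai's history_processors hook and a zrb context already in hand (the model id is a placeholder):

from pydantic_ai import Agent

from zrb.context.any_context import AnyContext
from zrb.task.llm.history_processor import create_summarize_history_processor


def build_agent(ctx: AnyContext, system_prompt: str) -> Agent:
    # The processor condenses an over-budget history into a single
    # summarized ModelRequest before each model call.
    processor = create_summarize_history_processor(ctx, system_prompt)
    return Agent(
        "openai:gpt-4o-mini",  # placeholder model id
        instructions=system_prompt,
        history_processors=[processor],
    )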
zrb/task/llm/history_summarization.py CHANGED
@@ -1,36 +1,7 @@
-import json
-import traceback
-from typing import TYPE_CHECKING
-
-from zrb.attr.type import BoolAttr, IntAttr
+from zrb.attr.type import IntAttr
 from zrb.config.llm_config import llm_config
-from zrb.config.llm_rate_limitter import LLMRateLimiter, llm_rate_limitter
 from zrb.context.any_context import AnyContext
-from zrb.task.llm.agent import run_agent_iteration
-from zrb.task.llm.conversation_history import (
-    count_part_in_history_list,
-    inject_conversation_history_notes,
-    replace_system_prompt_in_history,
-)
-from zrb.task.llm.conversation_history_model import ConversationHistory
-from zrb.task.llm.history_summarization_tool import (
-    create_history_summarization_tool,
-)
-from zrb.task.llm.typing import ListOfDict
-from zrb.util.attr import get_bool_attr, get_int_attr
-from zrb.util.cli.style import stylize_faint
-from zrb.util.markdown import make_markdown_section
-from zrb.util.truncate import truncate_str
-
-if TYPE_CHECKING:
-    from pydantic_ai.models import Model
-    from pydantic_ai.settings import ModelSettings
-
-
-def _count_token_in_history(history_list: ListOfDict) -> int:
-    """Counts the total number of tokens in a conversation history list."""
-    text_to_count = json.dumps(history_list)
-    return llm_rate_limitter.count_token(text_to_count)
+from zrb.util.attr import get_int_attr
 
 
 def get_history_summarization_token_threshold(
@@ -52,151 +23,3 @@ def get_history_summarization_token_threshold(
             "Defaulting to -1 (no threshold)."
         )
         return -1
-
-
-def should_summarize_history(
-    ctx: AnyContext,
-    history_list: ListOfDict,
-    should_summarize_history_attr: BoolAttr | None,
-    render_summarize_history: bool,
-    history_summarization_token_threshold_attr: IntAttr | None,
-    render_history_summarization_token_threshold: bool,
-) -> bool:
-    """Determines if history summarization should occur based on token length and config."""
-    history_part_count = count_part_in_history_list(history_list)
-    if history_part_count == 0:
-        return False
-    summarization_token_threshold = get_history_summarization_token_threshold(
-        ctx,
-        history_summarization_token_threshold_attr,
-        render_history_summarization_token_threshold,
-    )
-    history_token_count = _count_token_in_history(history_list)
-    if (
-        summarization_token_threshold == -1
-        or summarization_token_threshold > history_token_count
-    ):
-        return False
-    return get_bool_attr(
-        ctx,
-        should_summarize_history_attr,
-        llm_config.default_summarize_history,
-        auto_render=render_summarize_history,
-    )
-
-
-async def summarize_history(
-    ctx: AnyContext,
-    model: "Model | str | None",
-    settings: "ModelSettings | None",
-    system_prompt: str,
-    conversation_history: ConversationHistory,
-    rate_limitter: LLMRateLimiter | None = None,
-    retries: int = 3,
-) -> ConversationHistory:
-    """Runs an LLM call to update the conversation summary."""
-    from pydantic_ai import Agent
-
-    inject_conversation_history_notes(conversation_history)
-    ctx.log_info("Attempting to summarize conversation history...")
-    # Construct the user prompt for the summarization agent
-    user_prompt = "\n".join(
-        [
-            make_markdown_section(
-                "Past Conversation",
-                "\n".join(
-                    [
-                        make_markdown_section(
-                            "Summary",
-                            conversation_history.past_conversation_summary,
-                            as_code=True,
-                        ),
-                        make_markdown_section(
-                            "Last Transcript",
-                            conversation_history.past_conversation_transcript,
-                            as_code=True,
-                        ),
-                    ]
-                ),
-            ),
-            make_markdown_section(
-                "Recent Conversation (JSON)",
-                json.dumps(truncate_str(conversation_history.history, 1000)),
-                as_code=True,
-            ),
-        ]
-    )
-    summarize = create_history_summarization_tool(conversation_history)
-    summarization_agent = Agent[None, str](
-        model=model,
-        output_type=summarize,
-        system_prompt=system_prompt,
-        model_settings=settings,
-        retries=retries,
-    )
-    try:
-        ctx.print(stylize_faint("  📝 Rollup Conversation"), plain=True)
-        summary_run = await run_agent_iteration(
-            ctx=ctx,
-            agent=summarization_agent,
-            user_prompt=user_prompt,
-            attachments=[],
-            history_list=[],
-            rate_limitter=rate_limitter,
-            log_indent_level=2,
-        )
-        if summary_run and summary_run.result and summary_run.result.output:
-            usage = summary_run.result.usage()
-            ctx.print(
-                stylize_faint(f"  📝 Rollup Conversation Token: {usage}"), plain=True
-            )
-            ctx.print(plain=True)
-            ctx.log_info("History summarized and updated.")
-        else:
-            ctx.log_warning("History summarization failed or returned no data.")
-    except BaseException as e:
-        ctx.log_warning(f"Error during history summarization: {e}")
-        traceback.print_exc()
-    # Return the original summary if summarization fails
-    return conversation_history
-
-
-async def maybe_summarize_history(
-    ctx: AnyContext,
-    conversation_history: ConversationHistory,
-    should_summarize_history_attr: BoolAttr | None,
-    render_summarize_history: bool,
-    history_summarization_token_threshold_attr: IntAttr | None,
-    render_history_summarization_token_threshold: bool,
-    model: "str | Model | None",
-    model_settings: "ModelSettings | None",
-    summarization_prompt: str,
-    rate_limitter: LLMRateLimiter | None = None,
-) -> ConversationHistory:
-    """Summarizes history and updates context if enabled and threshold met."""
-    shorten_history = replace_system_prompt_in_history(conversation_history.history)
-    if should_summarize_history(
-        ctx,
-        shorten_history,
-        should_summarize_history_attr,
-        render_summarize_history,
-        history_summarization_token_threshold_attr,
-        render_history_summarization_token_threshold,
-    ):
-        original_history = conversation_history.history
-        conversation_history.history = shorten_history
-        conversation_history = await summarize_history(
-            ctx=ctx,
-            model=model,
-            settings=model_settings,
-            system_prompt=summarization_prompt,
-            conversation_history=conversation_history,
-            rate_limitter=rate_limitter,
-        )
-        conversation_history.history = original_history
-        if (
-            conversation_history.past_conversation_summary != ""
-            and conversation_history.past_conversation_transcript != ""
-        ):
-            conversation_history.history = []
-    return conversation_history
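Net effect: history_summarization.py now retains only get_history_summarization_token_threshold, while summarization itself moved into the message-level processor in history_processor.py above. Both versions share the same gating rule, sketched here with hypothetical names (a threshold of -1 disables summarization):

def should_summarize(estimated_tokens: int, threshold: int) -> bool:
    # Mirrors the check in both the removed should_summarize_history and the
    # new maybe_summarize_history: -1 means "never summarize", otherwise
    # summarize once the estimated token count reaches the threshold.
    if threshold == -1:
        return False
    return estimated_tokens >= threshold


assert should_summarize(5000, 4000)
assert not should_summarize(3000, -1)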
zrb/task/llm/print_node.py CHANGED
@@ -2,6 +2,7 @@ import json
 from collections.abc import Callable
 from typing import Any
 
+from zrb.config.config import CFG
 from zrb.util.cli.style import stylize_faint
 
 
@@ -104,12 +105,20 @@ async def print_node(
             and event.tool_call_id
         ):
             call_id = event.tool_call_id
-
-
-
-
+            if CFG.LLM_SHOW_TOOL_CALL_RESULT:
+                result_content = event.result.content
+                print_func(
+                    _format_content(
+                        f"{call_id} | Return {result_content}",
+                        log_indent_level,
+                    )
+                )
+            else:
+                print_func(
+                    _format_content(
+                        f"{call_id} | Executed", log_indent_level
+                    )
                 )
-        )
     except UnexpectedModelBehavior as e:
         print_func("")  # ensure newline consistency
         print_func(