zrb 1.8.15__py3-none-any.whl → 1.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zrb/builtin/llm/chat_session.py +16 -11
- zrb/builtin/llm/llm_ask.py +4 -0
- zrb/builtin/llm/tool/code.py +2 -2
- zrb/builtin/llm/tool/file.py +105 -2
- zrb/builtin/llm/tool/web.py +1 -1
- zrb/config.py +14 -8
- zrb/llm_config.py +156 -185
- zrb/task/any_task.py +6 -9
- zrb/task/llm/agent.py +26 -33
- zrb/task/llm/config.py +4 -7
- zrb/task/llm/context.py +0 -44
- zrb/task/llm/context_enrichment.py +73 -100
- zrb/task/llm/error.py +2 -4
- zrb/task/llm/history.py +19 -11
- zrb/task/llm/history_summarization.py +75 -88
- zrb/task/llm/print_node.py +10 -8
- zrb/task/llm/prompt.py +12 -19
- zrb/task/llm/tool_wrapper.py +2 -4
- zrb/task/llm_task.py +207 -78
- zrb/util/file.py +3 -2
- {zrb-1.8.15.dist-info → zrb-1.9.1.dist-info}/METADATA +1 -1
- {zrb-1.8.15.dist-info → zrb-1.9.1.dist-info}/RECORD +24 -24
- {zrb-1.8.15.dist-info → zrb-1.9.1.dist-info}/WHEEL +0 -0
- {zrb-1.8.15.dist-info → zrb-1.9.1.dist-info}/entry_points.txt +0 -0
zrb/task/llm/context.py
CHANGED
@@ -1,14 +1,9 @@
 import datetime
-import inspect
 import os
 import platform
 import re
-from collections.abc import Callable
 from typing import Any
 
-from zrb.context.any_context import AnyContext
-from zrb.context.any_shared_context import AnySharedContext
-from zrb.util.attr import get_attr
 from zrb.util.file import read_dir, read_file_with_line_numbers
 
 
@@ -61,42 +56,3 @@ def extract_default_context(user_message: str) -> tuple[str, dict[str, Any]]:
     }
 
     return modified_user_message, context
-
-
-def get_conversation_context(
-    ctx: AnyContext,
-    conversation_context_attr: (
-        dict[str, Any] | Callable[[AnySharedContext], dict[str, Any]] | None
-    ),
-) -> dict[str, Any]:
-    """
-    Retrieves the conversation context.
-    If a value in the context dict is callable, it executes it with ctx.
-    """
-    raw_context = get_attr(ctx, conversation_context_attr, {}, auto_render=False)
-    if not isinstance(raw_context, dict):
-        ctx.log_warning(
-            f"Conversation context resolved to type {type(raw_context)}, "
-            "expected dict. Returning empty context."
-        )
-        return {}
-    # If conversation_context contains callable value, execute them.
-    processed_context: dict[str, Any] = {}
-    for key, value in raw_context.items():
-        if callable(value):
-            try:
-                # Check if the callable expects 'ctx'
-                sig = inspect.signature(value)
-                if "ctx" in sig.parameters:
-                    processed_context[key] = value(ctx)
-                else:
-                    processed_context[key] = value()
-            except Exception as e:
-                ctx.log_warning(
-                    f"Error executing callable for context key '{key}': {e}. "
-                    "Skipping."
-                )
-                processed_context[key] = None
-        else:
-            processed_context[key] = value
-    return processed_context
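For reference, the core of the removed get_conversation_context helper is the callable-dispatch pattern sketched below: a context value that is callable gets invoked, and ctx is passed only when the signature asks for it. This is a minimal sketch (the resolve_value name is ours; the real helper also logged and swallowed per-key errors):

import inspect
from typing import Any

def resolve_value(value: Any, ctx: Any) -> Any:
    # Plain values pass through untouched.
    if not callable(value):
        return value
    # Pass ctx only if the callable declares a 'ctx' parameter.
    sig = inspect.signature(value)
    return value(ctx) if "ctx" in sig.parameters else value()

In 1.9.1 this helper is gone: conversation context is no longer a dict of (possibly callable) values but a single long-term context string maintained by the LLM, as the context_enrichment.py diff below shows.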
zrb/task/llm/context_enrichment.py
CHANGED
@@ -1,13 +1,11 @@
 import json
 import traceback
-from typing import TYPE_CHECKING
-
-from pydantic import BaseModel
+from typing import TYPE_CHECKING
 
 from zrb.attr.type import BoolAttr, IntAttr
 from zrb.context.any_context import AnyContext
 from zrb.llm_config import llm_config
-from zrb.llm_rate_limitter import LLMRateLimiter
+from zrb.llm_rate_limitter import LLMRateLimiter, llm_rate_limitter
 from zrb.task.llm.agent import run_agent_iteration
 from zrb.task.llm.history import (
     count_part_in_history_list,
@@ -20,114 +18,87 @@ from zrb.util.cli.style import stylize_faint
 if TYPE_CHECKING:
     from pydantic_ai.models import Model
     from pydantic_ai.settings import ModelSettings
-else:
-    Model = Any
-    ModelSettings = Any
-
-
-class EnrichmentConfig(BaseModel):
-    model_config = {"arbitrary_types_allowed": True}
-    model: Model | str | None = None
-    settings: ModelSettings | None = None
-    prompt: str
-    retries: int = 3
 
 
-
-
+def _count_token_in_history(history_list: ListOfDict) -> int:
+    """Counts the total number of tokens in a conversation history list."""
+    text_to_count = json.dumps(history_list)
+    return llm_rate_limitter.count_token(text_to_count)
 
 
 async def enrich_context(
     ctx: AnyContext,
-
-
+    model: "Model | str | None",
+    settings: "ModelSettings | None",
+    prompt: str,
+    previous_long_term_context: str,
     history_list: ListOfDict,
     rate_limitter: LLMRateLimiter | None = None,
-
-
+    retries: int = 3,
+) -> str:
+    """Runs an LLM call to update the long-term context and returns the new context string."""
    from pydantic_ai import Agent
 
     ctx.log_info("Attempting to enrich conversation context...")
-    #
-
-
-
-
-
-
-        [
-            "Extract context from the following conversation info.",
-            "Extract only contexts that will be relevant across multiple conversations, like",  # noqa
-            "- user name",
-            "- user hobby",
-            "- user's long life goal",
-            "- standard/SOP",
-            "- etc.",
-            "Always maintain the relevant context and remove the irrelevant ones.",
-            "Restructure the context in a helpful way",
-            "Keep the context small",
-            f"Existing Context: {context_json}",
-            f"Conversation History: {history_json}",
-        ]
-    )
-    except Exception as e:
-        ctx.log_warning(f"Error formatting context/history for enrichment: {e}")
-        return conversation_context  # Return original context if formatting fails
-
+    # Construct the user prompt according to the new prompt format
+    user_prompt = json.dumps(
+        {
+            "previous_long_term_context": previous_long_term_context,
+            "recent_conversation_history": history_list,
+        }
+    )
     enrichment_agent = Agent(
-        model=
-        system_prompt=
-        model_settings=
-        retries=
-        output_type=EnrichmentResult,
+        model=model,
+        system_prompt=prompt,
+        model_settings=settings,
+        retries=retries,
     )
 
     try:
-        ctx.print(stylize_faint("
+        ctx.print(stylize_faint("💡 Enrich Context"), plain=True)
         enrichment_run = await run_agent_iteration(
             ctx=ctx,
             agent=enrichment_agent,
-            user_prompt=
-            history_list=[],  # Enrichment agent
+            user_prompt=user_prompt,
+            history_list=[],  # Enrichment agent works off the prompt, not history
             rate_limitter=rate_limitter,
         )
         if enrichment_run and enrichment_run.result.output:
-
+            new_long_term_context = str(enrichment_run.result.output)
             usage = enrichment_run.result.usage()
-            ctx.print(
-
-
-
-
-
-
-                f"Updated conversation context: {json.dumps(conversation_context)}"
-            )
+            ctx.print(
+                stylize_faint(f"💡 Context Enrichment Token: {usage}"), plain=True
+            )
+            ctx.print(plain=True)
+            ctx.log_info("Context enriched based on history.")
+            ctx.log_info(f"Updated long-term context:\n{new_long_term_context}")
+            return new_long_term_context
         else:
-            ctx.log_warning("Context enrichment returned no data")
+            ctx.log_warning("Context enrichment returned no data.")
     except Exception as e:
         ctx.log_warning(f"Error during context enrichment LLM call: {e}")
         traceback.print_exc()
-
+
+    # Return the original context if enrichment fails
+    return previous_long_term_context
 
 
-def get_context_enrichment_threshold(
+def get_context_enrichment_token_threshold(
     ctx: AnyContext,
-
-
+    context_enrichment_token_threshold_attr: IntAttr | None,
+    render_context_enrichment_token_threshold: bool,
 ) -> int:
-    """Gets the context enrichment threshold, handling defaults and errors."""
+    """Gets the context enrichment token threshold, handling defaults and errors."""
     try:
         return get_int_attr(
             ctx,
-
-
-
-            auto_render=render_context_enrichment_threshold,
+            context_enrichment_token_threshold_attr,
+            llm_config.default_context_enrichment_token_threshold,
+            auto_render=render_context_enrichment_token_threshold,
         )
     except ValueError as e:
         ctx.log_warning(
-            f"Could not convert
+            f"Could not convert context_enrichment_token_threshold to int: {e}. "
             "Defaulting to -1 (no threshold)."
         )
         return -1
@@ -136,23 +107,27 @@ def get_context_enrichment_threshold(
 def should_enrich_context(
     ctx: AnyContext,
     history_list: ListOfDict,
-    should_enrich_context_attr: BoolAttr | None,
+    should_enrich_context_attr: BoolAttr | None,
     render_enrich_context: bool,
-
-
+    context_enrichment_token_threshold_attr: IntAttr | None,
+    render_context_enrichment_token_threshold: bool,
 ) -> bool:
     """
-    Determines if context enrichment should occur based on history, threshold, and config.
+    Determines if context enrichment should occur based on history, token threshold, and config.
     """
     history_part_count = count_part_in_history_list(history_list)
     if history_part_count == 0:
         return False
-
+    enrichment_token_threshold = get_context_enrichment_token_threshold(
         ctx,
-
-
+        context_enrichment_token_threshold_attr,
+        render_context_enrichment_token_threshold,
     )
-
+    history_token_count = _count_token_in_history(history_list)
+    if (
+        enrichment_token_threshold == -1
+        or enrichment_token_threshold > history_token_count
+    ):
         return False
     return get_bool_attr(
         ctx,
@@ -165,35 +140,33 @@ def should_enrich_context(
 async def maybe_enrich_context(
     ctx: AnyContext,
     history_list: ListOfDict,
-
+    long_term_context: str,
     should_enrich_context_attr: BoolAttr | None,
     render_enrich_context: bool,
-
-
-    model: str | Model | None,
-    model_settings: ModelSettings | None,
+    context_enrichment_token_threshold_attr: IntAttr | None,
+    render_context_enrichment_token_threshold: bool,
+    model: "str | Model | None",
+    model_settings: "ModelSettings | None",
     context_enrichment_prompt: str,
     rate_limitter: LLMRateLimiter | None = None,
-) ->
-    """Enriches context based on history if enabled and threshold met."""
+) -> str:
+    """Enriches context based on history if enabled and token threshold met."""
     shorten_history_list = replace_system_prompt_in_history_list(history_list)
     if should_enrich_context(
         ctx,
         shorten_history_list,
         should_enrich_context_attr,
         render_enrich_context,
-
-
+        context_enrichment_token_threshold_attr,
+        render_context_enrichment_token_threshold,
     ):
         return await enrich_context(
             ctx=ctx,
-
-
-
-
-            ),
-            conversation_context=conversation_context,
+            model=model,
+            settings=model_settings,
+            prompt=context_enrichment_prompt,
+            previous_long_term_context=long_term_context,
             history_list=shorten_history_list,
             rate_limitter=rate_limitter,
         )
-    return
+    return long_term_context
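The net effect of this rewrite: enrich_context no longer builds an EnrichmentConfig and a hand-assembled instruction list; the instructions now live in the system prompt, and the user prompt is just a JSON envelope. A sketch of the payload shape, with illustrative values:

import json

# Hypothetical values; the real call passes the stored long-term context
# and the un-summarized conversation history list.
user_prompt = json.dumps(
    {
        "previous_long_term_context": "# User\n- name: Alice",
        "recent_conversation_history": [{"role": "user", "content": "hi"}],
    }
)

Gating is also token-based now: should_enrich_context skips enrichment when the configured threshold is -1 (disabled) or still larger than the token count of the serialized history.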
zrb/task/llm/error.py
CHANGED
@@ -1,12 +1,10 @@
 import json
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Optional
 
 from pydantic import BaseModel
 
 if TYPE_CHECKING:
     from openai import APIError
-else:
-    APIError = Any
 
 
 # Define a structured error model for tool execution failures
@@ -17,7 +15,7 @@ class ToolExecutionError(BaseModel):
     details: Optional[str] = None
 
 
-def extract_api_error_details(error: APIError) -> str:
+def extract_api_error_details(error: "APIError") -> str:
     """Extract detailed error information from an APIError."""
     details = f"{error.message}"
     # Try to parse the error body as JSON
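Dropping the else: APIError = Any fallback works because the annotation is now a string, so openai is only needed by type checkers. The general pattern, as a self-contained sketch (the describe function is a hypothetical stand-in):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated by static type checkers only; never imported at runtime.
    from openai import APIError

def describe(error: "APIError") -> str:
    # The quoted annotation is resolved lazily, so openai stays optional at runtime.
    return str(error)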
zrb/task/llm/history.py
CHANGED
@@ -4,7 +4,7 @@ from collections.abc import Callable
 from copy import deepcopy
 from typing import Any, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from zrb.attr.type import StrAttr
 from zrb.context.any_context import AnyContext
@@ -17,8 +17,18 @@ from zrb.util.run import run_async
 
 # Define the new ConversationHistoryData model
 class ConversationHistoryData(BaseModel):
-
-
+    long_term_context: str = Field(
+        default="",
+        description="A markdown-formatted string containing curated, long-term context.",
+    )
+    conversation_summary: str = Field(
+        default="",
+        description="A free-text summary of the conversation history.",
+    )
+    history: ListOfDict = Field(
+        default_factory=list,
+        description="The recent, un-summarized conversation history.",
+    )
 
     @classmethod
     async def read_from_sources(
@@ -69,19 +79,17 @@ class ConversationHistoryData(BaseModel):
         try:
             if isinstance(data, cls):
                 return data  # Already a valid instance
-            if isinstance(data, dict)
-                #
-                # Ensure context exists, even if empty
-                data.setdefault("context", {})
+            if isinstance(data, dict):
+                # This handles both the new format and the old {'context': ..., 'history': ...}
                 return cls.model_validate(data)
             elif isinstance(data, list):
-                # Handle old format (just a list) - wrap it
+                # Handle very old format (just a list) - wrap it
                 ctx.log_warning(
-                    f"History from {source} contains
-                    "Wrapping it into the new structure
+                    f"History from {source} contains legacy list format. "
+                    "Wrapping it into the new structure. "
                     "Consider updating the source format."
                 )
-                return cls(history=data
+                return cls(history=data)
             else:
                 ctx.log_warning(
                     f"History data from {source} has unexpected format "
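Under the new schema, a serialized ConversationHistoryData payload would look roughly like this (hypothetical values); per the read_from_sources branches above, a bare list or the old {'context': ..., 'history': ...} dict is still accepted and coerced:

# Hypothetical history payload under the 1.9.1 schema.
history_data = {
    "long_term_context": "# User\n- name: Alice",
    "conversation_summary": "Alice asked about deploying her app.",
    "history": [{"role": "user", "content": "How do I deploy?"}],
}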
zrb/task/llm/history_summarization.py
CHANGED
@@ -1,12 +1,11 @@
 import json
-
-
-from pydantic import BaseModel
+import traceback
+from typing import TYPE_CHECKING
 
 from zrb.attr.type import BoolAttr, IntAttr
 from zrb.context.any_context import AnyContext
 from zrb.llm_config import llm_config
-from zrb.llm_rate_limitter import LLMRateLimiter
+from zrb.llm_rate_limitter import LLMRateLimiter, llm_rate_limitter
 from zrb.task.llm.agent import run_agent_iteration
 from zrb.task.llm.history import (
     count_part_in_history_list,
@@ -19,28 +18,30 @@ from zrb.util.cli.style import stylize_faint
 if TYPE_CHECKING:
     from pydantic_ai.models import Model
     from pydantic_ai.settings import ModelSettings
-else:
-    Model = Any
-    ModelSettings = Any
 
 
-def get_history_summarization_threshold(
+def _count_token_in_history(history_list: ListOfDict) -> int:
+    """Counts the total number of tokens in a conversation history list."""
+    text_to_count = json.dumps(history_list)
+    return llm_rate_limitter.count_token(text_to_count)
+
+
+def get_history_summarization_token_threshold(
     ctx: AnyContext,
-
-
+    history_summarization_token_threshold_attr: IntAttr | None,
+    render_history_summarization_token_threshold: bool,
 ) -> int:
-    """Gets the history summarization threshold, handling defaults and errors."""
+    """Gets the history summarization token threshold, handling defaults and errors."""
     try:
         return get_int_attr(
             ctx,
-
-
-
-            auto_render=render_history_summarization_threshold,
+            history_summarization_token_threshold_attr,
+            llm_config.default_history_summarization_token_threshold,
+            auto_render=render_history_summarization_token_threshold,
         )
     except ValueError as e:
         ctx.log_warning(
-            f"Could not convert
+            f"Could not convert history_summarization_token_threshold to int: {e}. "
             "Defaulting to -1 (no threshold)."
         )
         return -1
@@ -49,110 +50,99 @@ def get_history_summarization_threshold(
 def should_summarize_history(
     ctx: AnyContext,
     history_list: ListOfDict,
-    should_summarize_history_attr: BoolAttr | None,
+    should_summarize_history_attr: BoolAttr | None,
     render_summarize_history: bool,
-
-
+    history_summarization_token_threshold_attr: IntAttr | None,
+    render_history_summarization_token_threshold: bool,
 ) -> bool:
-    """Determines if history summarization should occur based on length and config."""
+    """Determines if history summarization should occur based on token length and config."""
     history_part_count = count_part_in_history_list(history_list)
     if history_part_count == 0:
         return False
-
+    summarization_token_threshold = get_history_summarization_token_threshold(
         ctx,
-
-
+        history_summarization_token_threshold_attr,
+        render_history_summarization_token_threshold,
     )
-
+    history_token_count = _count_token_in_history(history_list)
+    if (
+        summarization_token_threshold == -1
+        or summarization_token_threshold > history_token_count
+    ):
         return False
     return get_bool_attr(
         ctx,
         should_summarize_history_attr,
-        # Use llm_config default if attribute is None
         llm_config.default_summarize_history,
         auto_render=render_summarize_history,
     )
 
 
-class SummarizationConfig(BaseModel):
-    model_config = {"arbitrary_types_allowed": True}
-    model: Model | str | None = None
-    settings: ModelSettings | None = None
-    prompt: str
-    retries: int = 3
-
-
 async def summarize_history(
     ctx: AnyContext,
-
-
+    model: "Model | str | None",
+    settings: "ModelSettings | None",
+    prompt: str,
+    previous_summary: str,
     history_list: ListOfDict,
     rate_limitter: LLMRateLimiter | None = None,
-
-
+    retries: int = 3,
+) -> str:
+    """Runs an LLM call to update the conversation summary."""
     from pydantic_ai import Agent
 
     ctx.log_info("Attempting to summarize conversation history...")
-
+    # Construct the user prompt for the summarization agent
+    user_prompt = json.dumps(
+        {"previous_summary": previous_summary, "recent_history": history_list}
+    )
     summarization_agent = Agent(
-        model=
-        system_prompt=
-        model_settings=
-        retries=
+        model=model,
+        system_prompt=prompt,
+        model_settings=settings,
+        retries=retries,
     )
 
-    # Prepare context and history for summarization prompt
-    try:
-        context_json = json.dumps(conversation_context)
-        history_to_summarize_json = json.dumps(history_list)
-        summarization_user_prompt = "\n".join(
-            [
-                f"Current Context: {context_json}",
-                f"Conversation History to Summarize: {history_to_summarize_json}",
-            ]
-        )
-    except Exception as e:
-        ctx.log_warning(f"Error formatting context/history for summarization: {e}")
-        return conversation_context  # Return original context if formatting fails
-
     try:
-        ctx.print(stylize_faint("
+        ctx.print(stylize_faint("📝 Summarize"), plain=True)
         summary_run = await run_agent_iteration(
             ctx=ctx,
             agent=summarization_agent,
-            user_prompt=
-            history_list=[],
+            user_prompt=user_prompt,
+            history_list=[],
             rate_limitter=rate_limitter,
         )
-        if summary_run and summary_run.result.output:
-
+        if summary_run and summary_run.result and summary_run.result.output:
+            new_summary = str(summary_run.result.output)
             usage = summary_run.result.usage()
-            ctx.print(stylize_faint(f"
-
-
-            ctx.log_info("
-
+            ctx.print(stylize_faint(f"📝 Summarization Token: {usage}"), plain=True)
+            ctx.print(plain=True)
+            ctx.log_info("History summarized and updated.")
+            ctx.log_info(f"New conversation summary:\n{new_summary}")
+            return new_summary
         else:
             ctx.log_warning("History summarization failed or returned no data.")
     except Exception as e:
         ctx.log_warning(f"Error during history summarization: {e}")
+        traceback.print_exc()
 
-
+    # Return the original summary if summarization fails
+    return previous_summary
 
 
 async def maybe_summarize_history(
     ctx: AnyContext,
     history_list: ListOfDict,
-
-    should_summarize_history_attr: BoolAttr | None,
+    conversation_summary: str,
+    should_summarize_history_attr: BoolAttr | None,
     render_summarize_history: bool,
-
-
-    model: str | Model | None,
-    model_settings: ModelSettings | None,
+    history_summarization_token_threshold_attr: IntAttr | None,
+    render_history_summarization_token_threshold: bool,
+    model: "str | Model | None",
+    model_settings: "ModelSettings | None",
     summarization_prompt: str,
     rate_limitter: LLMRateLimiter | None = None,
-) -> tuple[ListOfDict,
+) -> tuple[ListOfDict, str]:
     """Summarizes history and updates context if enabled and threshold met."""
     shorten_history_list = replace_system_prompt_in_history_list(history_list)
     if should_summarize_history(
@@ -160,21 +150,18 @@ async def maybe_summarize_history(
         shorten_history_list,
         should_summarize_history_attr,
         render_summarize_history,
-
-
+        history_summarization_token_threshold_attr,
+        render_history_summarization_token_threshold,
     ):
-
-        updated_context = await summarize_history(
+        new_summary = await summarize_history(
            ctx=ctx,
-
-
-
-
-
-            conversation_context=conversation_context,
-            history_list=shorten_history_list,  # Pass the full list for context
+            model=model,
+            settings=model_settings,
+            prompt=summarization_prompt,
+            previous_summary=conversation_summary,
+            history_list=shorten_history_list,
            rate_limitter=rate_limitter,
         )
-        #
-        return [],
-        return history_list,
+        # After summarization, the history is cleared and replaced by the new summary
+        return [], new_summary
+    return history_list, conversation_summary
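Both summarization and context enrichment now share the same token-based gate. A minimal sketch of that logic, assuming a count_token helper comparable to llm_rate_limitter.count_token (the passes_token_gate name is ours; the real should_summarize_history additionally consults the should_summarize_history boolean attribute):

import json

def passes_token_gate(history_list: list, token_threshold: int, count_token) -> bool:
    # Empty history never triggers a summarization/enrichment pass.
    if not history_list:
        return False
    # -1 means "no threshold": the feature is effectively disabled.
    if token_threshold == -1:
        return False
    # Trigger only once the serialized history reaches the threshold.
    return count_token(json.dumps(history_list)) >= token_threshold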