zrb 1.8.15__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zrb/task/llm/context.py CHANGED
@@ -1,14 +1,9 @@
  import datetime
- import inspect
  import os
  import platform
  import re
- from collections.abc import Callable
  from typing import Any

- from zrb.context.any_context import AnyContext
- from zrb.context.any_shared_context import AnySharedContext
- from zrb.util.attr import get_attr
  from zrb.util.file import read_dir, read_file_with_line_numbers


@@ -61,42 +56,3 @@ def extract_default_context(user_message: str) -> tuple[str, dict[str, Any]]:
      }

      return modified_user_message, context
-
-
- def get_conversation_context(
-     ctx: AnyContext,
-     conversation_context_attr: (
-         dict[str, Any] | Callable[[AnySharedContext], dict[str, Any]] | None
-     ),
- ) -> dict[str, Any]:
-     """
-     Retrieves the conversation context.
-     If a value in the context dict is callable, it executes it with ctx.
-     """
-     raw_context = get_attr(ctx, conversation_context_attr, {}, auto_render=False)
-     if not isinstance(raw_context, dict):
-         ctx.log_warning(
-             f"Conversation context resolved to type {type(raw_context)}, "
-             "expected dict. Returning empty context."
-         )
-         return {}
-     # If conversation_context contains callable value, execute them.
-     processed_context: dict[str, Any] = {}
-     for key, value in raw_context.items():
-         if callable(value):
-             try:
-                 # Check if the callable expects 'ctx'
-                 sig = inspect.signature(value)
-                 if "ctx" in sig.parameters:
-                     processed_context[key] = value(ctx)
-                 else:
-                     processed_context[key] = value()
-             except Exception as e:
-                 ctx.log_warning(
-                     f"Error executing callable for context key '{key}': {e}. "
-                     "Skipping."
-                 )
-                 processed_context[key] = None
-         else:
-             processed_context[key] = value
-     return processed_context
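
The removed `get_conversation_context` helper resolved callable context values by inspecting their signatures. For readers migrating off it, here is a standalone sketch of that technique; `resolve` is a hypothetical name for illustration, not part of zrb:

```python
import inspect

# Sketch of the callable-resolution technique the removed helper used:
# a callable context value is invoked with ctx only if it declares a
# "ctx" parameter, otherwise it is called with no arguments.
def resolve(value, ctx):
    if not callable(value):
        return value
    sig = inspect.signature(value)
    return value(ctx) if "ctx" in sig.parameters else value()

print(resolve("plain value", ctx=None))            # -> plain value
print(resolve(lambda: "no ctx", ctx=None))         # -> no ctx
print(resolve(lambda ctx: f"got {ctx}", ctx="X"))  # -> got X
```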
@@ -1,13 +1,11 @@
  import json
  import traceback
- from typing import TYPE_CHECKING, Any
-
- from pydantic import BaseModel
+ from typing import TYPE_CHECKING

  from zrb.attr.type import BoolAttr, IntAttr
  from zrb.context.any_context import AnyContext
  from zrb.llm_config import llm_config
- from zrb.llm_rate_limitter import LLMRateLimiter
+ from zrb.llm_rate_limitter import LLMRateLimiter, llm_rate_limitter
  from zrb.task.llm.agent import run_agent_iteration
  from zrb.task.llm.history import (
      count_part_in_history_list,
@@ -20,114 +18,87 @@ from zrb.util.cli.style import stylize_faint
  if TYPE_CHECKING:
      from pydantic_ai.models import Model
      from pydantic_ai.settings import ModelSettings
- else:
-     Model = Any
-     ModelSettings = Any
-
-
- class EnrichmentConfig(BaseModel):
-     model_config = {"arbitrary_types_allowed": True}
-     model: Model | str | None = None
-     settings: ModelSettings | None = None
-     prompt: str
-     retries: int = 3


- class EnrichmentResult(BaseModel):
-     response: dict[str, Any]  # or further decompose as needed
+ def _count_token_in_history(history_list: ListOfDict) -> int:
+     """Counts the total number of tokens in a conversation history list."""
+     text_to_count = json.dumps(history_list)
+     return llm_rate_limitter.count_token(text_to_count)


  async def enrich_context(
      ctx: AnyContext,
-     config: EnrichmentConfig,
-     conversation_context: dict[str, Any],
+     model: "Model | str | None",
+     settings: "ModelSettings | None",
+     prompt: str,
+     previous_long_term_context: str,
      history_list: ListOfDict,
      rate_limitter: LLMRateLimiter | None = None,
- ) -> dict[str, Any]:
-     """Runs an LLM call to extract key info and merge it into the context."""
+     retries: int = 3,
+ ) -> str:
+     """Runs an LLM call to update the long-term context and returns the new context string."""
      from pydantic_ai import Agent

      ctx.log_info("Attempting to enrich conversation context...")
-     # Prepare context and history for the enrichment prompt
-     history_summary = conversation_context.get("history_summary")
-     try:
-         context_json = json.dumps(conversation_context)
-         history_json = json.dumps(history_list)
-         # The user prompt will now contain the dynamic data
-         user_prompt_data = "\n".join(
-             [
-                 "Extract context from the following conversation info.",
-                 "Extract only contexts that will be relevant across multiple conversations, like",  # noqa
-                 "- user name",
-                 "- user hobby",
-                 "- user's long life goal",
-                 "- standard/SOP",
-                 "- etc.",
-                 "Always maintain the relevant context and remove the irrelevant ones.",
-                 "Restructure the context in a helpful way",
-                 "Keep the context small",
-                 f"Existing Context: {context_json}",
-                 f"Conversation History: {history_json}",
-             ]
-         )
-     except Exception as e:
-         ctx.log_warning(f"Error formatting context/history for enrichment: {e}")
-         return conversation_context  # Return original context if formatting fails
-
+     # Construct the user prompt according to the new prompt format
+     user_prompt = json.dumps(
+         {
+             "previous_long_term_context": previous_long_term_context,
+             "recent_conversation_history": history_list,
+         }
+     )
      enrichment_agent = Agent(
-         model=config.model,
-         system_prompt=config.prompt,  # Use the main prompt as system prompt
-         model_settings=config.settings,
-         retries=config.retries,
-         output_type=EnrichmentResult,
+         model=model,
+         system_prompt=prompt,
+         model_settings=settings,
+         retries=retries,
      )

      try:
-         ctx.print(stylize_faint("[Context Enrichment Triggered]"), plain=True)
+         ctx.print(stylize_faint("💡 Enrich Context"), plain=True)
          enrichment_run = await run_agent_iteration(
              ctx=ctx,
              agent=enrichment_agent,
-             user_prompt=user_prompt_data,  # Pass the formatted data as user prompt
-             history_list=[],  # Enrichment agent doesn't need prior history itself
+             user_prompt=user_prompt,
+             history_list=[],  # Enrichment agent works off the prompt, not history
              rate_limitter=rate_limitter,
          )
          if enrichment_run and enrichment_run.result.output:
-             response = enrichment_run.result.output.response
+             new_long_term_context = str(enrichment_run.result.output)
              usage = enrichment_run.result.usage()
-             ctx.print(stylize_faint(f"[Token Usage] {usage}"), plain=True)
-             if response:
-                 conversation_context = response
-                 # Re inject history summary
-                 conversation_context["history_summary"] = history_summary
-                 ctx.log_info("Context enriched based on history.")
-                 ctx.log_info(
-                     f"Updated conversation context: {json.dumps(conversation_context)}"
-                 )
+             ctx.print(
+                 stylize_faint(f"💡 Context Enrichment Token: {usage}"), plain=True
+             )
+             ctx.print(plain=True)
+             ctx.log_info("Context enriched based on history.")
+             ctx.log_info(f"Updated long-term context:\n{new_long_term_context}")
+             return new_long_term_context
          else:
-             ctx.log_warning("Context enrichment returned no data")
+             ctx.log_warning("Context enrichment returned no data.")
      except Exception as e:
          ctx.log_warning(f"Error during context enrichment LLM call: {e}")
          traceback.print_exc()
-     return conversation_context
+
+     # Return the original context if enrichment fails
+     return previous_long_term_context


- def get_context_enrichment_threshold(
+ def get_context_enrichment_token_threshold(
      ctx: AnyContext,
-     context_enrichment_threshold_attr: IntAttr | None,
-     render_context_enrichment_threshold: bool,
+     context_enrichment_token_threshold_attr: IntAttr | None,
+     render_context_enrichment_token_threshold: bool,
  ) -> int:
-     """Gets the context enrichment threshold, handling defaults and errors."""
+     """Gets the context enrichment token threshold, handling defaults and errors."""
      try:
          return get_int_attr(
              ctx,
-             context_enrichment_threshold_attr,
-             # Use llm_config default if attribute is None
-             llm_config.default_context_enrichment_threshold,
-             auto_render=render_context_enrichment_threshold,
+             context_enrichment_token_threshold_attr,
+             llm_config.default_context_enrichment_token_threshold,
+             auto_render=render_context_enrichment_token_threshold,
          )
      except ValueError as e:
          ctx.log_warning(
-             f"Could not convert context_enrichment_threshold to int: {e}. "
+             f"Could not convert context_enrichment_token_threshold to int: {e}. "
              "Defaulting to -1 (no threshold)."
          )
          return -1
@@ -136,23 +107,27 @@ def get_context_enrichment_threshold(
  def should_enrich_context(
      ctx: AnyContext,
      history_list: ListOfDict,
-     should_enrich_context_attr: BoolAttr | None,  # Allow None
+     should_enrich_context_attr: BoolAttr | None,
      render_enrich_context: bool,
-     context_enrichment_threshold_attr: IntAttr | None,
-     render_context_enrichment_threshold: bool,
+     context_enrichment_token_threshold_attr: IntAttr | None,
+     render_context_enrichment_token_threshold: bool,
  ) -> bool:
      """
-     Determines if context enrichment should occur based on history, threshold, and config.
+     Determines if context enrichment should occur based on history, token threshold, and config.
      """
      history_part_count = count_part_in_history_list(history_list)
      if history_part_count == 0:
          return False
-     enrichment_threshold = get_context_enrichment_threshold(
+     enrichment_token_threshold = get_context_enrichment_token_threshold(
          ctx,
-         context_enrichment_threshold_attr,
-         render_context_enrichment_threshold,
+         context_enrichment_token_threshold_attr,
+         render_context_enrichment_token_threshold,
      )
-     if enrichment_threshold == -1 or enrichment_threshold > history_part_count:
+     history_token_count = _count_token_in_history(history_list)
+     if (
+         enrichment_token_threshold == -1
+         or enrichment_token_threshold > history_token_count
+     ):
          return False
      return get_bool_attr(
          ctx,
@@ -165,35 +140,33 @@ def should_enrich_context(
  async def maybe_enrich_context(
      ctx: AnyContext,
      history_list: ListOfDict,
-     conversation_context: dict[str, Any],
+     long_term_context: str,
      should_enrich_context_attr: BoolAttr | None,
      render_enrich_context: bool,
-     context_enrichment_threshold_attr: IntAttr | None,
-     render_context_enrichment_threshold: bool,
-     model: str | Model | None,
-     model_settings: ModelSettings | None,
+     context_enrichment_token_threshold_attr: IntAttr | None,
+     render_context_enrichment_token_threshold: bool,
+     model: "str | Model | None",
+     model_settings: "ModelSettings | None",
      context_enrichment_prompt: str,
      rate_limitter: LLMRateLimiter | None = None,
- ) -> dict[str, Any]:
-     """Enriches context based on history if enabled and threshold met."""
+ ) -> str:
+     """Enriches context based on history if enabled and token threshold met."""
      shorten_history_list = replace_system_prompt_in_history_list(history_list)
      if should_enrich_context(
          ctx,
          shorten_history_list,
          should_enrich_context_attr,
          render_enrich_context,
-         context_enrichment_threshold_attr,
-         render_context_enrichment_threshold,
+         context_enrichment_token_threshold_attr,
+         render_context_enrichment_token_threshold,
      ):
          return await enrich_context(
              ctx=ctx,
-             config=EnrichmentConfig(
-                 model=model,
-                 settings=model_settings,
-                 prompt=context_enrichment_prompt,
-             ),
-             conversation_context=conversation_context,
+             model=model,
+             settings=model_settings,
+             prompt=context_enrichment_prompt,
+             previous_long_term_context=long_term_context,
              history_list=shorten_history_list,
              rate_limitter=rate_limitter,
          )
-     return conversation_context
+     return long_term_context
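
Note the signature change: `enrich_context` now takes plain keyword arguments instead of an `EnrichmentConfig`, and sends the previous long-term context plus the recent history as a single JSON user prompt. A minimal sketch of that payload construction, with illustrative values:

```python
import json

# Illustrative values; the real call passes the stored long-term context
# and the un-summarized history list.
previous_long_term_context = "- User name: Ana\n- Prefers short answers"
history_list = [{"role": "user", "content": "What did I ask yesterday?"}]

# Mirrors the user_prompt built inside enrich_context above.
user_prompt = json.dumps(
    {
        "previous_long_term_context": previous_long_term_context,
        "recent_conversation_history": history_list,
    }
)
print(user_prompt)
```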
zrb/task/llm/error.py CHANGED
@@ -1,12 +1,10 @@
  import json
- from typing import TYPE_CHECKING, Any, Optional
+ from typing import TYPE_CHECKING, Optional

  from pydantic import BaseModel

  if TYPE_CHECKING:
      from openai import APIError
- else:
-     APIError = Any


  # Define a structured error model for tool execution failures
@@ -17,7 +15,7 @@ class ToolExecutionError(BaseModel):
      details: Optional[str] = None


- def extract_api_error_details(error: APIError) -> str:
+ def extract_api_error_details(error: "APIError") -> str:
      """Extract detailed error information from an APIError."""
      details = f"{error.message}"
      # Try to parse the error body as JSON
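
The runtime `APIError = Any` fallback is dropped in favor of a quoted forward reference, so `openai` is only imported for type checkers. A minimal sketch of the pattern; `describe` is a hypothetical function for illustration:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers; openai need not be installed at runtime.
    from openai import APIError


def describe(error: "APIError") -> str:
    # The quoted annotation is never evaluated at runtime, so this module
    # imports cleanly even without openai installed.
    return f"{type(error).__name__}: {error}"
```

This keeps `openai` an optional dependency while preserving static type coverage.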
zrb/task/llm/history.py CHANGED
@@ -4,7 +4,7 @@ from collections.abc import Callable
  from copy import deepcopy
  from typing import Any, Optional

- from pydantic import BaseModel
+ from pydantic import BaseModel, Field

  from zrb.attr.type import StrAttr
  from zrb.context.any_context import AnyContext
@@ -17,8 +17,18 @@ from zrb.util.run import run_async

  # Define the new ConversationHistoryData model
  class ConversationHistoryData(BaseModel):
-     context: dict[str, Any] = {}
-     history: ListOfDict = []
+     long_term_context: str = Field(
+         default="",
+         description="A markdown-formatted string containing curated, long-term context.",
+     )
+     conversation_summary: str = Field(
+         default="",
+         description="A free-text summary of the conversation history.",
+     )
+     history: ListOfDict = Field(
+         default_factory=list,
+         description="The recent, un-summarized conversation history.",
+     )

      @classmethod
      async def read_from_sources(
@@ -69,19 +79,17 @@ class ConversationHistoryData(BaseModel):
          try:
              if isinstance(data, cls):
                  return data  # Already a valid instance
-             if isinstance(data, dict) and "history" in data:
-                 # Standard format {'context': ..., 'history': ...}
-                 # Ensure context exists, even if empty
-                 data.setdefault("context", {})
+             if isinstance(data, dict):
+                 # This handles both the new format and the old {'context': ..., 'history': ...}
                  return cls.model_validate(data)
              elif isinstance(data, list):
-                 # Handle old format (just a list) - wrap it
+                 # Handle very old format (just a list) - wrap it
                  ctx.log_warning(
-                     f"History from {source} contains old list format. "
-                     "Wrapping it into the new structure {'context': {}, 'history': [...]}. "
+                     f"History from {source} contains legacy list format. "
+                     "Wrapping it into the new structure. "
                      "Consider updating the source format."
                  )
-                 return cls(history=data, context={})
+                 return cls(history=data)
              else:
                  ctx.log_warning(
                      f"History data from {source} has unexpected format "
@@ -1,12 +1,11 @@
  import json
- from typing import TYPE_CHECKING, Any
-
- from pydantic import BaseModel
+ import traceback
+ from typing import TYPE_CHECKING

  from zrb.attr.type import BoolAttr, IntAttr
  from zrb.context.any_context import AnyContext
  from zrb.llm_config import llm_config
- from zrb.llm_rate_limitter import LLMRateLimiter
+ from zrb.llm_rate_limitter import LLMRateLimiter, llm_rate_limitter
  from zrb.task.llm.agent import run_agent_iteration
  from zrb.task.llm.history import (
      count_part_in_history_list,
@@ -19,28 +18,30 @@ from zrb.util.cli.style import stylize_faint
  if TYPE_CHECKING:
      from pydantic_ai.models import Model
      from pydantic_ai.settings import ModelSettings
- else:
-     Model = Any
-     ModelSettings = Any


- def get_history_summarization_threshold(
+ def _count_token_in_history(history_list: ListOfDict) -> int:
+     """Counts the total number of tokens in a conversation history list."""
+     text_to_count = json.dumps(history_list)
+     return llm_rate_limitter.count_token(text_to_count)
+
+
+ def get_history_summarization_token_threshold(
      ctx: AnyContext,
-     history_summarization_threshold_attr: IntAttr | None,
-     render_history_summarization_threshold: bool,
+     history_summarization_token_threshold_attr: IntAttr | None,
+     render_history_summarization_token_threshold: bool,
  ) -> int:
-     """Gets the history summarization threshold, handling defaults and errors."""
+     """Gets the history summarization token threshold, handling defaults and errors."""
      try:
          return get_int_attr(
              ctx,
-             history_summarization_threshold_attr,
-             # Use llm_config default if attribute is None
-             llm_config.default_history_summarization_threshold,
-             auto_render=render_history_summarization_threshold,
+             history_summarization_token_threshold_attr,
+             llm_config.default_history_summarization_token_threshold,
+             auto_render=render_history_summarization_token_threshold,
          )
      except ValueError as e:
          ctx.log_warning(
-             f"Could not convert history_summarization_threshold to int: {e}. "
+             f"Could not convert history_summarization_token_threshold to int: {e}. "
              "Defaulting to -1 (no threshold)."
          )
          return -1
@@ -49,110 +50,99 @@ def get_history_summarization_threshold(
  def should_summarize_history(
      ctx: AnyContext,
      history_list: ListOfDict,
-     should_summarize_history_attr: BoolAttr | None,  # Allow None
+     should_summarize_history_attr: BoolAttr | None,
      render_summarize_history: bool,
-     history_summarization_threshold_attr: IntAttr | None,  # Allow None
-     render_history_summarization_threshold: bool,
+     history_summarization_token_threshold_attr: IntAttr | None,
+     render_history_summarization_token_threshold: bool,
  ) -> bool:
-     """Determines if history summarization should occur based on length and config."""
+     """Determines if history summarization should occur based on token length and config."""
      history_part_count = count_part_in_history_list(history_list)
      if history_part_count == 0:
          return False
-     summarization_threshold = get_history_summarization_threshold(
+     summarization_token_threshold = get_history_summarization_token_threshold(
          ctx,
-         history_summarization_threshold_attr,
-         render_history_summarization_threshold,
+         history_summarization_token_threshold_attr,
+         render_history_summarization_token_threshold,
      )
-     if summarization_threshold == -1 or summarization_threshold > history_part_count:
+     history_token_count = _count_token_in_history(history_list)
+     if (
+         summarization_token_threshold == -1
+         or summarization_token_threshold > history_token_count
+     ):
          return False
      return get_bool_attr(
          ctx,
          should_summarize_history_attr,
-         # Use llm_config default if attribute is None
          llm_config.default_summarize_history,
          auto_render=render_summarize_history,
      )


- class SummarizationConfig(BaseModel):
-     model_config = {"arbitrary_types_allowed": True}
-     model: Model | str | None = None
-     settings: ModelSettings | None = None
-     prompt: str
-     retries: int = 3
-
-
  async def summarize_history(
      ctx: AnyContext,
-     config: SummarizationConfig,
-     conversation_context: dict[str, Any],
+     model: "Model | str | None",
+     settings: "ModelSettings | None",
+     prompt: str,
+     previous_summary: str,
      history_list: ListOfDict,
      rate_limitter: LLMRateLimiter | None = None,
- ) -> dict[str, Any]:
-     """Runs an LLM call to summarize history and update the context."""
+     retries: int = 3,
+ ) -> str:
+     """Runs an LLM call to update the conversation summary."""
      from pydantic_ai import Agent

      ctx.log_info("Attempting to summarize conversation history...")
-
+     # Construct the user prompt for the summarization agent
+     user_prompt = json.dumps(
+         {"previous_summary": previous_summary, "recent_history": history_list}
+     )
      summarization_agent = Agent(
-         model=config.model,
-         system_prompt=config.prompt,
-         model_settings=config.settings,
-         retries=config.retries,
+         model=model,
+         system_prompt=prompt,
+         model_settings=settings,
+         retries=retries,
      )

-     # Prepare context and history for summarization prompt
-     try:
-         context_json = json.dumps(conversation_context)
-         history_to_summarize_json = json.dumps(history_list)
-         summarization_user_prompt = "\n".join(
-             [
-                 f"Current Context: {context_json}",
-                 f"Conversation History to Summarize: {history_to_summarize_json}",
-             ]
-         )
-     except Exception as e:
-         ctx.log_warning(f"Error formatting context/history for summarization: {e}")
-         return conversation_context  # Return original context if formatting fails
-
      try:
-         ctx.print(stylize_faint("[Summarization Triggered]"), plain=True)
+         ctx.print(stylize_faint("📝 Summarize"), plain=True)
          summary_run = await run_agent_iteration(
              ctx=ctx,
              agent=summarization_agent,
-             user_prompt=summarization_user_prompt,
-             history_list=[],  # Summarization agent doesn't need prior history
+             user_prompt=user_prompt,
+             history_list=[],
              rate_limitter=rate_limitter,
          )
-         if summary_run and summary_run.result.output:
-             summary_text = str(summary_run.result.output)
+         if summary_run and summary_run.result and summary_run.result.output:
+             new_summary = str(summary_run.result.output)
              usage = summary_run.result.usage()
-             ctx.print(stylize_faint(f"[Token Usage] {usage}"), plain=True)
-             # Update context with the new summary
-             conversation_context["history_summary"] = summary_text
-             ctx.log_info("History summarized and added/updated in context.")
-             ctx.log_info(f"Conversation summary: {summary_text}")
+             ctx.print(stylize_faint(f"📝 Summarization Token: {usage}"), plain=True)
+             ctx.print(plain=True)
+             ctx.log_info("History summarized and updated.")
+             ctx.log_info(f"New conversation summary:\n{new_summary}")
+             return new_summary
          else:
              ctx.log_warning("History summarization failed or returned no data.")
      except Exception as e:
          ctx.log_warning(f"Error during history summarization: {e}")
+         traceback.print_exc()

-     return conversation_context
+     # Return the original summary if summarization fails
+     return previous_summary


  async def maybe_summarize_history(
      ctx: AnyContext,
      history_list: ListOfDict,
-     conversation_context: dict[str, Any],
-     should_summarize_history_attr: BoolAttr | None,  # Allow None
+     conversation_summary: str,
+     should_summarize_history_attr: BoolAttr | None,
      render_summarize_history: bool,
-     history_summarization_threshold_attr: IntAttr | None,  # Allow None
-     render_history_summarization_threshold: bool,
-     model: str | Model | None,
-     model_settings: ModelSettings | None,
+     history_summarization_token_threshold_attr: IntAttr | None,
+     render_history_summarization_token_threshold: bool,
+     model: "str | Model | None",
+     model_settings: "ModelSettings | None",
      summarization_prompt: str,
      rate_limitter: LLMRateLimiter | None = None,
- ) -> tuple[ListOfDict, dict[str, Any]]:
+ ) -> tuple[ListOfDict, str]:
      """Summarizes history and updates context if enabled and threshold met."""
      shorten_history_list = replace_system_prompt_in_history_list(history_list)
      if should_summarize_history(
@@ -160,21 +150,18 @@ async def maybe_summarize_history(
          shorten_history_list,
          should_summarize_history_attr,
          render_summarize_history,
-         history_summarization_threshold_attr,
-         render_history_summarization_threshold,
+         history_summarization_token_threshold_attr,
+         render_history_summarization_token_threshold,
      ):
-         # Use summarize_history defined above
-         updated_context = await summarize_history(
+         new_summary = await summarize_history(
              ctx=ctx,
-             config=SummarizationConfig(
-                 model=model,
-                 settings=model_settings,
-                 prompt=summarization_prompt,
-             ),
-             conversation_context=conversation_context,
-             history_list=shorten_history_list,  # Pass the full list for context
+             model=model,
+             settings=model_settings,
+             prompt=summarization_prompt,
+             previous_summary=conversation_summary,
+             history_list=shorten_history_list,
              rate_limitter=rate_limitter,
          )
-         # Truncate the history list after summarization
-         return [], updated_context
-     return history_list, conversation_context
+         # After summarization, the history is cleared and replaced by the new summary
+         return [], new_summary
+     return history_list, conversation_summary
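
Both the summarization and enrichment gates now trigger on token counts rather than message-part counts: the history list is JSON-serialized, counted with `llm_rate_limitter.count_token`, and compared against the threshold, with `-1` still meaning "disabled". A standalone sketch of that decision, using tiktoken as a stand-in tokenizer (zrb's actual counter may differ):

```python
import json

import tiktoken  # stand-in; zrb's llm_rate_limitter may count tokens differently


def count_token_in_history(history_list: list[dict]) -> int:
    # Mirrors _count_token_in_history: serialize the history, then count tokens.
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(json.dumps(history_list)))


def over_token_threshold(history_list: list[dict], threshold: int) -> bool:
    # Mirrors the gating in should_summarize_history / should_enrich_context:
    # -1 disables the check; otherwise trigger once the history reaches it.
    if threshold == -1:
        return False
    return count_token_in_history(history_list) >= threshold


history = [{"role": "user", "content": "hello " * 200}]
print(over_token_threshold(history, threshold=100))  # True for this long turn
print(over_token_threshold(history, threshold=-1))   # threshold disabled
```

Counting tokens instead of message parts makes the gate track what actually matters for the model's context window, since a single long message can cost more than many short ones.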