zrb 1.8.15__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zrb/builtin/llm/chat_session.py +16 -11
- zrb/llm_config.py +132 -165
- zrb/task/any_task.py +6 -9
- zrb/task/llm/agent.py +26 -33
- zrb/task/llm/config.py +4 -7
- zrb/task/llm/context.py +0 -44
- zrb/task/llm/context_enrichment.py +44 -81
- zrb/task/llm/error.py +2 -4
- zrb/task/llm/history.py +19 -11
- zrb/task/llm/history_summarization.py +46 -69
- zrb/task/llm/print_node.py +10 -8
- zrb/task/llm/prompt.py +12 -19
- zrb/task/llm/tool_wrapper.py +2 -4
- zrb/task/llm_task.py +52 -60
- {zrb-1.8.15.dist-info → zrb-1.9.0.dist-info}/METADATA +1 -1
- {zrb-1.8.15.dist-info → zrb-1.9.0.dist-info}/RECORD +18 -18
- {zrb-1.8.15.dist-info → zrb-1.9.0.dist-info}/WHEEL +0 -0
- {zrb-1.8.15.dist-info → zrb-1.9.0.dist-info}/entry_points.txt +0 -0
zrb/task/llm/context.py
CHANGED

@@ -1,14 +1,9 @@
 import datetime
-import inspect
 import os
 import platform
 import re
-from collections.abc import Callable
 from typing import Any
 
-from zrb.context.any_context import AnyContext
-from zrb.context.any_shared_context import AnySharedContext
-from zrb.util.attr import get_attr
 from zrb.util.file import read_dir, read_file_with_line_numbers
 
 
@@ -61,42 +56,3 @@ def extract_default_context(user_message: str) -> tuple[str, dict[str, Any]]:
     }
 
     return modified_user_message, context
-
-
-def get_conversation_context(
-    ctx: AnyContext,
-    conversation_context_attr: (
-        dict[str, Any] | Callable[[AnySharedContext], dict[str, Any]] | None
-    ),
-) -> dict[str, Any]:
-    """
-    Retrieves the conversation context.
-    If a value in the context dict is callable, it executes it with ctx.
-    """
-    raw_context = get_attr(ctx, conversation_context_attr, {}, auto_render=False)
-    if not isinstance(raw_context, dict):
-        ctx.log_warning(
-            f"Conversation context resolved to type {type(raw_context)}, "
-            "expected dict. Returning empty context."
-        )
-        return {}
-    # If conversation_context contains callable value, execute them.
-    processed_context: dict[str, Any] = {}
-    for key, value in raw_context.items():
-        if callable(value):
-            try:
-                # Check if the callable expects 'ctx'
-                sig = inspect.signature(value)
-                if "ctx" in sig.parameters:
-                    processed_context[key] = value(ctx)
-                else:
-                    processed_context[key] = value()
-            except Exception as e:
-                ctx.log_warning(
-                    f"Error executing callable for context key '{key}': {e}. "
-                    "Skipping."
-                )
-                processed_context[key] = None
-        else:
-            processed_context[key] = value
-    return processed_context
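The removed helper was the only place zrb executed callable values inside the conversation-context dict. A minimal runnable stand-in for that behavior (simplified: it skips the removed function's `inspect.signature` check and warning logs, and the sample values are hypothetical):

from typing import Any


def resolve_context(raw_context: dict[str, Any]) -> dict[str, Any]:
    # Execute callable values and pass plain values through, as the removed
    # get_conversation_context did before 1.9.0.
    return {
        key: (value() if callable(value) else value)
        for key, value in raw_context.items()
    }


print(resolve_context({"user": "Ana", "today": lambda: "2024-06-01"}))
# {'user': 'Ana', 'today': '2024-06-01'}

In 1.9.0 this machinery disappears because long-term context now travels as a single string rather than a dict (see maybe_enrich_context below).
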
zrb/task/llm/context_enrichment.py
CHANGED

@@ -1,8 +1,6 @@
 import json
 import traceback
-from typing import TYPE_CHECKING, Any
-
-from pydantic import BaseModel
+from typing import TYPE_CHECKING
 
 from zrb.attr.type import BoolAttr, IntAttr
 from zrb.context.any_context import AnyContext
@@ -20,95 +18,63 @@ from zrb.util.cli.style import stylize_faint
 if TYPE_CHECKING:
     from pydantic_ai.models import Model
     from pydantic_ai.settings import ModelSettings
-else:
-    Model = Any
-    ModelSettings = Any
-
-
-class EnrichmentConfig(BaseModel):
-    model_config = {"arbitrary_types_allowed": True}
-    model: Model | str | None = None
-    settings: ModelSettings | None = None
-    prompt: str
-    retries: int = 3
-
-
-class EnrichmentResult(BaseModel):
-    response: dict[str, Any]  # or further decompose as needed
 
 
 async def enrich_context(
     ctx: AnyContext,
-    config: EnrichmentConfig,
-    conversation_context: dict[str, Any],
+    model: "Model | str | None",
+    settings: "ModelSettings | None",
+    prompt: str,
+    previous_long_term_context: str,
     history_list: ListOfDict,
     rate_limitter: LLMRateLimiter | None = None,
-) -> dict[str, Any]:
-    """Enriches the conversation context based on the conversation history."""
+    retries: int = 3,
+) -> str:
+    """Runs an LLM call to update the long-term context and returns the new context string."""
    from pydantic_ai import Agent
 
     ctx.log_info("Attempting to enrich conversation context...")
-    # Prepare context and history for the enrichment prompt
-
-    try:
-        context_json = json.dumps(conversation_context)
-        history_json = json.dumps(history_list)
-
-        enrichment_user_prompt = "\n".join(
-            [
-                "Extract context from the following conversation info.",
-                "Extract only contexts that will be relevant across multiple conversations, like",  # noqa
-                "- user name",
-                "- user hobby",
-                "- user's long life goal",
-                "- standard/SOP",
-                "- etc.",
-                "Always maintain the relevant context and remove the irrelevant ones.",
-                "Restructure the context in a helpful way",
-                "Keep the context small",
-                f"Existing Context: {context_json}",
-                f"Conversation History: {history_json}",
-            ]
-        )
-    except Exception as e:
-        ctx.log_warning(f"Error formatting context/history for enrichment: {e}")
-        return conversation_context  # Return original context if formatting fails
-
+    # Construct the user prompt according to the new prompt format
+    user_prompt = json.dumps(
+        {
+            "previous_long_term_context": previous_long_term_context,
+            "recent_conversation_history": history_list,
+        }
+    )
     enrichment_agent = Agent(
-        model=config.model,
-        system_prompt=config.prompt,
-        model_settings=config.settings,
-        retries=config.retries,
-        output_type=EnrichmentResult,
+        model=model,
+        system_prompt=prompt,
+        model_settings=settings,
+        retries=retries,
     )
 
     try:
-        ctx.print(stylize_faint("[Context Enrichment Triggered]"), plain=True)
+        ctx.print(stylize_faint("💡 Enrich Context"), plain=True)
         enrichment_run = await run_agent_iteration(
             ctx=ctx,
             agent=enrichment_agent,
-            user_prompt=enrichment_user_prompt,
-            history_list=[],  # Enrichment agent doesn't need history
+            user_prompt=user_prompt,
+            history_list=[],  # Enrichment agent works off the prompt, not history
             rate_limitter=rate_limitter,
         )
         if enrichment_run and enrichment_run.result.output:
-            response = enrichment_run.result.output.response
+            new_long_term_context = str(enrichment_run.result.output)
             usage = enrichment_run.result.usage()
-            ctx.print(
-                stylize_faint(f"[Token Usage] {usage}"), plain=True
-            )
-            if response:
-                conversation_context.update(response)
-                ctx.log_info("Context enriched based on history.")
-                ctx.log_info(
-                    f"Updated conversation context: {json.dumps(conversation_context)}"
-                )
+            ctx.print(
+                stylize_faint(f"💡 Context Enrichment Token: {usage}"), plain=True
+            )
+            ctx.print(plain=True)
+            ctx.log_info("Context enriched based on history.")
+            ctx.log_info(f"Updated long-term context:\n{new_long_term_context}")
+            return new_long_term_context
         else:
-            ctx.log_warning("Context enrichment returned no data")
+            ctx.log_warning("Context enrichment returned no data.")
     except Exception as e:
         ctx.log_warning(f"Error during context enrichment LLM call: {e}")
         traceback.print_exc()
-    return conversation_context
+
+    # Return the original context if enrichment fails
+    return previous_long_term_context
 
 
 def get_context_enrichment_threshold(
@@ -121,7 +87,6 @@ def get_context_enrichment_threshold(
     return get_int_attr(
         ctx,
         context_enrichment_threshold_attr,
-        # Use llm_config default if attribute is None
         llm_config.default_context_enrichment_threshold,
         auto_render=render_context_enrichment_threshold,
     )
@@ -136,7 +101,7 @@ def get_context_enrichment_threshold(
 def should_enrich_context(
     ctx: AnyContext,
     history_list: ListOfDict,
-    should_enrich_context_attr: BoolAttr | None,
+    should_enrich_context_attr: BoolAttr | None,
     render_enrich_context: bool,
     context_enrichment_threshold_attr: IntAttr | None,
     render_context_enrichment_threshold: bool,
@@ -165,16 +130,16 @@ def should_enrich_context(
 async def maybe_enrich_context(
     ctx: AnyContext,
     history_list: ListOfDict,
-    conversation_context: dict[str, Any],
+    long_term_context: str,
     should_enrich_context_attr: BoolAttr | None,
     render_enrich_context: bool,
     context_enrichment_threshold_attr: IntAttr | None,
     render_context_enrichment_threshold: bool,
-    model: str | Model | None,
-    model_settings: ModelSettings | None,
+    model: "str | Model | None",
+    model_settings: "ModelSettings | None",
     context_enrichment_prompt: str,
     rate_limitter: LLMRateLimiter | None = None,
-) -> dict[str, Any]:
+) -> str:
     """Enriches context based on history if enabled and threshold met."""
     shorten_history_list = replace_system_prompt_in_history_list(history_list)
     if should_enrich_context(
@@ -187,13 +152,11 @@ async def maybe_enrich_context(
     ):
         return await enrich_context(
             ctx=ctx,
-            config=EnrichmentConfig(
-                model=model,
-                settings=model_settings,
-                prompt=context_enrichment_prompt,
-            ),
-            conversation_context=conversation_context,
+            model=model,
+            settings=model_settings,
+            prompt=context_enrichment_prompt,
+            previous_long_term_context=long_term_context,
             history_list=shorten_history_list,
             rate_limitter=rate_limitter,
         )
-    return conversation_context
+    return long_term_context
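For orientation, a small runnable sketch of the new prompt shape (the sample values are hypothetical): instead of a joined instruction list validated against an EnrichmentResult schema, the enrichment agent now receives a single JSON payload and returns a plain string.

import json

# Hypothetical sample values; the structure mirrors the user_prompt
# construction added in 1.9.0 above.
previous_long_term_context = "## User\n- name: Ana"
history_list = [{"role": "user", "content": "My name is Ana."}]

user_prompt = json.dumps(
    {
        "previous_long_term_context": previous_long_term_context,
        "recent_conversation_history": history_list,
    }
)
print(user_prompt)
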
zrb/task/llm/error.py
CHANGED

@@ -1,12 +1,10 @@
 import json
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Optional
 
 from pydantic import BaseModel
 
 if TYPE_CHECKING:
     from openai import APIError
-else:
-    APIError = Any
 
 
 # Define a structured error model for tool execution failures
@@ -17,7 +15,7 @@ class ToolExecutionError(BaseModel):
     details: Optional[str] = None
 
 
-def extract_api_error_details(error: APIError) -> str:
+def extract_api_error_details(error: "APIError") -> str:
     """Extract detailed error information from an APIError."""
     details = f"{error.message}"
     # Try to parse the error body as JSON
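The same idiom recurs throughout this release: the runtime `else: APIError = Any` fallback is dropped and the annotation is quoted instead, so the heavy import only happens during static type checking. A minimal sketch of the pattern (the describe function is illustrative, not zrb's):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers; openai is never imported at runtime.
    from openai import APIError


def describe(error: "APIError") -> str:
    # The quoted annotation is never evaluated at runtime,
    # so no `APIError = Any` fallback is required.
    return str(error)
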
zrb/task/llm/history.py
CHANGED

@@ -4,7 +4,7 @@ from collections.abc import Callable
 from copy import deepcopy
 from typing import Any, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from zrb.attr.type import StrAttr
 from zrb.context.any_context import AnyContext
@@ -17,8 +17,18 @@ from zrb.util.run import run_async
 
 # Define the new ConversationHistoryData model
 class ConversationHistoryData(BaseModel):
-    context: dict[str, Any] = {}
-    history: ListOfDict = []
+    long_term_context: str = Field(
+        default="",
+        description="A markdown-formatted string containing curated, long-term context.",
+    )
+    conversation_summary: str = Field(
+        default="",
+        description="A free-text summary of the conversation history.",
+    )
+    history: ListOfDict = Field(
+        default_factory=list,
+        description="The recent, un-summarized conversation history.",
+    )
 
     @classmethod
     async def read_from_sources(
@@ -69,19 +79,17 @@ class ConversationHistoryData(BaseModel):
         try:
             if isinstance(data, cls):
                 return data  # Already a valid instance
-            if isinstance(data, dict):
-                # Handle old format {'context': ..., 'history': ...}
-                # Ensure context exists, even if empty
-                data.setdefault("context", {})
+            if isinstance(data, dict):
+                # This handles both the new format and the old {'context': ..., 'history': ...}
                 return cls.model_validate(data)
             elif isinstance(data, list):
-                # Handle old format (just a list) - wrap it
+                # Handle very old format (just a list) - wrap it
                 ctx.log_warning(
-                    f"History from {source} contains list format. "
-                    "Wrapping it into the new structure with empty context. "
+                    f"History from {source} contains legacy list format. "
+                    "Wrapping it into the new structure. "
                     "Consider updating the source format."
                 )
-                return cls(history=data, context={})
+                return cls(history=data)
             else:
                 ctx.log_warning(
                     f"History data from {source} has unexpected format "
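A runnable sketch of the new persisted shape (field descriptions omitted; ListOfDict is a local stand-in for zrb's alias): every field has a default and pydantic ignores unknown keys by default, so both the new three-field dict and the old {'context': ..., 'history': ...} dict pass through model_validate, which is why the dict branch above no longer needs setdefault.

from typing import Any

from pydantic import BaseModel, Field

ListOfDict = list[dict[str, Any]]  # stand-in for zrb's type alias


class ConversationHistoryData(BaseModel):
    long_term_context: str = Field(default="")
    conversation_summary: str = Field(default="")
    history: ListOfDict = Field(default_factory=list)


# New format validates directly; the legacy 'context' key is ignored.
new_style = ConversationHistoryData.model_validate(
    {"conversation_summary": "User introduced themselves.", "history": []}
)
old_style = ConversationHistoryData.model_validate(
    {"context": {"user": "Ana"}, "history": [{"role": "user", "content": "hi"}]}
)
print(new_style.conversation_summary, len(old_style.history))
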
zrb/task/llm/history_summarization.py
CHANGED

@@ -1,7 +1,6 @@
 import json
-from typing import TYPE_CHECKING, Any
-
-from pydantic import BaseModel
+import traceback
+from typing import TYPE_CHECKING
 
 from zrb.attr.type import BoolAttr, IntAttr
 from zrb.context.any_context import AnyContext
@@ -19,9 +18,6 @@ from zrb.util.cli.style import stylize_faint
 if TYPE_CHECKING:
     from pydantic_ai.models import Model
     from pydantic_ai.settings import ModelSettings
-else:
-    Model = Any
-    ModelSettings = Any
 
 
 def get_history_summarization_threshold(
@@ -34,7 +30,6 @@ def get_history_summarization_threshold(
     return get_int_attr(
         ctx,
         history_summarization_threshold_attr,
-        # Use llm_config default if attribute is None
         llm_config.default_history_summarization_threshold,
         auto_render=render_history_summarization_threshold,
     )
@@ -49,9 +44,9 @@ def get_history_summarization_threshold(
 def should_summarize_history(
     ctx: AnyContext,
     history_list: ListOfDict,
-    should_summarize_history_attr: BoolAttr | None,
+    should_summarize_history_attr: BoolAttr | None,
     render_summarize_history: bool,
-    history_summarization_threshold_attr: IntAttr | None,
+    history_summarization_threshold_attr: IntAttr | None,
     render_history_summarization_threshold: bool,
 ) -> bool:
     """Determines if history summarization should occur based on length and config."""
@@ -68,91 +63,76 @@ def should_summarize_history(
     return get_bool_attr(
         ctx,
         should_summarize_history_attr,
-        # Use llm_config default if attribute is None
         llm_config.default_summarize_history,
         auto_render=render_summarize_history,
     )
 
 
-class SummarizationConfig(BaseModel):
-    model_config = {"arbitrary_types_allowed": True}
-    model: Model | str | None = None
-    settings: ModelSettings | None = None
-    prompt: str
-    retries: int = 3
-
-
 async def summarize_history(
     ctx: AnyContext,
-    config: SummarizationConfig,
-    conversation_context: dict[str, Any],
+    model: "Model | str | None",
+    settings: "ModelSettings | None",
+    prompt: str,
+    previous_summary: str,
     history_list: ListOfDict,
     rate_limitter: LLMRateLimiter | None = None,
-) -> dict[str, Any]:
-    """Runs an LLM call to summarize history and update the context."""
+    retries: int = 3,
+) -> str:
+    """Runs an LLM call to update the conversation summary."""
     from pydantic_ai import Agent
 
     ctx.log_info("Attempting to summarize conversation history...")
-
+    # Construct the user prompt for the summarization agent
+    user_prompt = json.dumps(
+        {"previous_summary": previous_summary, "recent_history": history_list}
+    )
     summarization_agent = Agent(
-        model=config.model,
-        system_prompt=config.prompt,
-        model_settings=config.settings,
-        retries=config.retries,
+        model=model,
+        system_prompt=prompt,
+        model_settings=settings,
+        retries=retries,
     )
 
-    # Prepare context and history for summarization prompt
     try:
-        context_json = json.dumps(conversation_context)
-        history_to_summarize_json = json.dumps(history_list)
-        summarization_user_prompt = "\n".join(
-            [
-                f"Current Context: {context_json}",
-                f"Conversation History to Summarize: {history_to_summarize_json}",
-            ]
-        )
-    except Exception as e:
-        ctx.log_warning(f"Error formatting context/history for summarization: {e}")
-        return conversation_context  # Return original context if formatting fails
-
-    try:
-        ctx.print(stylize_faint("[Summarization Triggered]"), plain=True)
+        ctx.print(stylize_faint("📝 Summarize"), plain=True)
         summary_run = await run_agent_iteration(
             ctx=ctx,
             agent=summarization_agent,
-            user_prompt=summarization_user_prompt,
-            history_list=[],
+            user_prompt=user_prompt,
+            history_list=[],
             rate_limitter=rate_limitter,
         )
         if summary_run and summary_run.result.output:
-            summary_text = str(summary_run.result.output)
+            new_summary = str(summary_run.result.output)
             usage = summary_run.result.usage()
-            ctx.print(stylize_faint(f"[Token Usage] {usage}"), plain=True)
-            # Update the context with the new summary
-            conversation_context["history_summary"] = summary_text
-            ctx.log_info("History summarized and updated in context.")
-            ctx.log_info(f"Conversation summary: {summary_text}")
+            ctx.print(stylize_faint(f"📝 Summarization Token: {usage}"), plain=True)
+            ctx.print(plain=True)
+            ctx.log_info("History summarized and updated.")
+            ctx.log_info(f"New conversation summary:\n{new_summary}")
+            return new_summary
         else:
             ctx.log_warning("History summarization failed or returned no data.")
     except Exception as e:
         ctx.log_warning(f"Error during history summarization: {e}")
+        traceback.print_exc()
 
-    return conversation_context
+    # Return the original summary if summarization fails
+    return previous_summary
 
 
 async def maybe_summarize_history(
     ctx: AnyContext,
     history_list: ListOfDict,
-    conversation_context: dict[str, Any],
-    should_summarize_history_attr: BoolAttr | None,
+    conversation_summary: str,
+    should_summarize_history_attr: BoolAttr | None,
     render_summarize_history: bool,
-    history_summarization_threshold_attr: IntAttr | None,
+    history_summarization_threshold_attr: IntAttr | None,
     render_history_summarization_threshold: bool,
-    model: str | Model | None,
-    model_settings: ModelSettings | None,
+    model: "str | Model | None",
+    model_settings: "ModelSettings | None",
     summarization_prompt: str,
     rate_limitter: LLMRateLimiter | None = None,
-) -> tuple[ListOfDict, dict[str, Any]]:
+) -> tuple[ListOfDict, str]:
     """Summarizes history and updates context if enabled and threshold met."""
     shorten_history_list = replace_system_prompt_in_history_list(history_list)
     if should_summarize_history(
@@ -163,18 +143,15 @@ async def maybe_summarize_history(
         history_summarization_threshold_attr,
         render_history_summarization_threshold,
     ):
-        # Summarize the history
-        updated_context = await summarize_history(
+        new_summary = await summarize_history(
             ctx=ctx,
-            config=SummarizationConfig(
-                model=model,
-                settings=model_settings,
-                prompt=summarization_prompt,
-            ),
-            conversation_context=conversation_context,
-            history_list=shorten_history_list,  # Pass the full list for context
+            model=model,
+            settings=model_settings,
+            prompt=summarization_prompt,
+            previous_summary=conversation_summary,
+            history_list=shorten_history_list,
            rate_limitter=rate_limitter,
         )
-        # Clear the history
-        return [], updated_context
-    return history_list, conversation_context
+        # After summarization, the history is cleared and replaced by the new summary
+        return [], new_summary
+    return history_list, conversation_summary
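Callers of maybe_summarize_history now receive a (history, summary) pair instead of (history, context dict). A minimal sketch of that contract with the threshold check and LLM call elided (apply_summarization is a hypothetical helper, not zrb API):

from typing import Any

ListOfDict = list[dict[str, Any]]  # stand-in for zrb's type alias


def apply_summarization(
    history_list: ListOfDict,
    conversation_summary: str,
    new_summary: str | None,
) -> tuple[ListOfDict, str]:
    # On successful summarization the rolling history is cleared and its
    # content is carried forward in the summary string; otherwise both
    # values pass through unchanged.
    if new_summary is not None:
        return [], new_summary
    return history_list, conversation_summary
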
zrb/task/llm/print_node.py
CHANGED

@@ -19,11 +19,11 @@ async def print_node(print_func: Callable, agent_run: Any, node: Any):
 
     if Agent.is_user_prompt_node(node):
         # A user prompt node => The user has provided input
-        print_func(stylize_faint(f">> UserPromptNode: {node.user_prompt}"))
+        print_func(stylize_faint(f" >> UserPromptNode: {node.user_prompt}"))
     elif Agent.is_model_request_node(node):
         # A model request node => We can stream tokens from the model's request
         print_func(
-            stylize_faint(">> ModelRequestNode: streaming partial request tokens")
+            stylize_faint(" >> ModelRequestNode: streaming partial request tokens")
         )
         async with node.stream(agent_run.ctx) as request_stream:
             is_streaming = False
@@ -33,7 +33,7 @@ async def print_node(print_func: Callable, agent_run: Any, node: Any):
                     print_func("")
                 print_func(
                     stylize_faint(
-                        f"[Request] Starting part {event.index}: {event.part!r}"
+                        f" [Request] Starting part {event.index}: {event.part!r}"
                     ),
                 )
                 is_streaming = False
@@ -53,7 +53,7 @@ async def print_node(print_func: Callable, agent_run: Any, node: Any):
                     if is_streaming:
                         print_func("")
                     print_func(
-                        stylize_faint(f"[Result] tool_name={event.tool_name}"),
+                        stylize_faint(f" [Result] tool_name={event.tool_name}"),
                     )
                     is_streaming = False
         if is_streaming:
@@ -61,7 +61,9 @@ async def print_node(print_func: Callable, agent_run: Any, node: Any):
     elif Agent.is_call_tools_node(node):
         # A handle-response node => The model returned some data, potentially calls a tool
         print_func(
-            stylize_faint(">> CallToolsNode: streaming partial response & tool usage")
+            stylize_faint(
+                " >> CallToolsNode: streaming partial response & tool usage"
+            )
         )
         async with node.stream(agent_run.ctx) as handle_stream:
             async for event in handle_stream:
@@ -82,16 +84,16 @@ async def print_node(print_func: Callable, agent_run: Any, node: Any):
                         del event.part.args["_dummy"]
                     print_func(
                         stylize_faint(
-                            f"[Tools] The LLM calls tool={event.part.tool_name!r} with args={event.part.args} (tool_call_id={event.part.tool_call_id!r})"  # noqa
+                            f" [Tools] The LLM calls tool={event.part.tool_name!r} with args={event.part.args} (tool_call_id={event.part.tool_call_id!r})"  # noqa
                         )
                     )
                 elif isinstance(event, FunctionToolResultEvent):
                     print_func(
                         stylize_faint(
-                            f"[Tools] Tool call {event.tool_call_id!r} returned => {event.result.content}"  # noqa
+                            f" [Tools] Tool call {event.tool_call_id!r} returned => {event.result.content}"  # noqa
                         )
                     )
     elif Agent.is_end_node(node):
         # Once an End node is reached, the agent run is complete
-        print_func(stylize_faint("[End of Response]"))
+        print_func(stylize_faint(" [End of Response]"))
         # print_func(stylize_faint(f"{agent_run.result.data}"))