vectara-agentic 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vectara-agentic might be problematic.

Files changed (56)
  1. tests/__init__.py +7 -0
  2. tests/conftest.py +316 -0
  3. tests/endpoint.py +54 -17
  4. tests/run_tests.py +112 -0
  5. tests/test_agent.py +35 -33
  6. tests/test_agent_fallback_memory.py +270 -0
  7. tests/test_agent_memory_consistency.py +229 -0
  8. tests/test_agent_type.py +86 -143
  9. tests/test_api_endpoint.py +4 -0
  10. tests/test_bedrock.py +50 -31
  11. tests/test_fallback.py +4 -0
  12. tests/test_gemini.py +27 -59
  13. tests/test_groq.py +50 -31
  14. tests/test_private_llm.py +11 -2
  15. tests/test_return_direct.py +6 -2
  16. tests/test_serialization.py +7 -6
  17. tests/test_session_memory.py +252 -0
  18. tests/test_streaming.py +109 -0
  19. tests/test_together.py +62 -0
  20. tests/test_tools.py +10 -82
  21. tests/test_vectara_llms.py +4 -0
  22. tests/test_vhc.py +67 -0
  23. tests/test_workflow.py +13 -28
  24. vectara_agentic/__init__.py +27 -4
  25. vectara_agentic/_callback.py +65 -67
  26. vectara_agentic/_observability.py +30 -30
  27. vectara_agentic/_version.py +1 -1
  28. vectara_agentic/agent.py +565 -859
  29. vectara_agentic/agent_config.py +15 -14
  30. vectara_agentic/agent_core/__init__.py +22 -0
  31. vectara_agentic/agent_core/factory.py +383 -0
  32. vectara_agentic/{_prompts.py → agent_core/prompts.py} +21 -46
  33. vectara_agentic/agent_core/serialization.py +348 -0
  34. vectara_agentic/agent_core/streaming.py +483 -0
  35. vectara_agentic/agent_core/utils/__init__.py +29 -0
  36. vectara_agentic/agent_core/utils/hallucination.py +157 -0
  37. vectara_agentic/agent_core/utils/logging.py +52 -0
  38. vectara_agentic/agent_core/utils/schemas.py +87 -0
  39. vectara_agentic/agent_core/utils/tools.py +125 -0
  40. vectara_agentic/agent_endpoint.py +4 -6
  41. vectara_agentic/db_tools.py +37 -12
  42. vectara_agentic/llm_utils.py +42 -43
  43. vectara_agentic/sub_query_workflow.py +9 -14
  44. vectara_agentic/tool_utils.py +138 -83
  45. vectara_agentic/tools.py +36 -21
  46. vectara_agentic/tools_catalog.py +16 -16
  47. vectara_agentic/types.py +106 -8
  48. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.1.dist-info}/METADATA +111 -31
  49. vectara_agentic-0.4.1.dist-info/RECORD +53 -0
  50. tests/test_agent_planning.py +0 -64
  51. tests/test_hhem.py +0 -100
  52. vectara_agentic/hhem.py +0 -82
  53. vectara_agentic-0.3.3.dist-info/RECORD +0 -39
  54. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.1.dist-info}/WHEEL +0 -0
  55. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.1.dist-info}/licenses/LICENSE +0 -0
  56. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.1.dist-info}/top_level.txt +0 -0
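The most consequential changes in this diff are in vectara_agentic/agent.py: agent memory moves from ChatMemoryBuffer to llama_index's Memory, keyed by a new session_id argument; the synchronous chat()/stream_chat() methods now refuse to run inside an active event loop (use achat()/astream_chat() instead); and FCS/HHEM scoring is replaced by VHC. A minimal usage sketch of the 0.4.1 surface, assuming only the signatures visible in the diff below (the multiply tool and the session id value are illustrative, not part of the package):

    import asyncio
    from vectara_agentic.agent import Agent
    from vectara_agentic.tools import ToolsFactory

    def multiply(x: float, y: float) -> float:
        """Multiply two numbers."""  # hypothetical tool, for illustration only
        return x * y

    agent = Agent.from_tools(
        tools=[ToolsFactory().create_tool(multiply)],
        topic="math",
        session_id="math:2025-01-01",  # new in 0.4.x; defaults to "<topic>:<today>"
        chat_history=[("what is 2 times 3?", "6")],  # seeded via Memory.put_messages
    )

    # Outside an event loop, chat() delegates to asyncio.run(achat(...)).
    print(agent.chat("what is 6 times 7?").response)

    async def main():
        # Inside a running loop (Jupyter, uvicorn), chat() raises RuntimeError;
        # await the async variant instead.
        resp = await agent.achat("multiply that by 10")
        print(resp.response)

    asyncio.run(main())

    agent.clear_memory()  # resets Memory and forces lazy agent re-creation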
vectara_agentic/agent.py CHANGED
@@ -2,85 +2,70 @@
2
2
  This module contains the Agent class for handling different types of agents and their interactions.
3
3
  """
4
4
 
5
- from typing import List, Callable, Optional, Dict, Any, Union, Tuple
5
+ import warnings
6
+
7
+ warnings.simplefilter("ignore", DeprecationWarning)
8
+
9
+ # pylint: disable=wrong-import-position
10
+ from typing import List, Callable, Optional, Dict, Any, Tuple, TYPE_CHECKING
6
11
  import os
7
- import re
8
12
  from datetime import date
9
- import time
10
13
  import json
11
14
  import logging
12
15
  import asyncio
13
- import importlib
14
- from collections import Counter
15
- import inspect
16
- from inspect import Signature, Parameter, ismethod
17
- from pydantic import Field, create_model, ValidationError, BaseModel
18
- from pydantic_core import PydanticUndefined
19
16
 
20
- import cloudpickle as pickle
17
+ from pydantic import ValidationError
18
+ from pydantic_core import PydanticUndefined
21
19
 
22
20
  from dotenv import load_dotenv
23
21
 
24
- from llama_index.core.memory import ChatMemoryBuffer
25
- from llama_index.core.llms import ChatMessage, MessageRole
26
- from llama_index.core.tools import FunctionTool
27
- from llama_index.core.agent import (
28
- ReActAgent,
29
- StructuredPlannerAgent,
30
- FunctionCallingAgent,
31
- )
32
- from llama_index.core.agent.react.formatter import ReActChatFormatter
33
- from llama_index.agent.llm_compiler import LLMCompilerAgentWorker
34
- from llama_index.agent.lats import LATSAgentWorker
35
- from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
36
- from llama_index.core.callbacks.base_handler import BaseCallbackHandler
37
- from llama_index.agent.openai import OpenAIAgent
38
- from llama_index.core.agent.runner.base import AgentRunner
39
- from llama_index.core.agent.types import BaseAgent
40
- from llama_index.core.workflow import Workflow, Context
22
+ # Runtime imports for components used at module level
23
+ from llama_index.core.llms import MessageRole, ChatMessage
24
+ from llama_index.core.callbacks import CallbackManager
25
+ from llama_index.core.memory import Memory
26
+
27
+ # Heavy llama_index imports moved to TYPE_CHECKING for lazy loading
28
+ if TYPE_CHECKING:
29
+ from llama_index.core.tools import FunctionTool
30
+ from llama_index.core.workflow import Workflow
31
+ from llama_index.core.agent.types import BaseAgent
32
+ from llama_index.core.callbacks.base_handler import BaseCallbackHandler
33
+
41
34
 
42
35
  from .types import (
43
36
  AgentType,
44
37
  AgentStatusType,
45
38
  LLMRole,
46
- ToolType,
47
39
  ModelProvider,
48
40
  AgentResponse,
49
41
  AgentStreamingResponse,
50
42
  AgentConfigType,
51
43
  )
52
- from .llm_utils import get_llm, get_tokenizer_for_model
53
- from ._prompts import (
54
- REACT_PROMPT_TEMPLATE,
55
- GENERAL_PROMPT_TEMPLATE,
56
- GENERAL_INSTRUCTIONS,
57
- STRUCTURED_PLANNER_PLAN_REFINE_PROMPT,
58
- STRUCTURED_PLANNER_INITIAL_PLAN_PROMPT,
59
- )
44
+ from .llm_utils import get_llm
45
+ from .agent_core.prompts import GENERAL_INSTRUCTIONS
60
46
  from ._callback import AgentCallbackHandler
61
- from ._observability import setup_observer, eval_fcs
62
- from .tools import VectaraToolFactory, VectaraTool, ToolsFactory
63
- from .tool_utils import _is_human_readable_output
47
+ from ._observability import setup_observer
48
+ from .tools import ToolsFactory
64
49
  from .tools_catalog import get_current_date
65
50
  from .agent_config import AgentConfig
66
- from .hhem import HHEM
67
-
68
-
69
- class IgnoreUnpickleableAttributeFilter(logging.Filter):
70
- """
71
- Filter to ignore log messages that contain certain strings
72
- """
73
-
74
- def filter(self, record):
75
- msgs_to_ignore = [
76
- "Removing unpickleable private attribute _chunking_tokenizer_fn",
77
- "Removing unpickleable private attribute _split_fns",
78
- "Removing unpickleable private attribute _sub_sentence_split_fns",
79
- ]
80
- return all(msg not in record.getMessage() for msg in msgs_to_ignore)
81
51
 
52
+ # Import utilities from agent core modules
53
+ from .agent_core.streaming import (
54
+ FunctionCallingStreamHandler,
55
+ execute_post_stream_processing,
56
+ )
57
+ from .agent_core.factory import create_agent_from_config, create_agent_from_corpus
58
+ from .agent_core.serialization import (
59
+ serialize_agent_to_dict,
60
+ deserialize_agent_from_dict,
61
+ )
62
+ from .agent_core.utils import (
63
+ sanitize_tools_for_gemini,
64
+ setup_agent_logging,
65
+ )
66
+ from .agent_core.utils.tools import validate_tool_consistency
82
67
 
83
- logging.getLogger().addFilter(IgnoreUnpickleableAttributeFilter())
68
+ setup_agent_logging()
84
69
 
85
70
  logger = logging.getLogger("opentelemetry.exporter.otlp.proto.http.trace_exporter")
86
71
  logger.setLevel(logging.CRITICAL)
@@ -88,113 +73,6 @@ logger.setLevel(logging.CRITICAL)
88
73
  load_dotenv(override=True)
89
74
 
90
75
 
91
- def _get_prompt(
92
- prompt_template: str,
93
- general_instructions: str,
94
- topic: str,
95
- custom_instructions: str,
96
- ):
97
- """
98
- Generate a prompt by replacing placeholders with topic and date.
99
-
100
- Args:
101
- prompt_template (str): The template for the prompt.
102
- general_instructions (str): General instructions to be included in the prompt.
103
- topic (str): The topic to be included in the prompt.
104
- custom_instructions(str): The custom instructions to be included in the prompt.
105
-
106
- Returns:
107
- str: The formatted prompt.
108
- """
109
- return (
110
- prompt_template.replace("{chat_topic}", topic)
111
- .replace("{today}", date.today().strftime("%A, %B %d, %Y"))
112
- .replace("{custom_instructions}", custom_instructions)
113
- .replace("{INSTRUCTIONS}", general_instructions)
114
- )
115
-
116
-
117
- def _get_llm_compiler_prompt(
118
- prompt: str, general_instructions: str, topic: str, custom_instructions: str
119
- ) -> str:
120
- """
121
- Add custom instructions to the prompt.
122
-
123
- Args:
124
- prompt (str): The prompt to which custom instructions should be added.
125
-
126
- Returns:
127
- str: The prompt with custom instructions added.
128
- """
129
- prompt += "\nAdditional Instructions:\n"
130
- prompt += f"You have experise in {topic}.\n"
131
- prompt += general_instructions
132
- prompt += custom_instructions
133
- prompt += f"Today is {date.today().strftime('%A, %B %d, %Y')}"
134
- return prompt
135
-
136
-
137
- def get_field_type(field_schema: dict) -> Any:
138
- """
139
- Convert a JSON schema field definition to a Python type.
140
- Handles 'type' and 'anyOf' cases.
141
- """
142
- json_type_to_python = {
143
- "string": str,
144
- "integer": int,
145
- "boolean": bool,
146
- "array": list,
147
- "object": dict,
148
- "number": float,
149
- "null": type(None),
150
- }
151
- if not field_schema: # Handles empty schema {}
152
- return Any
153
-
154
- if "anyOf" in field_schema:
155
- types = []
156
- for option_schema in field_schema["anyOf"]:
157
- types.append(get_field_type(option_schema)) # Recursive call
158
- if not types:
159
- return Any
160
- return Union[tuple(types)]
161
-
162
- if "type" in field_schema and isinstance(field_schema["type"], list):
163
- types = []
164
- for type_name in field_schema["type"]:
165
- if type_name == "array":
166
- item_schema = field_schema.get("items", {})
167
- types.append(List[get_field_type(item_schema)])
168
- elif type_name in json_type_to_python:
169
- types.append(json_type_to_python[type_name])
170
- else:
171
- types.append(Any) # Fallback for unknown types in the list
172
- if not types:
173
- return Any
174
- return Union[tuple(types)] # type: ignore
175
-
176
- if "type" in field_schema:
177
- schema_type_name = field_schema["type"]
178
- if schema_type_name == "array":
179
- item_schema = field_schema.get(
180
- "items", {}
181
- ) # Default to Any if "items" is missing
182
- return List[get_field_type(item_schema)]
183
-
184
- return json_type_to_python.get(schema_type_name, Any)
185
-
186
- # If only "items" is present (implies array by some conventions, but less standard)
187
- # Or if it's a schema with other keywords like 'properties' (implying object)
188
- # For simplicity, if no "type" or "anyOf" at this point, default to Any or add more specific handling.
189
- # If 'properties' in field_schema or 'additionalProperties' in field_schema, it's likely an object.
190
- if "properties" in field_schema or "additionalProperties" in field_schema:
191
- # This path might need to reconstruct a nested Pydantic model if you encounter such schemas.
192
- # For now, treating as 'dict' or 'Any' might be a simpler placeholder.
193
- return dict # Or Any, or more sophisticated object reconstruction.
194
-
195
- return Any
196
-
197
-
198
76
  class Agent:
199
77
  """
200
78
  Agent class for handling different types of agents and their interactions.
@@ -202,13 +80,11 @@ class Agent:
202
80
 
203
81
  def __init__(
204
82
  self,
205
- tools: List[FunctionTool],
83
+ tools: List["FunctionTool"],
206
84
  topic: str = "general",
207
85
  custom_instructions: str = "",
208
86
  general_instructions: str = GENERAL_INSTRUCTIONS,
209
- verbose: bool = True,
210
- use_structured_planning: bool = False,
211
- update_func: Optional[Callable[[AgentStatusType, dict, str], None]] = None,
87
+ verbose: bool = False,
212
88
  agent_progress_callback: Optional[
213
89
  Callable[[AgentStatusType, dict, str], None]
214
90
  ] = None,
@@ -217,9 +93,10 @@ class Agent:
217
93
  fallback_agent_config: Optional[AgentConfig] = None,
218
94
  chat_history: Optional[list[Tuple[str, str]]] = None,
219
95
  validate_tools: bool = False,
220
- workflow_cls: Optional[Workflow] = None,
96
+ workflow_cls: Optional["Workflow"] = None,
221
97
  workflow_timeout: int = 120,
222
98
  vectara_api_key: Optional[str] = None,
99
+ session_id: Optional[str] = None,
223
100
  ) -> None:
224
101
  """
225
102
  Initialize the agent with the specified type, tools, topic, and system message.
@@ -232,11 +109,8 @@ class Agent:
232
109
  general_instructions (str, optional): General instructions for the agent.
233
110
  The Agent has a default set of instructions that are crafted to help it operate effectively.
234
111
  This allows you to customize the agent's behavior and personality, but use with caution.
235
- verbose (bool, optional): Whether the agent should print its steps. Defaults to True.
236
- use_structured_planning (bool, optional)
237
- Whether or not we want to wrap the agent with LlamaIndex StructuredPlannerAgent.
112
+ verbose (bool, optional): Whether the agent should print its steps. Defaults to False.
238
113
  agent_progress_callback (Callable): A callback function the code calls on any agent updates.
239
- update_func (Callable): old name for agent_progress_callback. Will be deprecated in future.
240
114
  query_logging_callback (Callable): A callback function the code calls upon completion of a query
241
115
  agent_config (AgentConfig, optional): The configuration of the agent.
242
116
  Defaults to AgentConfig(), which reads from environment variables.
@@ -247,103 +121,61 @@ class Agent:
247
121
  Defaults to False.
248
122
  workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
249
123
  workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
250
- vectara_api_key (str, optional): The Vectara API key for FCS evaluation. Defaults to None.
124
+ vectara_api_key (str, optional): The Vectara API key for VHC computation. Defaults to None.
125
+ session_id (str, optional): The session ID for memory persistence.
126
+ If None, auto-generates from topic and date. Defaults to None.
251
127
  """
252
128
  self.agent_config = agent_config or AgentConfig()
253
129
  self.agent_config_type = AgentConfigType.DEFAULT
254
130
  self.tools = tools
255
131
  if not any(tool.metadata.name == "get_current_date" for tool in self.tools):
256
- self.tools += [ToolsFactory().create_tool(get_current_date)]
132
+ self.tools += [
133
+ ToolsFactory().create_tool(get_current_date, vhc_eligible=False)
134
+ ]
257
135
  self.agent_type = self.agent_config.agent_type
258
- self.use_structured_planning = use_structured_planning
259
136
  self._llm = None # Lazy loading
260
137
  self._custom_instructions = custom_instructions
261
138
  self._general_instructions = general_instructions
262
139
  self._topic = topic
263
- self.agent_progress_callback = (
264
- agent_progress_callback if agent_progress_callback else update_func
265
- )
266
- self.query_logging_callback = query_logging_callback
140
+ self.agent_progress_callback = agent_progress_callback
267
141
 
142
+ self.query_logging_callback = query_logging_callback
268
143
  self.workflow_cls = workflow_cls
269
144
  self.workflow_timeout = workflow_timeout
270
145
  self.vectara_api_key = vectara_api_key or os.environ.get("VECTARA_API_KEY", "")
271
146
 
272
147
  # Sanitize tools for Gemini if needed
273
148
  if self.agent_config.main_llm_provider == ModelProvider.GEMINI:
274
- self.tools = self._sanitize_tools_for_gemini(self.tools)
149
+ self.tools = sanitize_tools_for_gemini(self.tools)
275
150
 
276
151
  # Validate tools
277
- # Check for:
278
- # 1. multiple copies of the same tool
279
- # 2. Instructions for using tools that do not exist
280
- tool_names = [tool.metadata.name for tool in self.tools]
281
- duplicates = [tool for tool, count in Counter(tool_names).items() if count > 1]
282
- if duplicates:
283
- raise ValueError(f"Duplicate tools detected: {', '.join(duplicates)}")
284
-
285
152
  if validate_tools:
286
- prompt = f"""
287
- You are provided these tools:
288
- <tools>{','.join(tool_names)}</tools>
289
- And these instructions:
290
- <instructions>
291
- {self._custom_instructions}
292
- </instructions>
293
- Your task is to identify invalid tools.
294
- A tool is invalid if it is mentioned in the instructions but not in the tools list.
295
- A tool's name must have at least two characters.
296
- Your response should be a comma-separated list of the invalid tools.
297
- If no invalid tools exist, respond with "<OKAY>" (and nothing else).
298
- """
299
- llm = get_llm(LLMRole.MAIN, config=self.agent_config)
300
- bad_tools_str = llm.complete(prompt).text.strip("\n")
301
- if bad_tools_str and bad_tools_str != "<OKAY>":
302
- bad_tools = [tool.strip() for tool in bad_tools_str.split(",")]
303
- numbered = ", ".join(
304
- f"({i}) {tool}" for i, tool in enumerate(bad_tools, 1)
305
- )
306
- raise ValueError(
307
- f"The Agent custom instructions mention these invalid tools: {numbered}"
308
- )
309
-
310
- # Create token counters for the main and tool LLMs
311
- main_tok = get_tokenizer_for_model(role=LLMRole.MAIN)
312
- self.main_token_counter = (
313
- TokenCountingHandler(tokenizer=main_tok) if main_tok else None
314
- )
315
- tool_tok = get_tokenizer_for_model(role=LLMRole.TOOL)
316
- self.tool_token_counter = (
317
- TokenCountingHandler(tokenizer=tool_tok) if tool_tok else None
318
- )
153
+ validate_tool_consistency(
154
+ self.tools, self._custom_instructions, self.agent_config
155
+ )
319
156
 
320
157
  # Setup callback manager
321
158
  callbacks: list[BaseCallbackHandler] = [
322
159
  AgentCallbackHandler(self.agent_progress_callback)
323
160
  ]
324
- if self.main_token_counter:
325
- callbacks.append(self.main_token_counter)
326
- if self.tool_token_counter:
327
- callbacks.append(self.tool_token_counter)
328
161
  self.callback_manager = CallbackManager(callbacks) # type: ignore
329
162
  self.verbose = verbose
330
163
 
164
+ self.session_id = (
165
+ session_id
166
+ or getattr(self, "session_id", None)
167
+ or f"{topic}:{date.today().isoformat()}"
168
+ )
169
+
170
+ self.memory = Memory.from_defaults(
171
+ session_id=self.session_id, token_limit=65536
172
+ )
331
173
  if chat_history:
332
- msg_history = []
333
- for text_pairs in chat_history:
334
- msg_history.append(
335
- ChatMessage.from_str(content=text_pairs[0], role=MessageRole.USER)
336
- )
337
- msg_history.append(
338
- ChatMessage.from_str(
339
- content=text_pairs[1], role=MessageRole.ASSISTANT
340
- )
341
- )
342
- self.memory = ChatMemoryBuffer.from_defaults(
343
- token_limit=128000, chat_history=msg_history
344
- )
345
- else:
346
- self.memory = ChatMemoryBuffer.from_defaults(token_limit=128000)
174
+ msgs = []
175
+ for u, a in chat_history:
176
+ msgs.append(ChatMessage.from_str(u, role=MessageRole.USER))
177
+ msgs.append(ChatMessage.from_str(a, role=MessageRole.ASSISTANT))
178
+ self.memory.put_messages(msgs)
347
179
 
348
180
  # Set up main agent and fallback agent
349
181
  self._agent = None # Lazy loading
@@ -354,9 +186,15 @@ class Agent:
354
186
  try:
355
187
  self.observability_enabled = setup_observer(self.agent_config, self.verbose)
356
188
  except Exception as e:
357
- print(f"Failed to set up observer ({e}), ignoring")
189
+ logger.warning(f"Failed to set up observer ({e}), ignoring")
358
190
  self.observability_enabled = False
359
191
 
192
+ # VHC state tracking
193
+ self._vhc_cache = {} # Cache VHC results by query hash
194
+ self._last_query = None
195
+ self._last_response = None
196
+ self._current_tool_outputs = [] # Store tool outputs from current query for VHC
197
+
360
198
  @property
361
199
  def llm(self):
362
200
  """Lazy-loads the LLM."""
@@ -380,231 +218,56 @@ class Agent:
380
218
  )
381
219
  return self._fallback_agent
382
220
 
383
- def _sanitize_tools_for_gemini(
384
- self, tools: list[FunctionTool]
385
- ) -> list[FunctionTool]:
386
- """
387
- Strip all default values from:
388
- - tool.fn
389
- - tool.async_fn
390
- - tool.metadata.fn_schema
391
- so Gemini sees *only* required parameters, no defaults.
392
- """
393
- for tool in tools:
394
- # 1) strip defaults off the actual callables
395
- for func in (tool.fn, tool.async_fn):
396
- if not func:
397
- continue
398
- orig_sig = inspect.signature(func)
399
- new_params = [
400
- p.replace(default=Parameter.empty)
401
- for p in orig_sig.parameters.values()
402
- ]
403
- new_sig = Signature(
404
- new_params, return_annotation=orig_sig.return_annotation
405
- )
406
- if ismethod(func):
407
- func.__func__.__signature__ = new_sig
408
- else:
409
- func.__signature__ = new_sig
410
-
411
- # 2) rebuild the Pydantic schema so that *every* field is required
412
- schema_cls = getattr(tool.metadata, "fn_schema", None)
413
- if schema_cls and hasattr(schema_cls, "model_fields"):
414
- # collect (name → (type, Field(...))) for all fields
415
- new_fields: dict[str, tuple[type, Any]] = {}
416
- for name, mf in schema_cls.model_fields.items():
417
- typ = mf.annotation
418
- desc = getattr(mf, "description", "")
419
- # force required (no default) with Field(...)
420
- new_fields[name] = (typ, Field(..., description=desc))
421
-
422
- # make a brand-new schema class where every field is required
423
- no_default_schema = create_model(
424
- f"{schema_cls.__name__}", # new class name
425
- **new_fields, # type: ignore
426
- )
427
-
428
- # give it a clean __signature__ so inspect.signature sees no defaults
429
- params = [
430
- Parameter(n, Parameter.POSITIONAL_OR_KEYWORD, annotation=typ)
431
- for n, (typ, _) in new_fields.items()
432
- ]
433
- no_default_schema.__signature__ = Signature(params)
434
-
435
- # swap it back onto the tool
436
- tool.metadata.fn_schema = no_default_schema
437
-
438
- return tools
439
-
440
221
  def _create_agent(
441
- self, config: AgentConfig, llm_callback_manager: CallbackManager
442
- ) -> Union[BaseAgent, AgentRunner]:
222
+ self, config: AgentConfig, llm_callback_manager: "CallbackManager"
223
+ ) -> "BaseAgent":
443
224
  """
444
225
  Creates the agent based on the configuration object.
445
226
 
446
227
  Args:
447
-
448
228
  config: The configuration of the agent.
449
229
  llm_callback_manager: The callback manager for the agent's llm.
450
230
 
451
231
  Returns:
452
- Union[BaseAgent, AgentRunner]: The configured agent object.
232
+ BaseAgent: The configured agent object.
453
233
  """
454
- agent_type = config.agent_type
455
234
  # Use the same LLM instance for consistency
456
- llm = self.llm if config == self.agent_config else get_llm(LLMRole.MAIN, config=config)
235
+ llm = (
236
+ self.llm
237
+ if config == self.agent_config
238
+ else get_llm(LLMRole.MAIN, config=config)
239
+ )
457
240
  llm.callback_manager = llm_callback_manager
458
241
 
459
- if agent_type == AgentType.FUNCTION_CALLING:
460
- if config.tool_llm_provider == ModelProvider.OPENAI:
461
- raise ValueError(
462
- "Vectara-agentic: Function calling agent type is not supported with the OpenAI LLM."
463
- )
464
- prompt = _get_prompt(
465
- GENERAL_PROMPT_TEMPLATE,
466
- self._general_instructions,
467
- self._topic,
468
- self._custom_instructions,
469
- )
470
- agent = FunctionCallingAgent.from_tools(
471
- tools=self.tools,
472
- llm=llm,
473
- memory=self.memory,
474
- verbose=self.verbose,
475
- max_function_calls=config.max_reasoning_steps,
476
- callback_manager=llm_callback_manager,
477
- system_prompt=prompt,
478
- allow_parallel_tool_calls=True,
479
- )
480
- elif agent_type == AgentType.REACT:
481
- prompt = _get_prompt(
482
- REACT_PROMPT_TEMPLATE,
483
- self._general_instructions,
484
- self._topic,
485
- self._custom_instructions,
486
- )
487
- agent = ReActAgent.from_tools(
488
- tools=self.tools,
489
- llm=llm,
490
- memory=self.memory,
491
- verbose=self.verbose,
492
- react_chat_formatter=ReActChatFormatter(system_header=prompt),
493
- max_iterations=config.max_reasoning_steps,
494
- callable_manager=llm_callback_manager,
495
- )
496
- elif agent_type == AgentType.OPENAI:
497
- if config.tool_llm_provider != ModelProvider.OPENAI:
498
- raise ValueError(
499
- "Vectara-agentic: OPENAI agent type requires the OpenAI LLM."
500
- )
501
- prompt = _get_prompt(
502
- GENERAL_PROMPT_TEMPLATE,
503
- self._general_instructions,
504
- self._topic,
505
- self._custom_instructions,
506
- )
507
- agent = OpenAIAgent.from_tools(
508
- tools=self.tools,
509
- llm=llm,
510
- memory=self.memory,
511
- verbose=self.verbose,
512
- callback_manager=llm_callback_manager,
513
- max_function_calls=config.max_reasoning_steps,
514
- system_prompt=prompt,
515
- )
516
- elif agent_type == AgentType.LLMCOMPILER:
517
- agent_worker = LLMCompilerAgentWorker.from_tools(
518
- tools=self.tools,
519
- llm=llm,
520
- verbose=self.verbose,
521
- callback_manager=llm_callback_manager,
522
- )
523
- agent_worker.system_prompt = _get_prompt(
524
- prompt_template=_get_llm_compiler_prompt(
525
- prompt=agent_worker.system_prompt,
526
- general_instructions=self._general_instructions,
527
- topic=self._topic,
528
- custom_instructions=self._custom_instructions,
529
- ),
530
- general_instructions=self._general_instructions,
531
- topic=self._topic,
532
- custom_instructions=self._custom_instructions,
533
- )
534
- agent_worker.system_prompt_replan = _get_prompt(
535
- prompt_template=_get_llm_compiler_prompt(
536
- prompt=agent_worker.system_prompt_replan,
537
- general_instructions=GENERAL_INSTRUCTIONS,
538
- topic=self._topic,
539
- custom_instructions=self._custom_instructions,
540
- ),
541
- general_instructions=GENERAL_INSTRUCTIONS,
542
- topic=self._topic,
543
- custom_instructions=self._custom_instructions,
544
- )
545
- agent = agent_worker.as_agent()
546
- elif agent_type == AgentType.LATS:
547
- agent_worker = LATSAgentWorker.from_tools(
548
- tools=self.tools,
549
- llm=llm,
550
- num_expansions=3,
551
- max_rollouts=-1,
552
- verbose=self.verbose,
553
- callback_manager=llm_callback_manager,
554
- )
555
- prompt = _get_prompt(
556
- REACT_PROMPT_TEMPLATE,
557
- self._general_instructions,
558
- self._topic,
559
- self._custom_instructions,
560
- )
561
- agent_worker.chat_formatter = ReActChatFormatter(system_header=prompt)
562
- agent = agent_worker.as_agent()
563
- else:
564
- raise ValueError(f"Unknown agent type: {agent_type}")
565
-
566
- # Set up structured planner if needed
567
- if self.use_structured_planning or self.agent_type in [
568
- AgentType.LLMCOMPILER,
569
- AgentType.LATS,
570
- ]:
571
- planner_llm = get_llm(LLMRole.TOOL, config=config)
572
- agent = StructuredPlannerAgent(
573
- agent_worker=agent.agent_worker,
574
- tools=self.tools,
575
- llm=planner_llm,
576
- memory=self.memory,
577
- verbose=self.verbose,
578
- initial_plan_prompt=STRUCTURED_PLANNER_INITIAL_PLAN_PROMPT,
579
- plan_refine_prompt=STRUCTURED_PLANNER_PLAN_REFINE_PROMPT,
580
- )
581
-
582
- return agent
242
+ return create_agent_from_config(
243
+ tools=self.tools,
244
+ llm=llm,
245
+ memory=self.memory,
246
+ config=config,
247
+ callback_manager=llm_callback_manager,
248
+ general_instructions=self._general_instructions,
249
+ topic=self._topic,
250
+ custom_instructions=self._custom_instructions,
251
+ verbose=self.verbose,
252
+ )
583
253
 
584
254
  def clear_memory(self) -> None:
585
- """
586
- Clear the agent's memory.
587
- """
588
- if self.agent_config_type == AgentConfigType.DEFAULT:
589
- self.agent.memory.reset()
590
- elif (
591
- self.agent_config_type == AgentConfigType.FALLBACK
592
- and self.fallback_agent_config
593
- ):
594
- self.fallback_agent.memory.reset()
595
- else:
596
- raise ValueError(f"Invalid agent config type {self.agent_config_type}")
255
+ """Clear the agent's memory and reset agent instances to ensure consistency."""
256
+ self.memory.reset()
257
+ # Clear agent instances so they get recreated with the cleared memory
258
+ self._agent = None
259
+ self._fallback_agent = None
597
260
 
598
261
  def __eq__(self, other):
599
262
  if not isinstance(other, Agent):
600
- print(
263
+ logger.debug(
601
264
  f"Comparison failed: other is not an instance of Agent. (self: {type(self)}, other: {type(other)})"
602
265
  )
603
266
  return False
604
267
 
605
268
  # Compare agent_type
606
269
  if self.agent_config.agent_type != other.agent_config.agent_type:
607
- print(
270
+ logger.debug(
608
271
  f"Comparison failed: agent_type differs. (self.agent_config.agent_type: {self.agent_config.agent_type},"
609
272
  f" other.agent_config.agent_type: {other.agent_config.agent_type})"
610
273
  )
@@ -612,7 +275,7 @@ class Agent:
612
275
 
613
276
  # Compare tools
614
277
  if self.tools != other.tools:
615
- print(
278
+ logger.debug(
616
279
  "Comparison failed: tools differ."
617
280
  f"(self.tools: {[t.metadata.name for t in self.tools]}, "
618
281
  f"other.tools: {[t.metadata.name for t in other.tools]})"
@@ -621,14 +284,14 @@ class Agent:
621
284
 
622
285
  # Compare topic
623
286
  if self._topic != other._topic:
624
- print(
287
+ logger.debug(
625
288
  f"Comparison failed: topic differs. (self.topic: {self._topic}, other.topic: {other._topic})"
626
289
  )
627
290
  return False
628
291
 
629
292
  # Compare custom_instructions
630
293
  if self._custom_instructions != other._custom_instructions:
631
- print(
294
+ logger.debug(
632
295
  "Comparison failed: custom_instructions differ. (self.custom_instructions: "
633
296
  f"{self._custom_instructions}, other.custom_instructions: {other._custom_instructions})"
634
297
  )
@@ -636,31 +299,27 @@ class Agent:
636
299
 
637
300
  # Compare verbose
638
301
  if self.verbose != other.verbose:
639
- print(
302
+ logger.debug(
640
303
  f"Comparison failed: verbose differs. (self.verbose: {self.verbose}, other.verbose: {other.verbose})"
641
304
  )
642
305
  return False
643
306
 
644
307
  # Compare agent memory
645
- if self.agent.memory.chat_store != other.agent.memory.chat_store:
646
- print(
647
- f"Comparison failed: agent memory differs. (self.agent: {repr(self.agent.memory.chat_store)}, "
648
- f"other.agent: {repr(other.agent.memory.chat_store)})"
649
- )
308
+ if self.memory.get() != other.memory.get():
309
+ logger.debug("Comparison failed: agent memory differs.")
650
310
  return False
651
311
 
652
312
  # If all comparisons pass
653
- print("All comparisons passed. Objects are equal.")
313
+ logger.debug("All comparisons passed. Objects are equal.")
654
314
  return True
655
315
 
656
316
  @classmethod
657
317
  def from_tools(
658
318
  cls,
659
- tools: List[FunctionTool],
319
+ tools: List["FunctionTool"],
660
320
  topic: str = "general",
661
321
  custom_instructions: str = "",
662
322
  verbose: bool = True,
663
- update_func: Optional[Callable[[AgentStatusType, dict, str], None]] = None,
664
323
  agent_progress_callback: Optional[
665
324
  Callable[[AgentStatusType, dict, str], None]
666
325
  ] = None,
@@ -669,8 +328,9 @@ class Agent:
669
328
  validate_tools: bool = False,
670
329
  fallback_agent_config: Optional[AgentConfig] = None,
671
330
  chat_history: Optional[list[Tuple[str, str]]] = None,
672
- workflow_cls: Optional[Workflow] = None,
331
+ workflow_cls: Optional["Workflow"] = None,
673
332
  workflow_timeout: int = 120,
333
+ session_id: Optional[str] = None,
674
334
  ) -> "Agent":
675
335
  """
676
336
  Create an agent from tools, agent type, and language model.
@@ -682,7 +342,6 @@ class Agent:
682
342
  custom_instructions (str, optional): custom instructions for the agent. Defaults to ''.
683
343
  verbose (bool, optional): Whether the agent should print its steps. Defaults to True.
684
344
  agent_progress_callback (Callable): A callback function the code calls on any agent updates.
685
- update_func (Callable): old name for agent_progress_callback. Will be deprecated in future.
686
345
  query_logging_callback (Callable): A callback function the code calls upon completion of a query
687
346
  agent_config (AgentConfig, optional): The configuration of the agent.
688
347
  fallback_agent_config (AgentConfig, optional): The fallback configuration of the agent.
@@ -691,6 +350,8 @@ class Agent:
691
350
  Defaults to False.
692
351
  workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
693
352
  workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
353
+ session_id (str, optional): The session ID for memory persistence.
354
+ If None, auto-generates from topic and date. Defaults to None.
694
355
 
695
356
  Returns:
696
357
  Agent: An instance of the Agent class.
@@ -702,13 +363,13 @@ class Agent:
702
363
  verbose=verbose,
703
364
  agent_progress_callback=agent_progress_callback,
704
365
  query_logging_callback=query_logging_callback,
705
- update_func=update_func,
706
366
  agent_config=agent_config,
707
367
  chat_history=chat_history,
708
368
  validate_tools=validate_tools,
709
369
  fallback_agent_config=fallback_agent_config,
710
370
  workflow_cls=workflow_cls,
711
371
  workflow_timeout=workflow_timeout,
372
+ session_id=session_id,
712
373
  )
713
374
 
714
375
  @classmethod
@@ -753,141 +414,78 @@ class Agent:
753
414
  vectara_presence_penalty: Optional[float] = None,
754
415
  vectara_save_history: bool = True,
755
416
  return_direct: bool = False,
417
+ session_id: Optional[str] = None,
756
418
  ) -> "Agent":
757
- """
758
- Create an agent from a single Vectara corpus
419
+ """Create an agent from a single Vectara corpus using the factory function.
759
420
 
760
421
  Args:
761
- tool_name (str): The name of Vectara tool used by the agent
762
- vectara_corpus_key (str): The Vectara corpus key (or comma separated list of keys).
763
- vectara_api_key (str): The Vectara API key.
764
- agent_progress_callback (Callable): A callback function the code calls on any agent updates.
765
- query_logging_callback (Callable): A callback function the code calls upon completion of a query
766
- agent_config (AgentConfig, optional): The configuration of the agent.
767
- fallback_agent_config (AgentConfig, optional): The fallback configuration of the agent.
768
- chat_history (Tuple[str, str], optional): A list of user/agent chat pairs to initialize the agent memory.
769
- data_description (str): The description of the data.
770
- assistant_specialty (str): The specialty of the assistant.
771
- general_instructions (str, optional): General instructions for the agent.
772
- The Agent has a default set of instructions that are crafted to help it operate effectively.
773
- This allows you to customize the agent's behavior and personality, but use with caution.
774
- verbose (bool, optional): Whether to print verbose output.
775
- vectara_filter_fields (List[dict], optional): The filterable attributes
776
- (each dict maps field name to Tuple[type, description]).
777
- vectara_offset (int, optional): Number of results to skip.
778
- vectara_lambda_val (float, optional): Lambda value for Vectara hybrid search.
779
- vectara_semantics: (str, optional): Indicates whether the query is intended as a query or response.
780
- vectara_custom_dimensions: (Dict, optional): Custom dimensions for the query.
781
- vectara_reranker (str, optional): The Vectara reranker name (default "slingshot")
782
- vectara_rerank_k (int, optional): The number of results to use with reranking.
783
- vectara_rerank_limit: (int, optional): The maximum number of results to return after reranking.
784
- vectara_rerank_cutoff: (float, optional): The minimum score threshold for results to include after
785
- reranking.
786
- vectara_diversity_bias (float, optional): The MMR diversity bias.
787
- vectara_udf_expression (str, optional): The user defined expression for reranking results.
788
- vectara_rerank_chain (List[Dict], optional): A list of Vectara rerankers to be applied sequentially.
789
- vectara_n_sentences_before (int, optional): The number of sentences before the matching text
790
- vectara_n_sentences_after (int, optional): The number of sentences after the matching text.
791
- vectara_summary_num_results (int, optional): The number of results to use in summarization.
792
- vectara_summarizer (str, optional): The Vectara summarizer name.
793
- vectara_summary_response_language (str, optional): The response language for the Vectara summary.
794
- vectara_summary_prompt_text (str, optional): The custom prompt, using appropriate prompt variables and
795
- functions.
796
- vectara_max_response_chars (int, optional): The desired maximum number of characters for the generated
797
- summary.
798
- vectara_max_tokens (int, optional): The maximum number of tokens to be returned by the LLM.
799
- vectara_temperature (float, optional): The sampling temperature; higher values lead to more randomness.
800
- vectara_frequency_penalty (float, optional): How much to penalize repeating tokens in the response,
801
- higher values reducing likelihood of repeating the same line.
802
- vectara_presence_penalty (float, optional): How much to penalize repeating tokens in the response,
803
- higher values increasing the diversity of topics.
804
- vectara_save_history (bool, optional): Whether to save the query in history.
805
- return_direct (bool, optional): Whether the agent should return the tool's response directly.
806
-
807
- Returns:
808
- Agent: An instance of the Agent class.
422
+ tool_name (str): Name of the tool to be created.
423
+ data_description (str): Description of the data/corpus.
424
+ assistant_specialty (str): The specialty/topic of the assistant.
425
+ session_id (str, optional): The session ID for memory persistence.
426
+ If None, auto-generates from topic and date. Defaults to None.
427
+ ... (other parameters as documented in factory function)
809
428
  """
810
- vec_factory = VectaraToolFactory(
811
- vectara_api_key=vectara_api_key,
429
+ # Use the factory function to avoid code duplication
430
+ config = create_agent_from_corpus(
431
+ tool_name=tool_name,
432
+ data_description=data_description,
433
+ assistant_specialty=assistant_specialty,
434
+ general_instructions=general_instructions,
812
435
  vectara_corpus_key=vectara_corpus_key,
813
- )
814
- field_definitions = {}
815
- field_definitions["query"] = (str, Field(description="The user query")) # type: ignore
816
- for field in vectara_filter_fields:
817
- field_definitions[field["name"]] = (
818
- eval(field["type"]),
819
- Field(description=field["description"]),
820
- ) # type: ignore
821
- query_args = create_model("QueryArgs", **field_definitions) # type: ignore
822
-
823
- # tool name must be valid Python function name
824
- if tool_name:
825
- tool_name = re.sub(r"[^A-Za-z0-9_]", "_", tool_name)
826
-
827
- vectara_tool = vec_factory.create_rag_tool(
828
- tool_name=tool_name or f"vectara_{vectara_corpus_key}",
829
- tool_description=f"""
830
- Given a user query,
831
- returns a response (str) to a user question about {data_description}.
832
- """,
833
- tool_args_schema=query_args,
834
- reranker=vectara_reranker,
835
- rerank_k=vectara_rerank_k,
836
- rerank_limit=vectara_rerank_limit,
837
- rerank_cutoff=vectara_rerank_cutoff,
838
- mmr_diversity_bias=vectara_diversity_bias,
839
- udf_expression=vectara_udf_expression,
840
- rerank_chain=vectara_rerank_chain,
841
- n_sentences_before=vectara_n_sentences_before,
842
- n_sentences_after=vectara_n_sentences_after,
843
- offset=vectara_offset,
844
- lambda_val=vectara_lambda_val,
845
- semantics=vectara_semantics,
846
- custom_dimensions=vectara_custom_dimensions,
847
- summary_num_results=vectara_summary_num_results,
848
- vectara_summarizer=vectara_summarizer,
849
- summary_response_lang=vectara_summary_response_language,
850
- vectara_prompt_text=vectara_summary_prompt_text,
851
- max_response_chars=vectara_max_response_chars,
852
- max_tokens=vectara_max_tokens,
853
- temperature=vectara_temperature,
854
- frequency_penalty=vectara_frequency_penalty,
855
- presence_penalty=vectara_presence_penalty,
856
- save_history=vectara_save_history,
857
- include_citations=True,
436
+ vectara_api_key=vectara_api_key,
437
+ agent_config=agent_config,
438
+ fallback_agent_config=fallback_agent_config,
858
439
  verbose=verbose,
440
+ vectara_filter_fields=vectara_filter_fields,
441
+ vectara_offset=vectara_offset,
442
+ vectara_lambda_val=vectara_lambda_val,
443
+ vectara_semantics=vectara_semantics,
444
+ vectara_custom_dimensions=vectara_custom_dimensions,
445
+ vectara_reranker=vectara_reranker,
446
+ vectara_rerank_k=vectara_rerank_k,
447
+ vectara_rerank_limit=vectara_rerank_limit,
448
+ vectara_rerank_cutoff=vectara_rerank_cutoff,
449
+ vectara_diversity_bias=vectara_diversity_bias,
450
+ vectara_udf_expression=vectara_udf_expression,
451
+ vectara_rerank_chain=vectara_rerank_chain,
452
+ vectara_n_sentences_before=vectara_n_sentences_before,
453
+ vectara_n_sentences_after=vectara_n_sentences_after,
454
+ vectara_summary_num_results=vectara_summary_num_results,
455
+ vectara_summarizer=vectara_summarizer,
456
+ vectara_summary_response_language=vectara_summary_response_language,
457
+ vectara_summary_prompt_text=vectara_summary_prompt_text,
458
+ vectara_max_response_chars=vectara_max_response_chars,
459
+ vectara_max_tokens=vectara_max_tokens,
460
+ vectara_temperature=vectara_temperature,
461
+ vectara_frequency_penalty=vectara_frequency_penalty,
462
+ vectara_presence_penalty=vectara_presence_penalty,
463
+ vectara_save_history=vectara_save_history,
859
464
  return_direct=return_direct,
860
465
  )
861
466
 
862
- assistant_instructions = f"""
863
- - You are a helpful {assistant_specialty} assistant.
864
- - You can answer questions about {data_description}.
865
- - Never discuss politics, and always respond politely.
866
- """
867
-
868
467
  return cls(
869
- tools=[vectara_tool],
870
- topic=assistant_specialty,
871
- custom_instructions=assistant_instructions,
872
- general_instructions=general_instructions,
873
- verbose=verbose,
468
+ chat_history=chat_history,
874
469
  agent_progress_callback=agent_progress_callback,
875
470
  query_logging_callback=query_logging_callback,
876
- agent_config=agent_config,
877
- fallback_agent_config=fallback_agent_config,
878
- chat_history=chat_history,
879
- vectara_api_key=vectara_api_key,
471
+ session_id=session_id,
472
+ **config,
880
473
  )
881
474
 
882
475
  def _switch_agent_config(self) -> None:
883
- """ "
476
+ """
884
477
  Switch the configuration type of the agent.
885
478
  This function is called automatically to switch the agent configuration if the current configuration fails.
479
+ Ensures memory consistency by clearing agent instances so they are recreated with current memory.
886
480
  """
887
481
  if self.agent_config_type == AgentConfigType.DEFAULT:
888
482
  self.agent_config_type = AgentConfigType.FALLBACK
483
+ # Clear the fallback agent so it gets recreated with current memory
484
+ self._fallback_agent = None
889
485
  else:
890
486
  self.agent_config_type = AgentConfigType.DEFAULT
487
+ # Clear the main agent so it gets recreated with current memory
488
+ self._agent = None
891
489
 
892
490
  def report(self, detailed: bool = False) -> None:
893
491
  """
@@ -899,45 +497,25 @@ class Agent:
899
497
  Returns:
900
498
  str: The report from the agent.
901
499
  """
902
- print("Vectara agentic Report:")
903
- print(f"Agent Type = {self.agent_config.agent_type}")
904
- print(f"Topic = {self._topic}")
905
- print("Tools:")
500
+ logger.info("Vectara agentic Report:")
501
+ logger.info(f"Agent Type = {self.agent_config.agent_type}")
502
+ logger.info(f"Topic = {self._topic}")
503
+ logger.info("Tools:")
906
504
  for tool in self.tools:
907
505
  if hasattr(tool, "metadata"):
908
506
  if detailed:
909
- print(f"- {tool.metadata.description}")
507
+ logger.info(f"- {tool.metadata.description}")
910
508
  else:
911
- print(f"- {tool.metadata.name}")
509
+ logger.info(f"- {tool.metadata.name}")
912
510
  else:
913
- print("- tool without metadata")
914
- print(
511
+ logger.info("- tool without metadata")
512
+ logger.info(
915
513
  f"Agent LLM = {get_llm(LLMRole.MAIN, config=self.agent_config).metadata.model_name}"
916
514
  )
917
- print(
515
+ logger.info(
918
516
  f"Tool LLM = {get_llm(LLMRole.TOOL, config=self.agent_config).metadata.model_name}"
919
517
  )
920
518
 
921
- def token_counts(self) -> dict:
922
- """
923
- Get the token counts for the agent and tools.
924
-
925
- Returns:
926
- dict: The token counts for the agent and tools.
927
- """
928
- return {
929
- "main token count": (
930
- self.main_token_counter.total_llm_token_count
931
- if self.main_token_counter
932
- else -1
933
- ),
934
- "tool token count": (
935
- self.tool_token_counter.total_llm_token_count
936
- if self.tool_token_counter
937
- else -1
938
- ),
939
- }
940
-
941
519
  def _get_current_agent(self):
942
520
  return (
943
521
  self.agent
@@ -949,23 +527,11 @@ class Agent:
949
527
  return (
950
528
  self.agent_config.agent_type
951
529
  if self.agent_config_type == AgentConfigType.DEFAULT
530
+ or not self.fallback_agent_config
952
531
  else self.fallback_agent_config.agent_type
953
532
  )
954
533
 
955
- async def _aformat_for_lats(self, prompt, agent_response):
956
- llm_prompt = f"""
957
- Given the question '{prompt}', and agent response '{agent_response.response}',
958
- Please provide a well formatted final response to the query.
959
- final response:
960
- """
961
- agent_type = self._get_current_agent_type()
962
- if agent_type != AgentType.LATS:
963
- return
964
-
965
- agent = self._get_current_agent()
966
- agent_response.response = str(agent.llm.acomplete(llm_prompt))
967
-
968
- def chat(self, prompt: str) -> AgentResponse: # type: ignore
534
+ def chat(self, prompt: str) -> AgentResponse:
969
535
  """
970
536
  Interact with the agent using a chat prompt.
971
537
 
@@ -975,48 +541,15 @@ class Agent:
975
541
  Returns:
976
542
  AgentResponse: The response from the agent.
977
543
  """
978
- return asyncio.run(self.achat(prompt))
979
-
980
- def _calc_fcs(self, agent_response: AgentResponse) -> None:
981
- """
982
- Calculate the Factual consistency score for the agent response.
983
- """
984
- if not self.vectara_api_key:
985
- logging.debug("FCS calculation skipped: 'vectara_api_key' is missing.")
986
- return # can't calculate FCS without Vectara API key
987
-
988
- chat_history = self.memory.get()
989
- context = []
990
- for msg in chat_history:
991
- if msg.role == MessageRole.TOOL:
992
- content = msg.content
993
- if _is_human_readable_output(content):
994
- try:
995
- content = content.to_human_readable()
996
- except Exception as e:
997
- logging.debug(
998
- f"Failed to get human-readable format for FCS calculation: {e}"
999
- )
1000
- # Fall back to string representation of the object
1001
- content = str(content)
1002
-
1003
- context.append(content)
1004
- elif msg.role in [MessageRole.USER, MessageRole.ASSISTANT] and msg.content:
1005
- context.append(msg.content)
1006
-
1007
- if not context:
1008
- return
1009
-
1010
- context_str = "\n".join(context)
1011
544
  try:
1012
- score = HHEM(self.vectara_api_key).compute(
1013
- context_str, agent_response.response
1014
- )
1015
- if agent_response.metadata is None:
1016
- agent_response.metadata = {}
1017
- agent_response.metadata["fcs"] = score
1018
- except Exception as e:
1019
- logging.error(f"Failed to calculate FCS: {e}")
545
+ _ = asyncio.get_running_loop()
546
+ except RuntimeError:
547
+ return asyncio.run(self.achat(prompt))
548
+
549
+ # We are inside a running loop (Jupyter, uvicorn, etc.)
550
+ raise RuntimeError(
551
+ "Use `await agent.achat(...)` inside an event loop (e.g. Jupyter)."
552
+ )
1020
553
 
1021
554
  async def achat(self, prompt: str) -> AgentResponse: # type: ignore
1022
555
  """
@@ -1028,6 +561,9 @@ class Agent:
1028
561
  Returns:
1029
562
  AgentResponse: The response from the agent.
1030
563
  """
564
+ if not prompt:
565
+ return AgentResponse(response="")
566
+
1031
567
  max_attempts = 4 if self.fallback_agent_config else 2
1032
568
  attempt = 0
1033
569
  orig_llm = self.llm.metadata.model_name
@@ -1035,36 +571,205 @@ class Agent:
1035
571
  while attempt < max_attempts:
1036
572
  try:
1037
573
  current_agent = self._get_current_agent()
1038
- agent_response = await current_agent.achat(prompt)
1039
- self._calc_fcs(agent_response)
1040
- await self._aformat_for_lats(prompt, agent_response)
1041
- if self.observability_enabled:
1042
- eval_fcs()
1043
- if self.query_logging_callback:
1044
- self.query_logging_callback(prompt, agent_response.response)
574
+
575
+ # Deal with workflow-based agent types (Function Calling and ReAct)
576
+ if self._get_current_agent_type() in [
577
+ AgentType.FUNCTION_CALLING,
578
+ AgentType.REACT,
579
+ ]:
580
+ from llama_index.core.workflow import Context
581
+
582
+ # Create context and pass memory to the workflow agent
583
+ # According to LlamaIndex docs, we should let the workflow manage memory internally
584
+ ctx = Context(current_agent)
585
+
586
+ handler = current_agent.run(
587
+ user_msg=prompt, memory=self.memory, ctx=ctx
588
+ )
589
+
590
+ # Listen to workflow events if progress callback is set
591
+ if self.agent_progress_callback:
592
+ # Create event tracker for consistent event ID generation
593
+ from .agent_core.streaming import ToolEventTracker
594
+
595
+ event_tracker = ToolEventTracker()
596
+
597
+ async for event in handler.stream_events():
598
+ # Use consistent event ID tracking to ensure tool calls and outputs are paired
599
+ event_id = event_tracker.get_event_id(event)
600
+
601
+ # Handle different types of workflow events using same logic as FunctionCallingStreamHandler
602
+ from llama_index.core.agent.workflow import (
603
+ ToolCall,
604
+ ToolCallResult,
605
+ AgentInput,
606
+ AgentOutput,
607
+ )
608
+
609
+ if isinstance(event, ToolCall):
610
+ self.agent_progress_callback(
611
+ status_type=AgentStatusType.TOOL_CALL,
612
+ msg={
613
+ "tool_name": event.tool_name,
614
+ "arguments": json.dumps(event.tool_kwargs),
615
+ },
616
+ event_id=event_id,
617
+ )
618
+ elif isinstance(event, ToolCallResult):
619
+ self.agent_progress_callback(
620
+ status_type=AgentStatusType.TOOL_OUTPUT,
621
+ msg={
622
+ "tool_name": event.tool_name,
623
+ "content": str(event.tool_output),
624
+ },
625
+ event_id=event_id,
626
+ )
627
+ elif isinstance(event, AgentInput):
628
+ self.agent_progress_callback(
629
+ status_type=AgentStatusType.AGENT_UPDATE,
630
+ msg={"content": f"Agent input: {event.input}"},
631
+ event_id=event_id,
632
+ )
633
+ elif isinstance(event, AgentOutput):
634
+ self.agent_progress_callback(
635
+ status_type=AgentStatusType.AGENT_UPDATE,
636
+ msg={"content": f"Agent output: {event.response}"},
637
+ event_id=event_id,
638
+ )
639
+
640
+ result = await handler
641
+
642
+ # Ensure we have an AgentResponse object with a string response
643
+ if hasattr(result, "response"):
644
+ response_text = result.response
645
+ else:
646
+ response_text = str(result)
647
+
648
+ # Handle case where response is a ChatMessage object
649
+ if hasattr(response_text, "content"):
650
+ response_text = response_text.content
651
+ elif not isinstance(response_text, str):
652
+ response_text = str(response_text)
653
+
654
+ if response_text is None or response_text == "None":
655
+ # Try to find tool outputs in the result object
656
+ response_text = None
657
+
658
+ # Check various possible locations for tool outputs
659
+ if hasattr(result, "tool_outputs") and result.tool_outputs:
660
+ # Get the latest tool output
661
+ latest_output = (
662
+ result.tool_outputs[-1]
663
+ if isinstance(result.tool_outputs, list)
664
+ else result.tool_outputs
665
+ )
666
+ response_text = str(latest_output)
667
+
668
+ # Check if there are tool_calls with results
669
+ elif hasattr(result, "tool_calls") and result.tool_calls:
670
+ # Tool calls might contain the outputs - let's try to extract them
671
+ for tool_call in result.tool_calls:
672
+ if (
673
+ hasattr(tool_call, "tool_output")
674
+ and tool_call.tool_output is not None
675
+ ):
676
+ response_text = str(tool_call.tool_output)
677
+ break
678
+
679
+ elif hasattr(result, "sources") or hasattr(
680
+ result, "source_nodes"
681
+ ):
682
+ sources = getattr(
683
+ result, "sources", getattr(result, "source_nodes", [])
684
+ )
685
+ if (
686
+ sources
687
+ and len(sources) > 0
688
+ and hasattr(sources[0], "text")
689
+ ):
690
+ response_text = sources[0].text
691
+
692
+ # Check for workflow context or chat history that might contain tool results
693
+ elif hasattr(result, "chat_history"):
694
+ # Look for the most recent assistant message that might contain tool results
695
+ chat_history = result.chat_history
696
+ if chat_history and len(chat_history) > 0:
697
+ for msg in reversed(chat_history):
698
+ if (
699
+ msg.role == MessageRole.TOOL
700
+ and msg.content
701
+ and str(msg.content).strip()
702
+ ):
703
+ response_text = msg.content
704
+ break
705
+ if (
706
+ hasattr(msg, "content")
707
+ and msg.content
708
+ and str(msg.content).strip()
709
+ ):
710
+ response_text = msg.content
711
+ break
712
+
713
+ # If we still don't have a response, provide a fallback
714
+ if response_text is None or response_text == "None":
715
+ response_text = "Response completed."
716
+
717
+ agent_response = AgentResponse(
718
+ response=response_text, metadata=getattr(result, "metadata", {})
719
+ )
720
+
721
+ # Retrieve updated memory from workflow context
722
+ # According to LlamaIndex docs, workflow agents manage memory internally
723
+ # and we can access it via ctx.store.get("memory")
724
+ try:
725
+ workflow_memory = await ctx.store.get("memory")
726
+ if workflow_memory:
727
+ # Update our external memory with the workflow's memory
728
+ self.memory = workflow_memory
729
+ except Exception as e:
730
+ # If we can't retrieve workflow memory, fall back to manual management
731
+ warning_msg = (
732
+ f"Could not retrieve workflow memory, falling back to "
733
+ f"manual management: {e}"
734
+ )
735
+ logger.warning(warning_msg)
736
+ user_msg = ChatMessage.from_str(prompt, role=MessageRole.USER)
737
+ assistant_msg = ChatMessage.from_str(
738
+ response_text, role=MessageRole.ASSISTANT
739
+ )
740
+ self.memory.put_messages([user_msg, assistant_msg])
741
+
742
+ # Standard chat interaction for other agent types
743
+ else:
744
+ agent_response = await current_agent.achat(prompt)
745
+
746
+ # Post processing after response is generated
747
+ agent_response.metadata = agent_response.metadata or {}
748
+ user_metadata = agent_response.metadata
749
+ agent_response = await execute_post_stream_processing(
750
+ agent_response, prompt, self, user_metadata
751
+ )
1045
752
  return agent_response
1046
753
 
1047
754
  except Exception as e:
1048
755
  last_error = e
1049
756
  if self.verbose:
1050
- print(f"LLM call failed on attempt {attempt}. " f"Error: {e}.")
1051
- if attempt >= 2:
1052
- if self.verbose:
1053
- print(
1054
- f"LLM call failed on attempt {attempt}. Switching agent configuration."
1055
- )
757
+ logger.warning(
758
+ f"LLM call failed on attempt {attempt}. " f"Error: {e}."
759
+ )
760
+ if attempt >= 2 and self.fallback_agent_config:
1056
761
  self._switch_agent_config()
1057
- time.sleep(1)
762
+ await asyncio.sleep(1)
1058
763
  attempt += 1
1059
764
 
1060
765
  return AgentResponse(
1061
766
  response=(
1062
767
  f"For {orig_llm} LLM - failure can't be resolved after "
1063
- f"{max_attempts} attempts ({last_error}."
768
+ f"{max_attempts} attempts ({last_error})."
1064
769
  )
1065
770
  )
1066
771
 
-    def stream_chat(self, prompt: str) -> AgentStreamingResponse:  # type: ignore
+    def stream_chat(self, prompt: str) -> AgentStreamingResponse:
         """
         Interact with the agent using a chat prompt with streaming.
         Args:
@@ -1072,7 +777,13 @@ class Agent:
         Returns:
             AgentStreamingResponse: The streaming response from the agent.
         """
-        return asyncio.run(self.astream_chat(prompt))
+        try:
+            _ = asyncio.get_running_loop()
+        except RuntimeError:
+            return asyncio.run(self.astream_chat(prompt))
+        raise RuntimeError(
+            "Use `await agent.astream_chat(...)` inside an event loop (e.g. Jupyter)."
+        )
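This guard makes the sync wrapper's contract explicit: `asyncio.run` cannot be nested inside a running event loop, so `stream_chat` only bridges to the async path when no loop is active, and otherwise fails fast with a pointer to the right API. A self-contained sketch of the same guard, with no vectara-agentic types involved:

    import asyncio

    def run_sync(coro_fn, *args):
        # Mirror of the guard: bridge with asyncio.run only when no loop is active.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(coro_fn(*args))
        raise RuntimeError("already inside an event loop; await the async API instead")

    async def demo(x):
        return x * 2

    print(run_sync(demo, 21))  # 42 when called from a plain script

    async def main():
        try:
            run_sync(demo, 21)  # raises: a loop is already running here
        except RuntimeError as err:
            print(err)

    asyncio.run(main())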
 
     async def astream_chat(self, prompt: str) -> AgentStreamingResponse:  # type: ignore
         """
@@ -1082,50 +793,199 @@ class Agent:
         Returns:
             AgentStreamingResponse: The streaming response from the agent.
         """
+        # Store query for VHC processing and clear previous tool outputs
+        self._last_query = prompt
+        self._clear_tool_outputs()
         max_attempts = 4 if self.fallback_agent_config else 2
         attempt = 0
         orig_llm = self.llm.metadata.model_name
+        last_error = None
         while attempt < max_attempts:
             try:
                 current_agent = self._get_current_agent()
-                agent_response = await current_agent.astream_chat(prompt)
-                original_async_response_gen = agent_response.async_response_gen
+                user_meta: Dict[str, Any] = {}
+
+                # Deal with Function Calling agent type
+                if self._get_current_agent_type() == AgentType.FUNCTION_CALLING:
+                    from llama_index.core.workflow import Context
+
+                    # Create context and pass memory to the workflow agent
+                    # According to LlamaIndex docs, we should let the workflow manage memory internally
+                    ctx = Context(current_agent)
+
+                    handler = current_agent.run(
+                        user_msg=prompt, memory=self.memory, ctx=ctx
+                    )
+
+                    # Use the dedicated FunctionCallingStreamHandler
+                    stream_handler = FunctionCallingStreamHandler(self, handler, prompt)
+                    streaming_adapter = stream_handler.create_streaming_response(
+                        user_meta
+                    )
+
+                    return AgentStreamingResponse(
+                        base=streaming_adapter, metadata=user_meta
+                    )
+
+                #
+                # For other agent types, use the standard async chat method
+                #
+                li_stream = await current_agent.astream_chat(prompt)
+                orig_async = li_stream.async_response_gen

                 # Define a wrapper to preserve streaming behavior while executing post-stream logic.
                 async def _stream_response_wrapper():
-                    async for token in original_async_response_gen():
-                        yield token  # Yield tokens as they are generated
-                    # Post-streaming additional logic:
-                    await self._aformat_for_lats(prompt, agent_response)
-                    if self.query_logging_callback:
-                        self.query_logging_callback(prompt, agent_response.response)
-                    if self.observability_enabled:
-                        eval_fcs()
-                    self._calc_fcs(agent_response)
-
-                agent_response.async_response_gen = (
-                    _stream_response_wrapper  # Override the generator
-                )
-                return agent_response
+                    async for tok in orig_async():
+                        yield tok
+
+                    # Use shared post-processing function
+                    await execute_post_stream_processing(
+                        li_stream, prompt, self, user_meta
+                    )
+
+                li_stream.async_response_gen = _stream_response_wrapper
+                return AgentStreamingResponse(base=li_stream, metadata=user_meta)

             except Exception as e:
                 last_error = e
-                if attempt >= 2:
-                    if self.verbose:
-                        print(
-                            f"LLM call failed on attempt {attempt}. Switching agent configuration."
-                        )
+                if attempt >= 2 and self.fallback_agent_config:
                     self._switch_agent_config()
-                time.sleep(1)
+                await asyncio.sleep(1)
                 attempt += 1

-        return AgentStreamingResponse(
-            response=(
-                f"For {orig_llm} LLM - failure can't be resolved after "
-                f"{max_attempts} attempts ({last_error})."
-            )
+        return AgentStreamingResponse.from_error(
+            f"For {orig_llm} LLM - failure can't be resolved after "
+            f"{max_attempts} attempts ({last_error})."
         )
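The rewritten wrapper keeps a generally useful pattern: pass tokens through untouched, and run post-processing exactly once after the generator is exhausted. The same idea in isolation, with no vectara-agentic types involved:

    import asyncio

    def with_post_hook(gen_fn, hook):
        # Wrap an async-generator function so `hook` runs after the last token.
        async def wrapped():
            async for tok in gen_fn():
                yield tok
            await hook()
        return wrapped

    async def main():
        async def tokens():
            for t in ("Hello", " ", "world"):
                yield t

        async def hook():
            print("\n[post-stream logic ran]")

        async for tok in with_post_hook(tokens, hook)():
            print(tok, end="")

    asyncio.run(main())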
 
+    def _clear_tool_outputs(self):
+        """Clear stored tool outputs at the start of a new query."""
+        self._current_tool_outputs.clear()
+        logging.info("🔧 [TOOL_STORAGE] Cleared stored tool outputs for new query")
+
+    def _add_tool_output(self, tool_name: str, content: str):
+        """Add a tool output to the current collection for VHC."""
+        tool_output = {
+            'status_type': 'TOOL_OUTPUT',
+            'content': content,
+            'tool_name': tool_name
+        }
+        self._current_tool_outputs.append(tool_output)
+        logging.info(f"🔧 [TOOL_STORAGE] Added tool output from '{tool_name}': {len(content)} chars")
+
+    def _get_stored_tool_outputs(self) -> List[dict]:
+        """Get the stored tool outputs from the current query."""
+        logging.info(f"🔧 [TOOL_STORAGE] Retrieved {len(self._current_tool_outputs)} stored tool outputs")
+        return self._current_tool_outputs.copy()
+
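These three helpers form a per-query buffer: cleared when a query starts, appended to as tools return during streaming, and copied out when VHC needs evidence. The lifecycle in isolation (a sketch, not the package's class):

    class ToolOutputBuffer:
        def __init__(self):
            self._outputs = []

        def clear(self):
            # Start of a new query.
            self._outputs.clear()

        def add(self, tool_name, content):
            # One entry per tool invocation, same shape as the diff's dict.
            self._outputs.append(
                {"status_type": "TOOL_OUTPUT", "tool_name": tool_name, "content": content}
            )

        def snapshot(self):
            # Copy so later mutation doesn't leak into the consumer.
            return self._outputs.copy()

    buf = ToolOutputBuffer()
    buf.add("ask_vectara", "retrieved 3 passages")
    assert len(buf.snapshot()) == 1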
+    async def acompute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (async version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] UNIQUE_DEBUG_MESSAGE acompute_vhc method called - "
+            f"stored_tool_outputs_count={len(self._current_tool_outputs)}"
+        )
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] _last_query: {'set' if self._last_query else 'None'}"
+        )
+
+        if not self._last_query:
+            logging.info("🔍 [VHC_AGENT] Returning early - no _last_query")
+            return {"corrected_text": None, "corrections": []}
+
+        # For VHC to work, we need the response text from memory
+        # Get the latest assistant response from memory
+        messages = self.memory.get()
+        logging.info(
+            f"🔍 [VHC_AGENT] memory.get() returned {len(messages) if messages else 0} messages"
+        )
+
+        if not messages:
+            logging.info("🔍 [VHC_AGENT] Returning early - no messages in memory")
+            return {"corrected_text": None, "corrections": []}
+
+        # Find the last assistant message
+        last_response = None
+        for msg in reversed(messages):
+            if msg.role == MessageRole.ASSISTANT:
+                last_response = msg.content
+                break
+
+        logging.info(
+            f"🔍 [VHC_AGENT] Found last_response: {'set' if last_response else 'None'}"
+        )
+
+        if not last_response:
+            logging.info("🔍 [VHC_AGENT] Returning early - no last assistant response found")
+            return {"corrected_text": None, "corrections": []}
+
+        # Update stored response for caching
+        self._last_response = last_response
+
+        # Create cache key from query + response
+        cache_key = hash(f"{self._last_query}:{self._last_response}")
+
+        # Return cached results if available
+        if cache_key in self._vhc_cache:
+            return self._vhc_cache[cache_key]
+
+        # Check if we have VHC API key
+        logging.info(
+            f"🔍 [VHC_AGENT] acompute_vhc called with vectara_api_key={'set' if self.vectara_api_key else 'None'}"
+        )
+        if not self.vectara_api_key:
+            logging.info("🔍 [VHC_AGENT] No vectara_api_key - returning early with None")
+            return {"corrected_text": None, "corrections": []}
+
+        # Compute VHC using existing library function
+        from .agent_core.utils.hallucination import analyze_hallucinations
+
+        try:
+            # Use stored tool outputs from current query
+            stored_tool_outputs = self._get_stored_tool_outputs()
+            logging.info(f"🔧 [VHC_AGENT] Using {len(stored_tool_outputs)} stored tool outputs for VHC")
+
+            corrected_text, corrections = analyze_hallucinations(
+                query=self._last_query,
+                chat_history=self.memory.get(),
+                agent_response=self._last_response,
+                tools=self.tools,
+                vectara_api_key=self.vectara_api_key,
+                tool_outputs=stored_tool_outputs,
+            )
+
+            # Cache results
+            results = {"corrected_text": corrected_text, "corrections": corrections}
+            self._vhc_cache[cache_key] = results
+
+            return results
+
+        except Exception as e:
+            logger.error(f"VHC computation failed: {e}")
+            return {"corrected_text": None, "corrections": []}
+
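Two details of `acompute_vhc` are worth noting. First, the result is memoized on `hash(f"{query}:{response}")`; Python string hashes are salted per process, so keys like this are fine for an in-process cache but should never be persisted. Second, every early exit returns the same `{"corrected_text": None, "corrections": []}` shape, so callers never branch on missing keys. The caching pattern in isolation:

    _vhc_cache = {}

    def cached_vhc(query, response, compute):
        # Key is only stable within this process; do not persist it.
        key = hash(f"{query}:{response}")
        if key not in _vhc_cache:
            _vhc_cache[key] = compute(query, response)
        return _vhc_cache[key]

    first = cached_vhc("q", "r", lambda q, r: {"corrected_text": r, "corrections": []})
    second = cached_vhc("q", "r", lambda q, r: None)  # compute not called, cache hit
    assert first is second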
+    def compute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (sync version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        try:
+            loop = asyncio.get_event_loop()
+            return loop.run_until_complete(self.acompute_vhc())
+        except RuntimeError:
+            # No event loop running, create a new one
+            return asyncio.run(self.acompute_vhc())
+
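Taken together with `astream_chat`, the intended call sequence appears to be: run a turn, then ask for VHC on that turn. A hedged usage sketch (agent construction elided; `chat` is the package's standard sync entry point, and the query string is illustrative):

    response = agent.chat("Summarize the latest filing.")
    vhc = agent.compute_vhc()      # cached per (query, response) pair
    print(vhc["corrected_text"])   # None when no API key or nothing to correct
    for correction in vhc["corrections"]:
        print(correction)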
     #
     # run() method for running a workflow
     # workflow will always get these arguments in the StartEvent: agent, tools, llm, verbose
@@ -1168,6 +1028,8 @@ class Agent:
                 f"Fields without default values: {fields_without_default}"
             )

+        from llama_index.core.workflow import Context
+
         workflow_context = Context(workflow=workflow)
         try:
             # run workflow
@@ -1197,7 +1059,9 @@ class Agent:
                     input_dict[key] = value
                 output = outputs_model_on_fail_cls.model_validate(input_dict)
             else:
-                print(f"Vectara Agentic: Workflow failed with unexpected error: {e}")
+                logger.warning(
+                    f"Vectara Agentic: Workflow failed with unexpected error: {e}"
+                )
                 raise type(e)(str(e)).with_traceback(e.__traceback__)

         return output
@@ -1225,57 +1089,7 @@ class Agent:

     def to_dict(self) -> Dict[str, Any]:
         """Serialize the Agent instance to a dictionary."""
-        tool_info = []
-        for tool in self.tools:
-            if hasattr(tool.metadata, "fn_schema"):
-                fn_schema_cls = tool.metadata.fn_schema
-                fn_schema_serialized = {
-                    "schema": (
-                        fn_schema_cls.model_json_schema()
-                        if fn_schema_cls and hasattr(fn_schema_cls, "model_json_schema")
-                        else None
-                    ),
-                    "metadata": {
-                        "module": fn_schema_cls.__module__ if fn_schema_cls else None,
-                        "class": fn_schema_cls.__name__ if fn_schema_cls else None,
-                    },
-                }
-            else:
-                fn_schema_serialized = None
-
-            tool_dict = {
-                "tool_type": tool.metadata.tool_type.value,
-                "name": tool.metadata.name,
-                "description": tool.metadata.description,
-                "fn": (
-                    pickle.dumps(getattr(tool, "fn", None)).decode("latin-1")
-                    if getattr(tool, "fn", None)
-                    else None
-                ),
-                "async_fn": (
-                    pickle.dumps(getattr(tool, "async_fn", None)).decode("latin-1")
-                    if getattr(tool, "async_fn", None)
-                    else None
-                ),
-                "fn_schema": fn_schema_serialized,
-            }
-            tool_info.append(tool_dict)
-
-        return {
-            "agent_type": self.agent_config.agent_type.value,
-            "memory": pickle.dumps(self.agent.memory).decode("latin-1"),
-            "tools": tool_info,
-            "topic": self._topic,
-            "custom_instructions": self._custom_instructions,
-            "verbose": self.verbose,
-            "agent_config": self.agent_config.to_dict(),
-            "fallback_agent": (
-                self.fallback_agent_config.to_dict()
-                if self.fallback_agent_config
-                else None
-            ),
-            "workflow_cls": self.workflow_cls if self.workflow_cls else None,
-        }
+        return serialize_agent_to_dict(self)

     @classmethod
     def from_dict(
@@ -1285,114 +1099,6 @@ class Agent:
         query_logging_callback: Optional[Callable] = None,
     ) -> "Agent":
         """Create an Agent instance from a dictionary."""
-        agent_config = AgentConfig.from_dict(data["agent_config"])
-        fallback_agent_config = (
-            AgentConfig.from_dict(data["fallback_agent_config"])
-            if data.get("fallback_agent_config")
-            else None
-        )
-        tools: list[FunctionTool] = []
-
-        for tool_data in data["tools"]:
-            query_args_model = None
-            if tool_data.get("fn_schema"):
-                schema_info = tool_data["fn_schema"]
-                try:
-                    module_name = schema_info["metadata"]["module"]
-                    class_name = schema_info["metadata"]["class"]
-                    mod = importlib.import_module(module_name)
-                    candidate_cls = getattr(mod, class_name)
-                    if inspect.isclass(candidate_cls) and issubclass(
-                        candidate_cls, BaseModel
-                    ):
-                        query_args_model = candidate_cls
-                    else:
-                        # It's not the Pydantic model class we expected (e.g., it's the function itself)
-                        # Force fallback to JSON schema reconstruction by raising an error.
-                        raise ImportError(
-                            f"Retrieved '{class_name}' from '{module_name}' is not a Pydantic BaseModel class. "
-                            "Falling back to JSON schema reconstruction."
-                        )
-                except Exception:
-                    # Fallback: rebuild using the JSON schema
-                    field_definitions = {}
-                    json_schema_to_rebuild = schema_info.get("schema")
-                    if json_schema_to_rebuild and isinstance(
-                        json_schema_to_rebuild, dict
-                    ):
-                        for field, values in json_schema_to_rebuild.get(
-                            "properties", {}
-                        ).items():
-                            field_type = get_field_type(values)
-                            field_description = values.get(
-                                "description"
-                            )  # Defaults to None
-                            if "default" in values:
-                                field_definitions[field] = (
-                                    field_type,
-                                    Field(
-                                        description=field_description,
-                                        default=values["default"],
-                                    ),
-                                )
-                            else:
-                                field_definitions[field] = (
-                                    field_type,
-                                    Field(description=field_description),
-                                )
-                        query_args_model = create_model(
-                            json_schema_to_rebuild.get(
-                                "title", f"{tool_data['name']}_QueryArgs"
-                            ),
-                            **field_definitions,
-                        )
-                    else:  # If schema part is missing or not a dict, create a default empty model
-                        query_args_model = create_model(
-                            f"{tool_data['name']}_QueryArgs"
-                        )
-
-            # If fn_schema was not in tool_data or reconstruction failed badly, default to empty pydantic model
-            if query_args_model is None:
-                query_args_model = create_model(f"{tool_data['name']}_QueryArgs")
-
-            fn = (
-                pickle.loads(tool_data["fn"].encode("latin-1"))
-                if tool_data["fn"]
-                else None
-            )
-            async_fn = (
-                pickle.loads(tool_data["async_fn"].encode("latin-1"))
-                if tool_data["async_fn"]
-                else None
-            )
-
-            tool = VectaraTool.from_defaults(
-                name=tool_data["name"],
-                description=tool_data["description"],
-                fn=fn,
-                async_fn=async_fn,
-                fn_schema=query_args_model,  # Re-assign the recreated dynamic model
-                tool_type=ToolType(tool_data["tool_type"]),
-            )
-            tools.append(tool)
-
-        agent = cls(
-            tools=tools,
-            agent_config=agent_config,
-            topic=data["topic"],
-            custom_instructions=data["custom_instructions"],
-            verbose=data["verbose"],
-            fallback_agent_config=fallback_agent_config,
-            workflow_cls=data["workflow_cls"],
-            agent_progress_callback=agent_progress_callback,
-            query_logging_callback=query_logging_callback,
-        )
-        memory = (
-            pickle.loads(data["memory"].encode("latin-1"))
-            if data.get("memory")
-            else None
+        return deserialize_agent_from_dict(
+            cls, data, agent_progress_callback, query_logging_callback
         )
-        if memory:
-            agent.agent.memory = memory
-            agent.memory = memory
-        return agent
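With both method bodies now delegated to `agent_core.serialization`, the public surface is just this `to_dict` / `from_dict` pair. A hedged round-trip sketch (assumes an `agent` built elsewhere; per the signature above, callbacks are supplied at deserialization time rather than being serialized):

    data = agent.to_dict()  # plain dict, delegated to serialize_agent_to_dict
    restored = Agent.from_dict(
        data,
        agent_progress_callback=None,  # re-attach callables here if needed
        query_logging_callback=None,
    )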