vectara-agentic 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of vectara-agentic might be problematic.

Files changed (38)
  1. tests/benchmark_models.py +945 -0
  2. tests/conftest.py +9 -5
  3. tests/run_tests.py +3 -0
  4. tests/test_agent.py +57 -29
  5. tests/test_agent_fallback_memory.py +270 -0
  6. tests/test_agent_memory_consistency.py +229 -0
  7. tests/test_agent_type.py +4 -0
  8. tests/test_bedrock.py +46 -31
  9. tests/test_fallback.py +1 -1
  10. tests/test_gemini.py +7 -22
  11. tests/test_groq.py +46 -31
  12. tests/test_private_llm.py +1 -1
  13. tests/test_serialization.py +3 -6
  14. tests/test_session_memory.py +252 -0
  15. tests/test_streaming.py +58 -37
  16. tests/test_together.py +62 -0
  17. tests/test_vhc.py +3 -2
  18. tests/test_workflow.py +9 -28
  19. vectara_agentic/_observability.py +19 -0
  20. vectara_agentic/_version.py +1 -1
  21. vectara_agentic/agent.py +246 -37
  22. vectara_agentic/agent_core/factory.py +34 -153
  23. vectara_agentic/agent_core/prompts.py +19 -13
  24. vectara_agentic/agent_core/serialization.py +17 -8
  25. vectara_agentic/agent_core/streaming.py +27 -43
  26. vectara_agentic/agent_core/utils/__init__.py +0 -5
  27. vectara_agentic/agent_core/utils/hallucination.py +54 -99
  28. vectara_agentic/llm_utils.py +4 -2
  29. vectara_agentic/sub_query_workflow.py +3 -2
  30. vectara_agentic/tools.py +0 -19
  31. vectara_agentic/types.py +9 -3
  32. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/METADATA +79 -39
  33. vectara_agentic-0.4.2.dist-info/RECORD +54 -0
  34. vectara_agentic/agent_core/utils/prompt_formatting.py +0 -56
  35. vectara_agentic-0.4.0.dist-info/RECORD +0 -50
  36. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/WHEEL +0 -0
  37. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/licenses/LICENSE +0 -0
  38. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/top_level.txt +0 -0
vectara_agentic/agent.py CHANGED
@@ -3,10 +3,11 @@ This module contains the Agent class for handling different types of agents and
 """
 
 import warnings
+
 warnings.simplefilter("ignore", DeprecationWarning)
 
 # pylint: disable=wrong-import-position
-from typing import List, Callable, Optional, Dict, Any, Union, Tuple, TYPE_CHECKING
+from typing import List, Callable, Optional, Dict, Any, Tuple, TYPE_CHECKING
 import os
 from datetime import date
 import json
@@ -19,16 +20,16 @@ from pydantic_core import PydanticUndefined
 from dotenv import load_dotenv
 
 # Runtime imports for components used at module level
-from llama_index.core.llms import MessageRole
+from llama_index.core.llms import MessageRole, ChatMessage
 from llama_index.core.callbacks import CallbackManager
-from llama_index.core.memory import Memory
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core.storage.chat_store import SimpleChatStore
 
 # Heavy llama_index imports moved to TYPE_CHECKING for lazy loading
 if TYPE_CHECKING:
     from llama_index.core.tools import FunctionTool
     from llama_index.core.workflow import Workflow
-    from llama_index.core.agent.runner.base import AgentRunner
-    from llama_index.core.agent.types import BaseAgent
+    from llama_index.core.agent import BaseWorkflowAgent
     from llama_index.core.callbacks.base_handler import BaseCallbackHandler
 
 
@@ -96,6 +97,7 @@ class Agent:
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
         vectara_api_key: Optional[str] = None,
+        session_id: Optional[str] = None,
     ) -> None:
         """
         Initialize the agent with the specified type, tools, topic, and system message.
@@ -120,7 +122,9 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
-            vectara_api_key (str, optional): The Vectara API key for FCS evaluation. Defaults to None.
+            vectara_api_key (str, optional): The Vectara API key for VHC computation. Defaults to None.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
         """
         self.agent_config = agent_config or AgentConfig()
         self.agent_config_type = AgentConfigType.DEFAULT
@@ -147,7 +151,9 @@
 
         # Validate tools
         if validate_tools:
-            validate_tool_consistency(self.tools, self._custom_instructions, self.agent_config)
+            validate_tool_consistency(
+                self.tools, self._custom_instructions, self.agent_config
+            )
 
         # Setup callback manager
         callbacks: list[BaseCallbackHandler] = [
@@ -157,15 +163,18 @@
         self.verbose = verbose
 
         self.session_id = (
-            getattr(self, "session_id", None) or f"{topic}:{date.today().isoformat()}"
+            session_id
+            or getattr(self, "session_id", None)
+            or f"{topic}:{date.today().isoformat()}"
         )
 
-        self.memory = Memory.from_defaults(
-            session_id=self.session_id, token_limit=65536
+        chat_store = SimpleChatStore()
+        self.memory = ChatMemoryBuffer.from_defaults(
+            chat_store=chat_store,
+            chat_store_key=self.session_id,
+            token_limit=65536
         )
         if chat_history:
-            from llama_index.core.llms import ChatMessage
-
             msgs = []
            for u, a in chat_history:
                msgs.append(ChatMessage.from_str(u, role=MessageRole.USER))
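
Note: the new memory wiring follows the standard LlamaIndex chat-store pattern. A minimal standalone sketch of the behavior (the session key value here is illustrative, not from the package):

    from llama_index.core.llms import ChatMessage, MessageRole
    from llama_index.core.memory import ChatMemoryBuffer
    from llama_index.core.storage.chat_store import SimpleChatStore

    # One SimpleChatStore can hold many sessions; each buffer reads and
    # writes only the key it was created with (the agent's session_id).
    chat_store = SimpleChatStore()
    memory = ChatMemoryBuffer.from_defaults(
        chat_store=chat_store,
        chat_store_key="finance:2025-01-01",  # illustrative session_id
        token_limit=65536,
    )
    memory.put(ChatMessage(role=MessageRole.USER, content="hello"))
    assert len(memory.get()) == 1
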
@@ -184,6 +193,12 @@
                 logger.warning(f"Failed to set up observer ({e}), ignoring")
                 self.observability_enabled = False
 
+        # VHC state tracking
+        self._vhc_cache = {}  # Cache VHC results by query hash
+        self._last_query = None
+        self._last_response = None
+        self._current_tool_outputs = []  # Store tool outputs from current query for VHC
+
     @property
     def llm(self):
         """Lazy-loads the LLM."""
@@ -209,7 +224,7 @@
 
     def _create_agent(
         self, config: AgentConfig, llm_callback_manager: "CallbackManager"
-    ) -> Union["BaseAgent", "AgentRunner"]:
+    ) -> "BaseWorkflowAgent":
         """
         Creates the agent based on the configuration object.
 
@@ -218,7 +233,7 @@
             llm_callback_manager: The callback manager for the agent's llm.
 
         Returns:
-            Union[BaseAgent, AgentRunner]: The configured agent object.
+            BaseWorkflowAgent: The configured agent object.
         """
         # Use the same LLM instance for consistency
         llm = (
@@ -241,12 +256,11 @@
         )
 
     def clear_memory(self) -> None:
-        """Clear the agent's memory."""
+        """Clear the agent's memory and reset agent instances to ensure consistency."""
         self.memory.reset()
-        if getattr(self, "_agent", None):
-            self._agent.memory = self.memory
-        if getattr(self, "_fallback_agent", None):
-            self._fallback_agent.memory = self.memory
+        # Clear agent instances so they get recreated with the cleared memory
+        self._agent = None
+        self._fallback_agent = None
 
     def __eq__(self, other):
         if not isinstance(other, Agent):
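
Note: because `_agent` and `_fallback_agent` are now dropped rather than patched in place, the next call lazily rebuilds them against the freshly reset memory. A hedged sketch of the resulting behavior (the tool list is a placeholder):

    agent = Agent.from_tools(tools=my_tools, topic="finance")  # my_tools is hypothetical
    agent.chat("Remember that my budget is $500.")
    agent.clear_memory()              # resets memory and drops both cached agent instances
    agent.chat("What is my budget?")  # underlying agent is recreated with the empty memory
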
@@ -320,6 +334,7 @@
         chat_history: Optional[list[Tuple[str, str]]] = None,
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
+        session_id: Optional[str] = None,
     ) -> "Agent":
         """
         Create an agent from tools, agent type, and language model.
@@ -339,6 +354,8 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
 
         Returns:
             Agent: An instance of the Agent class.
@@ -356,6 +373,7 @@
             fallback_agent_config=fallback_agent_config,
             workflow_cls=workflow_cls,
             workflow_timeout=workflow_timeout,
+            session_id=session_id,
         )
 
     @classmethod
@@ -400,8 +418,18 @@
         vectara_presence_penalty: Optional[float] = None,
         vectara_save_history: bool = True,
         return_direct: bool = False,
+        session_id: Optional[str] = None,
     ) -> "Agent":
-        """Create an agent from a single Vectara corpus using the factory function."""
+        """Create an agent from a single Vectara corpus using the factory function.
+
+        Args:
+            tool_name (str): Name of the tool to be created.
+            data_description (str): Description of the data/corpus.
+            assistant_specialty (str): The specialty/topic of the assistant.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
+            ... (other parameters as documented in factory function)
+        """
         # Use the factory function to avoid code duplication
         config = create_agent_from_corpus(
             tool_name=tool_name,
@@ -444,6 +472,7 @@
             chat_history=chat_history,
             agent_progress_callback=agent_progress_callback,
             query_logging_callback=query_logging_callback,
+            session_id=session_id,
             **config,
         )
 
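Note: a hedged usage sketch of the new `session_id` parameter on `Agent.from_corpus` (all values are placeholders; Vectara corpus credentials are passed via the usual `vectara_*` arguments, omitted here):

    agent = Agent.from_corpus(
        tool_name="ask_finance",
        data_description="quarterly financial reports",
        assistant_specialty="finance",
        session_id="user-123:finance",  # stable key for this user's conversation memory
    )
    print(agent.chat("Summarize Q2 revenue drivers.").response)
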
@@ -451,11 +480,16 @@
         """
         Switch the configuration type of the agent.
         This function is called automatically to switch the agent configuration if the current configuration fails.
+        Ensures memory consistency by clearing agent instances so they are recreated with current memory.
         """
         if self.agent_config_type == AgentConfigType.DEFAULT:
             self.agent_config_type = AgentConfigType.FALLBACK
+            # Clear the fallback agent so it gets recreated with current memory
+            self._fallback_agent = None
         else:
             self.agent_config_type = AgentConfigType.DEFAULT
+            # Clear the main agent so it gets recreated with current memory
+            self._agent = None
 
     def report(self, detailed: bool = False) -> None:
         """
@@ -501,19 +535,6 @@
             else self.fallback_agent_config.agent_type
         )
 
-    async def _aformat_for_lats(self, prompt, agent_response):
-        llm_prompt = f"""
-        Given the question '{prompt}', and agent response '{agent_response.response}',
-        Please provide a well formatted final response to the query.
-        final response:
-        """
-        agent_type = self._get_current_agent_type()
-        if agent_type != AgentType.LATS:
-            return
-
-        agent = self._get_current_agent()
-        agent_response.response = (await agent.llm.acomplete(llm_prompt)).text
-
     def chat(self, prompt: str) -> AgentResponse:
         """
         Interact with the agent using a chat prompt.
@@ -562,9 +583,12 @@
                 ]:
                     from llama_index.core.workflow import Context
 
+                    # Create context and pass memory to the workflow agent
+                    # According to LlamaIndex docs, we should let the workflow manage memory internally
                     ctx = Context(current_agent)
+
                     handler = current_agent.run(
-                        user_msg=prompt, ctx=ctx, memory=self.memory
+                        user_msg=prompt, memory=self.memory, ctx=ctx
                     )
 
                     # Listen to workflow events if progress callback is set
@@ -698,6 +722,27 @@
                         response=response_text, metadata=getattr(result, "metadata", {})
                     )
 
+                    # Retrieve updated memory from workflow context
+                    # According to LlamaIndex docs, workflow agents manage memory internally
+                    # and we can access it via ctx.store.get("memory")
+                    try:
+                        workflow_memory = await ctx.store.get("memory")
+                        if workflow_memory:
+                            # Update our external memory with the workflow's memory
+                            self.memory = workflow_memory
+                    except Exception as e:
+                        # If we can't retrieve workflow memory, fall back to manual management
+                        warning_msg = (
+                            f"Could not retrieve workflow memory, falling back to "
+                            f"manual management: {e}"
+                        )
+                        logger.warning(warning_msg)
+                        user_msg = ChatMessage.from_str(prompt, role=MessageRole.USER)
+                        assistant_msg = ChatMessage.from_str(
+                            response_text, role=MessageRole.ASSISTANT
+                        )
+                        self.memory.put_messages([user_msg, assistant_msg])
+
                 # Standard chat interaction for other agent types
                 else:
                     agent_response = await current_agent.achat(prompt)
@@ -713,7 +758,9 @@
             except Exception as e:
                 last_error = e
                 if self.verbose:
-                    logger.warning(f"LLM call failed on attempt {attempt}. " f"Error: {e}.")
+                    logger.warning(
+                        f"LLM call failed on attempt {attempt}. " f"Error: {e}."
+                    )
                 if attempt >= 2 and self.fallback_agent_config:
                     self._switch_agent_config()
                 await asyncio.sleep(1)
@@ -750,6 +797,9 @@
         Returns:
             AgentStreamingResponse: The streaming response from the agent.
         """
+        # Store query for VHC processing and clear previous tool outputs
+        self._last_query = prompt
+        self._clear_tool_outputs()
         max_attempts = 4 if self.fallback_agent_config else 2
         attempt = 0
         orig_llm = self.llm.metadata.model_name
@@ -763,9 +813,12 @@
 
                 if self._get_current_agent_type() == AgentType.FUNCTION_CALLING:
                     from llama_index.core.workflow import Context
+                    # Create context and pass memory to the workflow agent
+                    # According to LlamaIndex docs, we should let the workflow manage memory internally
                     ctx = Context(current_agent)
+
                     handler = current_agent.run(
-                        user_msg=prompt, ctx=ctx, memory=self.memory
+                        user_msg=prompt, memory=self.memory, ctx=ctx
                     )
 
                     # Use the dedicated FunctionCallingStreamHandler
@@ -809,6 +862,134 @@
             f"{max_attempts} attempts ({last_error})."
         )
 
+    def _clear_tool_outputs(self):
+        """Clear stored tool outputs at the start of a new query."""
+        self._current_tool_outputs.clear()
+        logging.info("🔧 [TOOL_STORAGE] Cleared stored tool outputs for new query")
+
+    def _add_tool_output(self, tool_name: str, content: str):
+        """Add a tool output to the current collection for VHC."""
+        tool_output = {
+            'status_type': 'TOOL_OUTPUT',
+            'content': content,
+            'tool_name': tool_name
+        }
+        self._current_tool_outputs.append(tool_output)
+        logging.info(f"🔧 [TOOL_STORAGE] Added tool output from '{tool_name}': {len(content)} chars")
+
+    def _get_stored_tool_outputs(self) -> List[dict]:
+        """Get the stored tool outputs from the current query."""
+        logging.info(f"🔧 [TOOL_STORAGE] Retrieved {len(self._current_tool_outputs)} stored tool outputs")
+        return self._current_tool_outputs.copy()
+
+    async def acompute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (async version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] UNIQUE_DEBUG_MESSAGE acompute_vhc method called - "
+            f"stored_tool_outputs_count={len(self._current_tool_outputs)}"
+        )
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] _last_query: {'set' if self._last_query else 'None'}"
+        )
+
+        if not self._last_query:
+            logging.info("🔍 [VHC_AGENT] Returning early - no _last_query")
+            return {"corrected_text": None, "corrections": []}
+
+        # For VHC to work, we need the response text from memory
+        # Get the latest assistant response from memory
+        messages = self.memory.get()
+        logging.info(
+            f"🔍 [VHC_AGENT] memory.get() returned {len(messages) if messages else 0} messages"
+        )
+
+        if not messages:
+            logging.info("🔍 [VHC_AGENT] Returning early - no messages in memory")
+            return {"corrected_text": None, "corrections": []}
+
+        # Find the last assistant message
+        last_response = None
+        for msg in reversed(messages):
+            if msg.role == MessageRole.ASSISTANT:
+                last_response = msg.content
+                break
+
+        logging.info(
+            f"🔍 [VHC_AGENT] Found last_response: {'set' if last_response else 'None'}"
+        )
+
+        if not last_response:
+            logging.info("🔍 [VHC_AGENT] Returning early - no last assistant response found")
+            return {"corrected_text": None, "corrections": []}
+
+        # Update stored response for caching
+        self._last_response = last_response
+
+        # Create cache key from query + response
+        cache_key = hash(f"{self._last_query}:{self._last_response}")
+
+        # Return cached results if available
+        if cache_key in self._vhc_cache:
+            return self._vhc_cache[cache_key]
+
+        # Check if we have VHC API key
+        logging.info(
+            f"🔍 [VHC_AGENT] acompute_vhc called with vectara_api_key={'set' if self.vectara_api_key else 'None'}"
+        )
+        if not self.vectara_api_key:
+            logging.info("🔍 [VHC_AGENT] No vectara_api_key - returning early with None")
+            return {"corrected_text": None, "corrections": []}
+
+        # Compute VHC using existing library function
+        from .agent_core.utils.hallucination import analyze_hallucinations
+
+        try:
+            # Use stored tool outputs from current query
+            stored_tool_outputs = self._get_stored_tool_outputs()
+            logging.info(f"🔧 [VHC_AGENT] Using {len(stored_tool_outputs)} stored tool outputs for VHC")
+
+            corrected_text, corrections = analyze_hallucinations(
+                query=self._last_query,
+                chat_history=self.memory.get(),
+                agent_response=self._last_response,
+                tools=self.tools,
+                vectara_api_key=self.vectara_api_key,
+                tool_outputs=stored_tool_outputs,
+            )
+
+            # Cache results
+            results = {"corrected_text": corrected_text, "corrections": corrections}
+            self._vhc_cache[cache_key] = results
+
+            return results
+
+        except Exception as e:
+            logger.error(f"VHC computation failed: {e}")
+            return {"corrected_text": None, "corrections": []}
+
+    def compute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (sync version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        try:
+            loop = asyncio.get_event_loop()
+            return loop.run_until_complete(self.acompute_vhc())
+        except RuntimeError:
+            # No event loop running, create a new one
+            return asyncio.run(self.acompute_vhc())
+
     #
     # run() method for running a workflow
     # workflow will always get these arguments in the StartEvent: agent, tools, llm, verbose
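
Note: a hedged sketch of how the new VHC (Vectara Hallucination Correction) API is meant to be called. Per the docstrings above, tool outputs are collected during streaming; the tool list, API key value, and the `stream_chat` entry point shown here are assumptions, not taken from this diff:

    agent = Agent.from_tools(
        tools=my_tools,            # hypothetical tool list
        topic="finance",
        vectara_api_key="vk-...",  # required for VHC; placeholder value
    )
    streaming_response = agent.stream_chat("What was 2023 net income?")  # assumed streaming entry point
    # ... consume the stream, then:
    vhc = agent.compute_vhc()      # cached per (query, response) pair
    print(vhc["corrected_text"])   # corrected response text, or None if VHC did not run
    print(vhc["corrections"])      # list of corrections (empty if none)
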
@@ -882,7 +1063,9 @@
                     input_dict[key] = value
                 output = outputs_model_on_fail_cls.model_validate(input_dict)
             else:
-                logger.warning(f"Vectara Agentic: Workflow failed with unexpected error: {e}")
+                logger.warning(
+                    f"Vectara Agentic: Workflow failed with unexpected error: {e}"
+                )
                 raise type(e)(str(e)).with_traceback(e.__traceback__)
 
         return output
@@ -923,3 +1106,29 @@
         return deserialize_agent_from_dict(
             cls, data, agent_progress_callback, query_logging_callback
         )
+
+    def cleanup(self) -> None:
+        """Clean up resources used by the agent."""
+        from ._observability import shutdown_observer
+
+        if hasattr(self, 'agent') and hasattr(self.agent, '_llm'):
+            llm = self.agent._llm
+            if hasattr(llm, 'client') and hasattr(llm.client, 'close'):
+                try:
+                    if asyncio.iscoroutinefunction(llm.client.close):
+                        asyncio.run(llm.client.close())
+                    else:
+                        llm.client.close()
+                except Exception:
+                    pass
+
+        # Shutdown observability connections
+        shutdown_observer()
+
+    def __enter__(self):
+        """Context manager entry."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit with cleanup."""
+        self.cleanup()
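
Note: with `__enter__`/`__exit__` in place, an agent can be used as a context manager so `cleanup()` runs even when an error escapes the block. A minimal sketch (the tool list is hypothetical):

    with Agent.from_tools(tools=my_tools, topic="finance") as agent:
        print(agent.chat("Hello").response)
    # cleanup() has run here: LLM client closed, observability shut down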