vectara-agentic 0.2.23__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic.
- tests/test_agent.py +2 -2
- tests/test_hhem.py +100 -0
- tests/test_return_direct.py +2 -6
- tests/test_tools.py +1 -1
- vectara_agentic/_callback.py +26 -18
- vectara_agentic/_prompts.py +4 -0
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +69 -12
- vectara_agentic/db_tools.py +1 -1
- vectara_agentic/hhem.py +45 -0
- vectara_agentic/llm_utils.py +34 -10
- vectara_agentic/tool_utils.py +177 -15
- vectara_agentic/tools.py +102 -86
- vectara_agentic/types.py +22 -1
- {vectara_agentic-0.2.23.dist-info → vectara_agentic-0.3.0.dist-info}/METADATA +24 -2
- {vectara_agentic-0.2.23.dist-info → vectara_agentic-0.3.0.dist-info}/RECORD +19 -17
- {vectara_agentic-0.2.23.dist-info → vectara_agentic-0.3.0.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.2.23.dist-info → vectara_agentic-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.2.23.dist-info → vectara_agentic-0.3.0.dist-info}/top_level.txt +0 -0
vectara_agentic/tool_utils.py
CHANGED
@@ -6,8 +6,16 @@ import inspect
 import re
 
 from typing import (
-    Callable,
-
+    Callable,
+    List,
+    Dict,
+    Any,
+    Optional,
+    Union,
+    Type,
+    Tuple,
+    get_origin,
+    get_args,
 )
 from pydantic import BaseModel, create_model
 from pydantic_core import PydanticUndefined
@@ -83,7 +91,7 @@ class VectaraTool(FunctionTool):
             tool_metadata,
             callback,
             async_callback,
-            partial_params
+            partial_params,
         )
         vectara_tool = cls(
             tool_type=tool_type,
@@ -119,7 +127,8 @@ class VectaraTool(FunctionTool):
         self, *args: Any, ctx: Optional[Context] = None, **kwargs: Any
     ) -> ToolOutput:
         try:
-
+            result = super().call(*args, ctx=ctx, **kwargs)
+            return self._format_tool_output(result)
         except TypeError as e:
             sig = inspect.signature(self.metadata.fn_schema)
             valid_parameters = list(sig.parameters.keys())
@@ -148,7 +157,8 @@ class VectaraTool(FunctionTool):
         self, *args: Any, ctx: Optional[Context] = None, **kwargs: Any
     ) -> ToolOutput:
         try:
-
+            result = await super().acall(*args, ctx=ctx, **kwargs)
+            return self._format_tool_output(result)
         except TypeError as e:
             sig = inspect.signature(self.metadata.fn_schema)
             valid_parameters = list(sig.parameters.keys())
@@ -166,6 +176,7 @@ class VectaraTool(FunctionTool):
             return err_output
         except Exception as e:
             import traceback
+
             err_output = ToolOutput(
                 tool_name=self.metadata.name,
                 content=f"Tool {self.metadata.name} Malfunction: {str(e)}, traceback: {traceback.format_exc()}",
@@ -174,10 +185,39 @@ class VectaraTool(FunctionTool):
             )
             return err_output
 
+    def _format_tool_output(self, result: ToolOutput) -> ToolOutput:
+        """Format tool output to use human-readable representation if available."""
+        if hasattr(result, "content") and _is_human_readable_output(result.content):
+            try:
+                # Use human-readable format for content, keep raw output
+                human_readable_content = result.content.to_human_readable()
+                raw_output = result.content.get_raw_output()
+                return ToolOutput(
+                    tool_name=result.tool_name,
+                    content=human_readable_content,
+                    raw_input=result.raw_input,
+                    raw_output=raw_output,
+                )
+            except Exception as e:
+                # If formatting fails, fall back to original content with error info
+                import logging
+
+                logging.warning(
+                    f"Failed to format tool output for {result.tool_name}: {e}"
+                )
+                return ToolOutput(
+                    tool_name=result.tool_name,
+                    content=f"[Formatting Error] {str(result.content)}",
+                    raw_input=result.raw_input,
+                    raw_output={"error": str(e), "original_content": result.content},
+                )
+        return result
+
 
 class EmptyBaseModel(BaseModel):
     """empty base model"""
 
+
 def _clean_type_repr(type_repr: str) -> str:
     """Cleans the string representation of a type."""
     # Replace <class 'somename'> with somename
@@ -188,6 +228,7 @@ def _clean_type_repr(type_repr: str) -> str:
     type_repr = type_repr.replace("typing.", "")
     return type_repr
 
+
 def _format_type(annotation) -> str:
     """
     Turn things like Union[int, str, NoneType] into 'int | str | None',
@@ -209,6 +250,7 @@ def _format_type(annotation) -> str:
     type_repr = _clean_type_repr(type_repr)
     return type_repr.replace("NoneType", "None")
 
+
 def _make_docstring(
     function: Callable[..., ToolOutput],
     tool_name: str,
@@ -267,11 +309,15 @@ def _make_docstring(
             ty_info = schema_prop["type"]
             if isinstance(ty_info, str):
                 ty_str = _clean_type_repr(ty_info)
-            elif isinstance(
+            elif isinstance(
+                ty_info, list
+            ):  # Handle JSON schema array type e.g., ["integer", "string"]
                 ty_str = " | ".join([_clean_type_repr(t) for t in ty_info])
 
         # inline default if present
-        default_txt =
+        default_txt = (
+            f", default={default!r}" if default is not PydanticUndefined else ""
+        )
 
         # inline examples if any
         if examples:
@@ -288,8 +334,8 @@ def _make_docstring(
         doc_lines.append(f" dict[str, Any]: {return_desc}")
 
     initial_docstring = "\n".join(doc_lines)
-    collapsed_spaces = re.sub(r
-    final_docstring = re.sub(r
+    collapsed_spaces = re.sub(r" {2,}", " ", initial_docstring)
+    final_docstring = re.sub(r"\n{2,}", "\n", collapsed_spaces).strip()
     return final_docstring
 
 
@@ -317,13 +363,17 @@ def create_tool_from_dynamic_function(
     if tool_args_schema is None:
         tool_args_schema = EmptyBaseModel
 
-    if not isinstance(tool_args_schema, type) or not issubclass(
+    if not isinstance(tool_args_schema, type) or not issubclass(
+        tool_args_schema, BaseModel
+    ):
         raise TypeError("tool_args_schema must be a Pydantic BaseModel subclass")
 
     fields: Dict[str, Any] = {}
     base_params = []
     for field_name, field_info in base_params_model.model_fields.items():
-        default =
+        default = (
+            Ellipsis if field_info.default is PydanticUndefined else field_info.default
+        )
         param = inspect.Parameter(
             field_name,
             inspect.Parameter.POSITIONAL_OR_KEYWORD,
@@ -338,7 +388,9 @@ def create_tool_from_dynamic_function(
        if field_name in fields:
            continue
 
-        default =
+        default = (
+            Ellipsis if field_info.default is PydanticUndefined else field_info.default
+        )
        param = inspect.Parameter(
            field_name,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
@@ -362,9 +414,7 @@ def create_tool_from_dynamic_function(
     function.__name__ = re.sub(r"[^A-Za-z0-9_]", "_", tool_name)
 
     function.__doc__ = _make_docstring(
-        function,
-        tool_name, tool_description, fn_schema,
-        all_params, compact_docstring
+        function, tool_name, tool_description, fn_schema, all_params, compact_docstring
     )
     tool = VectaraTool.from_defaults(
         fn=function,
@@ -526,3 +576,115 @@ def build_filter_string(
     if fixed_filter and joined:
         return f"({fixed_filter}) AND ({joined})"
     return fixed_filter or joined
+
+
+def _is_human_readable_output(obj: Any) -> bool:
+    """Check if an object implements the HumanReadableOutput protocol."""
+    return (
+        hasattr(obj, "to_human_readable")
+        and hasattr(obj, "get_raw_output")
+        and callable(getattr(obj, "to_human_readable", None))
+        and callable(getattr(obj, "get_raw_output", None))
+    )
+
+
+def create_human_readable_output(
+    raw_output: Any, formatter: Optional[Callable[[Any], str]] = None
+) -> "HumanReadableToolOutput":
+    """Create a HumanReadableToolOutput wrapper for tool outputs."""
+    return HumanReadableToolOutput(raw_output, formatter)
+
+
+def format_as_table(data: List[Dict[str, Any]], max_width: int = 80) -> str:
+    """Format list of dictionaries as a table."""
+    if not data:
+        return "No data to display"
+
+    # Get all unique keys
+    all_keys = set()
+    for item in data:
+        all_keys.update(item.keys())
+
+    headers = list(all_keys)
+
+    # Calculate column widths
+    col_widths = {}
+    for header in headers:
+        col_widths[header] = max(
+            len(header), max(len(str(item.get(header, ""))) for item in data)
+        )
+        # Limit column width
+        col_widths[header] = min(col_widths[header], max_width // len(headers))
+
+    # Create table
+    lines = []
+
+    # Header row
+    header_row = " | ".join(header.ljust(col_widths[header]) for header in headers)
+    lines.append(header_row)
+    lines.append("-" * len(header_row))
+
+    # Data rows
+    for item in data:
+        row = " | ".join(
+            str(item.get(header, "")).ljust(col_widths[header])[: col_widths[header]]
+            for header in headers
+        )
+        lines.append(row)
+
+    return "\n".join(lines)
+
+
+def format_as_json(data: Any, indent: int = 2) -> str:
+    """Format data as pretty-printed JSON."""
+    import json
+
+    try:
+        return json.dumps(data, indent=indent, ensure_ascii=False)
+    except (TypeError, ValueError):
+        return str(data)
+
+
+def format_as_markdown_list(items: List[Any], numbered: bool = False) -> str:
+    """Format items as markdown list."""
+    if not items:
+        return "No items to display"
+
+    if numbered:
+        return "\n".join(f"{i+1}. {item}" for i, item in enumerate(items))
+    else:
+        return "\n".join(f"- {item}" for item in items)
+
+
+class HumanReadableToolOutput:
+    """Wrapper class that implements HumanReadableOutput protocol."""
+
+    def __init__(
+        self, raw_output: Any, formatter: Optional[Callable[[Any], str]] = None
+    ):
+        self._raw_output = raw_output
+        self._formatter = formatter or str
+
+    def to_human_readable(self) -> str:
+        """Convert the output to a human-readable format."""
+        try:
+            return self._formatter(self._raw_output)
+        except Exception as e:
+            import logging
+
+            logging.warning(f"Failed to format output with custom formatter: {e}")
+            # Fallback to string representation
+            try:
+                return str(self._raw_output)
+            except Exception:
+                return f"[Error formatting output: {e}]"
+
+    def get_raw_output(self) -> Any:
+        """Get the raw output data."""
+        return self._raw_output
+
+    def __str__(self) -> str:
+        return self.to_human_readable()
+
+    def __repr__(self) -> str:
+        return f"HumanReadableToolOutput({self._raw_output!r})"
vectara_agentic/tools.py
CHANGED
@@ -14,7 +14,6 @@ from pydantic import BaseModel, Field
 from llama_index.core.tools import FunctionTool
 from llama_index.indices.managed.vectara import VectaraIndex
 from llama_index.core.utilities.sql_wrapper import SQLDatabase
-from llama_index.core.tools.types import ToolOutput
 
 from .types import ToolType
 from .tools_catalog import ToolsCatalog, get_bad_topics
@@ -25,6 +24,7 @@ from .tool_utils import (
     create_tool_from_dynamic_function,
     build_filter_string,
     VectaraTool,
+    create_human_readable_output,
 )
 
 LI_packages = {
@@ -170,7 +170,7 @@ class VectaraToolFactory:
         )
 
         # Dynamically generate the search function
-        def search_function(*args: Any, **kwargs: Any) ->
+        def search_function(*args: Any, **kwargs: Any) -> list[dict]:
             """
             Dynamically generated function for semantic search Vectara.
             """
@@ -192,12 +192,11 @@ class VectaraToolFactory:
                     kwargs, tool_args_type, fixed_filter
                 )
             except ValueError as e:
-
-
-
-                    raw_input={"args": args, "kwargs": kwargs},
-                    raw_output={"response": str(e)},
+                msg = (
+                    f"Building filter string failed in search tool due to invalid input or configuration ({e}). "
+                    "Please verify the input arguments and ensure they meet the expected format or conditions."
                 )
+                return [{"text": msg, "metadata": {"args": args, "kwargs": kwargs}}]
 
             vectara_retriever = vectara.as_retriever(
                 summary_enabled=False,
@@ -228,20 +227,19 @@ class VectaraToolFactory:
 
             if len(response) == 0:
                 msg = "Vectara Tool failed to retrieve any results for the query."
-                return
-                    tool_name=search_function.__name__,
-                    content=msg,
-                    raw_input={"args": args, "kwargs": kwargs},
-                    raw_output={"response": msg},
-                )
+                return [{"text": msg, "metadata": {"args": args, "kwargs": kwargs}}]
             unique_ids = set()
             docs = []
+            doc_matches = {}
             for doc in response:
                 if doc.id_ in unique_ids:
+                    doc_matches[doc.id_].append(doc.node.get_content())
                     continue
                 unique_ids.add(doc.id_)
+                doc_matches[doc.id_] = [doc.node.get_content()]
                 docs.append((doc.id_, doc.metadata))
-
+
+            res = []
             if summarize:
                 summaries_dict = asyncio.run(
                     summarize_documents(
@@ -251,22 +249,50 @@ class VectaraToolFactory:
                         doc_ids=list(unique_ids),
                    )
                )
-                for doc_id, metadata in docs:
-                    summary = summaries_dict.get(doc_id, "")
-                    tool_output += f"document_id: '{doc_id}'\nmetadata: '{metadata}'\nsummary: '{summary}'\n\n"
             else:
-
-
-
+                summaries_dict = {}
+
+            for doc_id, metadata in docs:
+                res.append(
+                    {
+                        "text": summaries_dict.get(doc_id, "") if summarize else "",
+                        "metadata": {
+                            "document_id": doc_id,
+                            "metadata": metadata,
+                            "matching_text": doc_matches[doc_id],
+                        },
+                    }
+                )
+
+            # Create human-readable output using sequential format
+            def format_search_results(results):
+                if not results:
+                    return "No search results found"
+
+                # Create a sequential view for human reading
+                formatted_results = []
+                for i, result in enumerate(results, 1):
+                    result_str = f"**Result #{i}**\n"
+                    result_str += f"Document ID: {result['metadata']['document_id']}\n"
+                    result_str += (
+                        f"Matches: {len(result['metadata']['matching_text'])}\n"
                     )
 
-
-
-
-
-
-
-
+                    if summarize and result["text"]:
+                        result_str += f"Summary: {result['text']}\n"
+
+                    # Add sample matching text if available
+                    if result["metadata"]["matching_text"]:
+                        sample_matches = result["metadata"]["matching_text"][
+                            :2
+                        ]  # Show first 2 matches
+                        result_str += f"Sample matches: {', '.join(sample_matches)}\n"
+
+                    formatted_results.append(result_str)
+
+                return "\n".join(formatted_results)
+
+            return create_human_readable_output(res, format_search_results)
 
         class SearchToolBaseParams(BaseModel):
             """Model for the base parameters of the search tool."""
@@ -346,6 +372,7 @@ class VectaraToolFactory:
         frequency_penalty: Optional[float] = None,
         presence_penalty: Optional[float] = None,
         include_citations: bool = True,
+        citation_pattern: str = "{doc.url}",
         save_history: bool = False,
        fcs_threshold: float = 0.0,
        return_direct: bool = False,
@@ -399,6 +426,9 @@ class VectaraToolFactory:
                 higher values increasing the diversity of topics.
             include_citations (bool, optional): Whether to include citations in the response.
                 If True, uses markdown vectara citations that requires the Vectara scale plan.
+            citation_pattern (str, optional): The pattern for the citations in the response.
+                Default is "{doc.url}" which uses the document URL.
+                If include_citations is False, this parameter is ignored.
             save_history (bool, optional): Whether to save the query in history.
             fcs_threshold (float, optional): A threshold for factual consistency.
                 If set above 0, the tool notifies the calling agent that it "cannot respond" if FCS is too low.
@@ -420,7 +450,7 @@ class VectaraToolFactory:
         )
 
         # Dynamically generate the RAG function
-        def rag_function(*args: Any, **kwargs: Any) ->
+        def rag_function(*args: Any, **kwargs: Any) -> dict:
             """
             Dynamically generated function for RAG query with Vectara.
             """
@@ -436,12 +466,12 @@ class VectaraToolFactory:
                     kwargs, tool_args_type, fixed_filter
                 )
             except ValueError as e:
-
-
-
-
-                    raw_output={"response": str(e)},
+                msg = (
+                    f"Building filter string failed in rag tool. "
+                    f"Reason: {e}. Ensure that the input arguments match the expected "
+                    f"format and include all required fields. "
                 )
+                return {"text": msg, "metadata": {"args": args, "kwargs": kwargs}}
 
             vectara_query_engine = vectara.as_query_engine(
                 summary_enabled=True,
@@ -475,7 +505,7 @@ class VectaraToolFactory:
                 frequency_penalty=frequency_penalty,
                 presence_penalty=presence_penalty,
                 citations_style="markdown" if include_citations else None,
-                citations_url_pattern=
+                citations_url_pattern=citation_pattern if include_citations else None,
                 save_history=save_history,
                 x_source_str="vectara-agentic",
                 verbose=verbose,
@@ -487,73 +517,59 @@ class VectaraToolFactory:
                     "Tool failed to generate a response since no matches were found. "
                    "Please check the arguments and try again."
                )
-                return
-                    tool_name=rag_function.__name__,
-                    content=msg,
-                    raw_input={"args": args, "kwargs": kwargs},
-                    raw_output={"response": msg},
-                )
+                return {"text": msg, "metadata": {"args": args, "kwargs": kwargs}}
             if str(response) == "None":
                 msg = "Tool failed to generate a response."
-                return
-                    tool_name=rag_function.__name__,
-                    content=msg,
-                    raw_input={"args": args, "kwargs": kwargs},
-                    raw_output={"response": msg},
-                )
+                return {"text": msg, "metadata": {"args": args, "kwargs": kwargs}}
 
             # Extract citation metadata
             pattern = r"\[(\d+)\]"
             matches = re.findall(pattern, response.response)
             citation_numbers = sorted(set(int(match) for match in matches))
-            citation_metadata =
+            citation_metadata = {}
             keys_to_ignore = ["lang", "offset", "len"]
             for citation_number in citation_numbers:
-                metadata =
-
-
-
-
-
-
-
-                ]
-                )
-                + ".\n"
-                )
+                metadata = {
+                    k: v
+                    for k, v in response.source_nodes[
+                        citation_number - 1
+                    ].metadata.items()
+                    if k not in keys_to_ignore
+                }
+                citation_metadata[str(citation_number)] = metadata
             fcs = 0.0
             fcs_str = response.metadata["fcs"] if "fcs" in response.metadata else "0.0"
             if fcs_str and is_float(fcs_str):
                 fcs = float(fcs_str)
             if fcs < fcs_threshold:
                 msg = f"Could not answer the query due to suspected hallucination (fcs={fcs})."
-                return
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                return {
+                    "text": msg,
+                    "metadata": {"args": args, "kwargs": kwargs, "fcs": fcs},
+                }
+            if fcs:
+                citation_metadata["fcs"] = fcs
+
+            res = {"text": response.response, "metadata": citation_metadata}
+
+            # Create human-readable output with citation formatting
+            def format_rag_response(result):
+                text = result["text"]
+                metadata = result["metadata"]
+
+                # Format citations if present
+                citation_info = []
+                for key, value in metadata.items():
+                    if key.isdigit():
+                        url = value.get("document", {}).get("url", None)
+                        if url:
+                            citation_info.append(f"[{key}]: {url}")
+                if citation_info:
+                    text += "\n\nCitations:\n" + "\n".join(citation_info)
+
+                return text
+
+            return create_human_readable_output(res, format_rag_response)
 
         class RagToolBaseParams(BaseModel):
             """Model for the base parameters of the RAG tool."""
vectara_agentic/types.py
CHANGED
@@ -1,11 +1,17 @@
 """
 This module contains the types used in the Vectara Agentic.
 """
+
 from enum import Enum
+from typing import Protocol, Any
 
+from llama_index.core.schema import Document as LI_Document
 from llama_index.core.tools.types import ToolOutput as LI_ToolOutput
 from llama_index.core.chat_engine.types import AgentChatResponse as LI_AgentChatResponse
-from llama_index.core.chat_engine.types import
+from llama_index.core.chat_engine.types import (
+    StreamingAgentChatResponse as LI_StreamingAgentChatResponse,
+)
+
 
 
 class AgentType(Enum):
     """Enumeration for different types of agents."""
@@ -16,6 +22,7 @@ class AgentType(Enum):
     LLMCOMPILER = "LLMCOMPILER"
     LATS = "LATS"
 
+
 class ObserverType(Enum):
     """Enumeration for different types of observability integrations."""
 
@@ -55,16 +62,30 @@ class LLMRole(Enum):
 
 class ToolType(Enum):
     """Enumeration for different types of tools."""
+
     QUERY = "query"
     ACTION = "action"
 
+
 class AgentConfigType(Enum):
     """Enumeration for different types of agent configurations."""
+
     DEFAULT = "default"
     FALLBACK = "fallback"
 
 
+class HumanReadableOutput(Protocol):
+    """Protocol for tool outputs that can provide human-readable representations."""
+
+    def to_human_readable(self) -> str:
+        """Convert the output to a human-readable format."""
+
+    def get_raw_output(self) -> Any:
+        """Get the raw output data."""
+
+
 # classes for Agent responses
 ToolOutput = LI_ToolOutput
 AgentResponse = LI_AgentChatResponse
 AgentStreamingResponse = LI_StreamingAgentChatResponse
+Document = LI_Document