PyPI - vectara-agentic - Versions diffs - 0.3.0__tar.gz → 0.3.1__tar.gz - Mend

vectara-agentic 0.3.0 (tar.gz) → 0.3.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{vectara_agentic-0.3.0/vectara_agentic.egg-info → vectara_agentic-0.3.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectara_agentic
-Version: 0.3.0
+Version: 0.3.1
 Summary: A Python package for creating AI Assistants and AI Agents with Vectara
 Home-page: https://github.com/vectara/py-vectara-agentic
 Author: Ofer Mendelevitch
@@ -61,6 +61,7 @@ Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: tiktoken==0.9.0
 Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: httpx==0.28.1
+Requires-Dist: commonmark==0.9.1
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

{vectara_agentic-0.3.0 → vectara_agentic-0.3.1}/requirements.txt RENAMED Viewed

@@ -43,3 +43,4 @@ python-dotenv==1.0.1
 tiktoken==0.9.0
 cloudpickle>=3.1.1
 httpx==0.28.1
+commonmark==0.9.1

{vectara_agentic-0.3.0 → vectara_agentic-0.3.1}/vectara_agentic/_prompts.py RENAMED Viewed

@@ -28,11 +28,11 @@ GENERAL_INSTRUCTIONS = """
 - If after retrying you can't get the information or answer the question, respond with "I don't know".
 - Handling references and citations:
   1) Include references and citations in your response to increase the credibility of your answer.
-  2) Citations should be included in the response, along with URLs, as in-text markers, such as [1](https://www.xxx.com), [2](https://www.yyy.com), etc.
+  2) Citations should be included in the response, along with URLs, as in-text markers, such as [1](https://www.xxx.com), [2](https://www.yyy.com/doc.pdf#page=2), etc.
      You can also replace the number with a word or sentence that describes the reference, such as "[according to Nvidia 10-K](https://www.xxx.com)".
      When adding a citation inline in the text, make sure to use proper spacing and punctuation.
   3) If a URL is a PDF file, and the tool also provided a page number - then combine the URL and page number in your response.
-     For example, if the URL returned from the tool is "https://www.xxx.com/doc.pdf" and "page=5", then the combined URL would be "https://www.xxx.com/doc.pdf#page=5".
+     For example, if the URL returned from the tool is "https://www.xxx.com/doc.pdf" and "page='5'", then the combined URL would be "https://www.xxx.com/doc.pdf#page=5".
   4) Where possible, integrate citations into the text of your response, such as "According to the [Nvidia 10-K](https://www.xxx.com), the revenue in 2021 was $10B".
   5) Only include citations if provided with a valid URL as part of the tool's output (directly or in the metadata).
   6) If a tool returns in the metadata invalid URLs or an empty URL (e.g. "[[1]()]"), ignore it and do not include that citation or reference in your response.

{vectara_agentic-0.3.0 → vectara_agentic-0.3.1}/vectara_agentic/_version.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """
 Define the version of the package.
 """
-__version__ = "0.3.0"
+__version__ = "0.3.1"

{vectara_agentic-0.3.0 → vectara_agentic-0.3.1}/vectara_agentic/hhem.py RENAMED Viewed

@@ -1,6 +1,34 @@
 """Vectara HHEM (Hughes Hallucination Evaluation Model) client."""
 import requests
+from commonmark import Parser
+def markdown_to_text(md: str) -> str:
+    """
+    Convert a Markdown-formatted string into plain text.
+    """
+    parser = Parser()
+    ast = parser.parse(md)
+    out: list[str] = []
+    def recurse(node):
+        if node.t in ("text", "code", "html_inline"):
+            out.append(node.literal or "")
+        elif node.t == "softbreak":
+            out.append(" ")
+        elif node.t == "linebreak":
+            out.append("\n")
+        child = getattr(node, "first_child", None)
+        while child is not None:
+            recurse(child)
+            child = getattr(child, "next", None)
+    recurse(ast)
+    text = "".join(out)
+    # collapse runs of spaces but preserve newlines
+    lines = [" ".join(line.split()) for line in text.splitlines()]
+    return "\n".join(line if line.strip() else "" for line in lines)
 class HHEM:
@@ -23,9 +51,18 @@ class HHEM:
         Raises:
             requests.exceptions.RequestException: If there is a network-related error or the API call fails.
         """
+        # clean response from any markdown or other formatting.
+        try:
+            clean_hypothesis = markdown_to_text(hypothesis)
+        except Exception as e:
+            # If markdown parsing fails, abort with a ValueError rather than scoring the unparsed text
+            raise ValueError(f"Markdown parsing of hypothesis failed: {e}") from e
+        # compute HHEM with Vectara endpoint
         payload = {
             "model_parameters": {"model_name": "hhem_v2.3"},
-            "generated_text": hypothesis,
+            "generated_text": clean_hypothesis,
             "source_texts": [context],
         }
         headers = {

{vectara_agentic-0.3.0 → vectara_agentic-0.3.1}/vectara_agentic/tools.py RENAMED Viewed

@@ -274,22 +274,17 @@ class VectaraToolFactory:
                 for i, result in enumerate(results, 1):
                     result_str = f"**Result #{i}**\n"
                     result_str += f"Document ID: {result['metadata']['document_id']}\n"
-                    result_str += (
-                        f"Matches: {len(result['metadata']['matching_text'])}\n"
-                    )
                     if summarize and result["text"]:
                         result_str += f"Summary: {result['text']}\n"
-                    # Add sample matching text if available
-                    if result["metadata"]["matching_text"]:
-                        sample_matches = result["metadata"]["matching_text"][
-                            :2
-                        ]  # Show first 2 matches
-                        result_str += f"Sample matches: {', '.join(sample_matches)}\n"
+                    # Add all matching text if available
+                    matches = result["metadata"]["matching_text"]
+                    if matches:
+                        result_str += ''.join(
+                            f"Match #{inx} Text: {match}\n"
+                            for inx, match in enumerate(matches, 1)
+                        )
                     formatted_results.append(result_str)
                 return "\n".join(formatted_results)
             return create_human_readable_output(res, format_search_results)
@@ -448,6 +443,7 @@ class VectaraToolFactory:
             vectara_base_url=vectara_base_url,
             vectara_verify_ssl=vectara_verify_ssl,
         )
+        keys_to_ignore = ["lang", "offset", "len"]
         # Dynamically generate the RAG function
         def rag_function(*args: Any, **kwargs: Any) -> dict:
@@ -527,7 +523,6 @@ class VectaraToolFactory:
             matches = re.findall(pattern, response.response)
             citation_numbers = sorted(set(int(match) for match in matches))
             citation_metadata = {}
-            keys_to_ignore = ["lang", "offset", "len"]
             for citation_number in citation_numbers:
                 metadata = {
                     k: v
@@ -549,21 +544,30 @@ class VectaraToolFactory:
                     }
             if fcs:
                 citation_metadata["fcs"] = fcs
             res = {"text": response.response, "metadata": citation_metadata}
             # Create human-readable output with citation formatting
             def format_rag_response(result):
                 text = result["text"]
-                metadata = result["metadata"]
                 # Format citations if present
+                metadata = result["metadata"]
                 citation_info = []
                 for key, value in metadata.items():
                     if key.isdigit():
-                        url = value.get("document", {}).get("url", None)
-                        if url:
-                            citation_info.append(f"[{key}]: {url}")
+                        doc = value.get("document", {})
+                        doc_metadata = f"{key}: " + "; ".join(
+                            [
+                                f"{k}='{v}'"
+                                for k, v in doc.items()
+                            ] +
+                            [
+                                f"{k}='{v}'"
+                                for k, v in value.items()
+                                if k not in ["document"] + keys_to_ignore
+                            ]
+                        )
+                        citation_info.append(doc_metadata)
                 if citation_info:
                     text += "\n\nCitations:\n" + "\n".join(citation_info)

{vectara_agentic-0.3.0 → vectara_agentic-0.3.1/vectara_agentic.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectara_agentic
-Version: 0.3.0
+Version: 0.3.1
 Summary: A Python package for creating AI Assistants and AI Agents with Vectara
 Home-page: https://github.com/vectara/py-vectara-agentic
 Author: Ofer Mendelevitch
@@ -61,6 +61,7 @@ Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: tiktoken==0.9.0
 Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: httpx==0.28.1
+Requires-Dist: commonmark==0.9.1
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier