vectara-agentic 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic. Click here for more details.
- vectara_agentic/_prompts.py +2 -2
- vectara_agentic/_version.py +1 -1
- vectara_agentic/hhem.py +38 -1
- vectara_agentic/tools.py +22 -18
- {vectara_agentic-0.3.0.dist-info → vectara_agentic-0.3.1.dist-info}/METADATA +2 -1
- {vectara_agentic-0.3.0.dist-info → vectara_agentic-0.3.1.dist-info}/RECORD +9 -9
- {vectara_agentic-0.3.0.dist-info → vectara_agentic-0.3.1.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.3.0.dist-info → vectara_agentic-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.3.0.dist-info → vectara_agentic-0.3.1.dist-info}/top_level.txt +0 -0
vectara_agentic/_prompts.py
CHANGED
|
@@ -28,11 +28,11 @@ GENERAL_INSTRUCTIONS = """
|
|
|
28
28
|
- If after retrying you can't get the information or answer the question, respond with "I don't know".
|
|
29
29
|
- Handling references and citations:
|
|
30
30
|
1) Include references and citations in your response to increase the credibility of your answer.
|
|
31
|
-
2) Citations should be included in the response, along with URLs, as in-text markers, such as [1](https://www.xxx.com), [2](https://www.yyy.com), etc.
|
|
31
|
+
2) Citations should be included in the response, along with URLs, as in-text markers, such as [1](https://www.xxx.com), [2](https://www.yyy.com/doc.pdf#page=2), etc.
|
|
32
32
|
You can also replace the number with a word or sentence that describes the reference, such as "[according to Nvidia 10-K](https://www.xxx.com)".
|
|
33
33
|
When adding a citation inline in the text, make sure to use proper spacing and punctuation.
|
|
34
34
|
3) If a URL is a PDF file, and the tool also provided a page number - then combine the URL and page number in your response.
|
|
35
|
-
For example, if the URL returned from the tool is "https://www.xxx.com/doc.pdf" and "page=5", then the combined URL would be "https://www.xxx.com/doc.pdf#page=5".
|
|
35
|
+
For example, if the URL returned from the tool is "https://www.xxx.com/doc.pdf" and "page='5'", then the combined URL would be "https://www.xxx.com/doc.pdf#page=5".
|
|
36
36
|
4) Where possible, integrate citations into the text of your response, such as "According to the [Nvidia 10-K](https://www.xxx.com), the revenue in 2021 was $10B".
|
|
37
37
|
5) Only include citations if provided with a valid URL as part of the tool's output (directly or in the metadata).
|
|
38
38
|
6) If a tool returns in the metadata invalid URLs or an empty URL (e.g. "[[1]()]"), ignore it and do not include that citation or reference in your response.
|
vectara_agentic/_version.py
CHANGED
vectara_agentic/hhem.py
CHANGED
|
@@ -1,6 +1,34 @@
|
|
|
1
1
|
"""Vectara HHEM (Hughes Hallucination Evaluation Model) client."""
|
|
2
2
|
|
|
3
3
|
import requests
|
|
4
|
+
from commonmark import Parser
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def markdown_to_text(md: str) -> str:
|
|
8
|
+
"""
|
|
9
|
+
Convert a Markdown-formatted string into plain text.
|
|
10
|
+
"""
|
|
11
|
+
parser = Parser()
|
|
12
|
+
ast = parser.parse(md)
|
|
13
|
+
out: list[str] = []
|
|
14
|
+
|
|
15
|
+
def recurse(node):
|
|
16
|
+
if node.t in ("text", "code", "html_inline"):
|
|
17
|
+
out.append(node.literal or "")
|
|
18
|
+
elif node.t == "softbreak":
|
|
19
|
+
out.append(" ")
|
|
20
|
+
elif node.t == "linebreak":
|
|
21
|
+
out.append("\n")
|
|
22
|
+
child = getattr(node, "first_child", None)
|
|
23
|
+
while child is not None:
|
|
24
|
+
recurse(child)
|
|
25
|
+
child = getattr(child, "next", None)
|
|
26
|
+
|
|
27
|
+
recurse(ast)
|
|
28
|
+
text = "".join(out)
|
|
29
|
+
# collapse runs of spaces but preserve newlines
|
|
30
|
+
lines = [" ".join(line.split()) for line in text.splitlines()]
|
|
31
|
+
return "\n".join(line if line.strip() else "" for line in lines)
|
|
4
32
|
|
|
5
33
|
|
|
6
34
|
class HHEM:
|
|
@@ -23,9 +51,18 @@ class HHEM:
|
|
|
23
51
|
Raises:
|
|
24
52
|
requests.exceptions.RequestException: If there is a network-related error or the API call fails.
|
|
25
53
|
"""
|
|
54
|
+
|
|
55
|
+
# clean response from any markdown or other formatting.
|
|
56
|
+
try:
|
|
57
|
+
clean_hypothesis = markdown_to_text(hypothesis)
|
|
58
|
+
except Exception as e:
|
|
59
|
+
# If markdown parsing fails, surface the failure to the caller as a ValueError
|
|
60
|
+
raise ValueError(f"Markdown parsing of hypothesis failed: {e}") from e
|
|
61
|
+
|
|
62
|
+
# compute HHEM with Vectara endpoint
|
|
26
63
|
payload = {
|
|
27
64
|
"model_parameters": {"model_name": "hhem_v2.3"},
|
|
28
|
-
"generated_text": hypothesis,
|
|
65
|
+
"generated_text": clean_hypothesis,
|
|
29
66
|
"source_texts": [context],
|
|
30
67
|
}
|
|
31
68
|
headers = {
|
vectara_agentic/tools.py
CHANGED
|
@@ -274,22 +274,17 @@ class VectaraToolFactory:
|
|
|
274
274
|
for i, result in enumerate(results, 1):
|
|
275
275
|
result_str = f"**Result #{i}**\n"
|
|
276
276
|
result_str += f"Document ID: {result['metadata']['document_id']}\n"
|
|
277
|
-
result_str += (
|
|
278
|
-
f"Matches: {len(result['metadata']['matching_text'])}\n"
|
|
279
|
-
)
|
|
280
|
-
|
|
281
277
|
if summarize and result["text"]:
|
|
282
278
|
result_str += f"Summary: {result['text']}\n"
|
|
283
279
|
|
|
284
|
-
# Add
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
280
|
+
# Add all matching text if available
|
|
281
|
+
matches = result["metadata"]["matching_text"]
|
|
282
|
+
if matches:
|
|
283
|
+
result_str += ''.join(
|
|
284
|
+
f"Match #{inx} Text: {match}\n"
|
|
285
|
+
for inx, match in enumerate(matches, 1)
|
|
286
|
+
)
|
|
291
287
|
formatted_results.append(result_str)
|
|
292
|
-
|
|
293
288
|
return "\n".join(formatted_results)
|
|
294
289
|
|
|
295
290
|
return create_human_readable_output(res, format_search_results)
|
|
@@ -448,6 +443,7 @@ class VectaraToolFactory:
|
|
|
448
443
|
vectara_base_url=vectara_base_url,
|
|
449
444
|
vectara_verify_ssl=vectara_verify_ssl,
|
|
450
445
|
)
|
|
446
|
+
keys_to_ignore = ["lang", "offset", "len"]
|
|
451
447
|
|
|
452
448
|
# Dynamically generate the RAG function
|
|
453
449
|
def rag_function(*args: Any, **kwargs: Any) -> dict:
|
|
@@ -527,7 +523,6 @@ class VectaraToolFactory:
|
|
|
527
523
|
matches = re.findall(pattern, response.response)
|
|
528
524
|
citation_numbers = sorted(set(int(match) for match in matches))
|
|
529
525
|
citation_metadata = {}
|
|
530
|
-
keys_to_ignore = ["lang", "offset", "len"]
|
|
531
526
|
for citation_number in citation_numbers:
|
|
532
527
|
metadata = {
|
|
533
528
|
k: v
|
|
@@ -549,21 +544,30 @@ class VectaraToolFactory:
|
|
|
549
544
|
}
|
|
550
545
|
if fcs:
|
|
551
546
|
citation_metadata["fcs"] = fcs
|
|
552
|
-
|
|
553
547
|
res = {"text": response.response, "metadata": citation_metadata}
|
|
554
548
|
|
|
555
549
|
# Create human-readable output with citation formatting
|
|
556
550
|
def format_rag_response(result):
|
|
557
551
|
text = result["text"]
|
|
558
|
-
metadata = result["metadata"]
|
|
559
552
|
|
|
560
553
|
# Format citations if present
|
|
554
|
+
metadata = result["metadata"]
|
|
561
555
|
citation_info = []
|
|
562
556
|
for key, value in metadata.items():
|
|
563
557
|
if key.isdigit():
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
558
|
+
doc = value.get("document", {})
|
|
559
|
+
doc_metadata = f"{key}: " + "; ".join(
|
|
560
|
+
[
|
|
561
|
+
f"{k}='{v}'"
|
|
562
|
+
for k, v in doc.items()
|
|
563
|
+
] +
|
|
564
|
+
[
|
|
565
|
+
f"{k}='{v}'"
|
|
566
|
+
for k, v in value.items()
|
|
567
|
+
if k not in ["document"] + keys_to_ignore
|
|
568
|
+
]
|
|
569
|
+
)
|
|
570
|
+
citation_info.append(doc_metadata)
|
|
567
571
|
if citation_info:
|
|
568
572
|
text += "\n\nCitations:\n" + "\n".join(citation_info)
|
|
569
573
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vectara_agentic
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: A Python package for creating AI Assistants and AI Agents with Vectara
|
|
5
5
|
Home-page: https://github.com/vectara/py-vectara-agentic
|
|
6
6
|
Author: Ofer Mendelevitch
|
|
@@ -61,6 +61,7 @@ Requires-Dist: python-dotenv==1.0.1
|
|
|
61
61
|
Requires-Dist: tiktoken==0.9.0
|
|
62
62
|
Requires-Dist: cloudpickle>=3.1.1
|
|
63
63
|
Requires-Dist: httpx==0.28.1
|
|
64
|
+
Requires-Dist: commonmark==0.9.1
|
|
64
65
|
Dynamic: author
|
|
65
66
|
Dynamic: author-email
|
|
66
67
|
Dynamic: classifier
|
|
@@ -18,22 +18,22 @@ tests/test_workflow.py,sha256=TmNBxBqSW5owk_Nz9LLtHvqryVNsFPkf-M1G_uFSsAM,3739
|
|
|
18
18
|
vectara_agentic/__init__.py,sha256=2GLDS3U6KckK-dBRl9v_x1kSV507gEhjOfuMmmu0Qxg,850
|
|
19
19
|
vectara_agentic/_callback.py,sha256=c3848EMSpaQWXtuwdqRGbhgbZhiDwgGnemJkgm9yWAc,13238
|
|
20
20
|
vectara_agentic/_observability.py,sha256=iZlByeQTyx6g3Y8aBYcdGcxdRkoYrfxHdcrTEKO26UE,4485
|
|
21
|
-
vectara_agentic/_prompts.py,sha256=
|
|
22
|
-
vectara_agentic/_version.py,sha256=
|
|
21
|
+
vectara_agentic/_prompts.py,sha256=7PY1XBqFM5JGXSw5JzhE2QJylLawIjFv3xAEJ2AA0LQ,10550
|
|
22
|
+
vectara_agentic/_version.py,sha256=_2691WFCS6Oetu4wBzc3283NHXo4gUI7OxlOWeNJwjI,65
|
|
23
23
|
vectara_agentic/agent.py,sha256=S1Rek9Dp9HabDQPqdQlkIMUR701-XTonyoXeCRE9WtA,58215
|
|
24
24
|
vectara_agentic/agent_config.py,sha256=E-rtYMcpoGxnEAyy8231bizo2n0uGQ2qWxuSgTEfwdQ,4327
|
|
25
25
|
vectara_agentic/agent_endpoint.py,sha256=PzIN7HhEHv8Mq_Zo5cZ2xYrgdv2AN6kx6dc_2AJq28I,7497
|
|
26
26
|
vectara_agentic/db_tools.py,sha256=GUsQTZfRbT9F5K_e5HNaKXUkU6x8RErUyjDVKlZi1IA,11196
|
|
27
|
-
vectara_agentic/hhem.py,sha256=
|
|
27
|
+
vectara_agentic/hhem.py,sha256=j4euBX24PSCQ8P_MhhsKKnm1kv6nHKAbduHsTwtQuR0,2774
|
|
28
28
|
vectara_agentic/llm_utils.py,sha256=g-8Ja4g8X67u02pi7mQrb3O1nRre9lgeC6gJqngl5ow,7668
|
|
29
29
|
vectara_agentic/sub_query_workflow.py,sha256=JYwN0wK4QzHjTaFDsSCAQvMx9GD4g6CnqxZCnzi6xb4,13086
|
|
30
30
|
vectara_agentic/tool_utils.py,sha256=9xoqVPB97CIDXOxuFIw4yZ2RlXvdayCEGPUaUPC2Tbc,24168
|
|
31
|
-
vectara_agentic/tools.py,sha256=
|
|
31
|
+
vectara_agentic/tools.py,sha256=bj8Zn3Lv63vWxu7N6_kkvOk9Vr2ZtuiiBetXUCzsK0w,34860
|
|
32
32
|
vectara_agentic/tools_catalog.py,sha256=cAN_kDOWZUoW4GNFwY5GdS6ImMUQNnF2sggx9OGK9Cg,4906
|
|
33
33
|
vectara_agentic/types.py,sha256=3mrtshHiy-d5JHVxl-4tJk5DRspvYKwAYiI5LvKO1Bw,2226
|
|
34
34
|
vectara_agentic/utils.py,sha256=R9HitEG5K3Q_p2M_teosT181OUxkhs1-hnj98qDYGbE,2545
|
|
35
|
-
vectara_agentic-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
36
|
-
vectara_agentic-0.3.
|
|
37
|
-
vectara_agentic-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
38
|
-
vectara_agentic-0.3.0.dist-info/top_level.txt,sha256=Y7TQTFdOYGYodQRltUGRieZKIYuzeZj2kHqAUpfCUfg,22
|
|
39
|
-
vectara_agentic-0.3.0.dist-info/RECORD,,
|
|
35
|
+
vectara_agentic-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
36
|
+
vectara_agentic-0.3.1.dist-info/METADATA,sha256=5QXewroE8dsANYXCoYr-MqAm0wlNhe205tVzWaCZnEw,32079
|
|
37
|
+
vectara_agentic-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
38
|
+
vectara_agentic-0.3.1.dist-info/top_level.txt,sha256=Y7TQTFdOYGYodQRltUGRieZKIYuzeZj2kHqAUpfCUfg,22
|
|
39
|
+
vectara_agentic-0.3.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|