vectara-agentic 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vectara-agentic might be problematic.
- tests/test_tools.py +64 -13
- vectara_agentic/_callback.py +2 -2
- vectara_agentic/_observability.py +3 -2
- vectara_agentic/_prompts.py +5 -4
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +1 -1
- vectara_agentic/db_tools.py +34 -14
- vectara_agentic/sub_query_workflow.py +82 -45
- vectara_agentic/tools.py +148 -60
- {vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/METADATA +12 -12
- {vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/RECORD +14 -14
- {vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/top_level.txt +0 -0
tests/test_tools.py
CHANGED
@@ -2,7 +2,12 @@ import unittest
 
 from pydantic import Field, BaseModel
 
-from vectara_agentic.tools import
+from vectara_agentic.tools import (
+    VectaraTool,
+    VectaraToolFactory,
+    ToolsFactory,
+    ToolType,
+)
 from vectara_agentic.agent import Agent
 from vectara_agentic.agent_config import AgentConfig
 
@@ -13,9 +18,7 @@ class TestToolsPackage(unittest.TestCase):
     def test_vectara_tool_factory(self):
         vectara_corpus_key = "corpus_key"
         vectara_api_key = "api_key"
-        vec_factory = VectaraToolFactory(
-            vectara_corpus_key, vectara_api_key
-        )
+        vec_factory = VectaraToolFactory(vectara_corpus_key, vectara_api_key)
 
         self.assertEqual(vectara_corpus_key, vec_factory.vectara_corpus_key)
         self.assertEqual(vectara_api_key, vec_factory.vectara_api_key)
@@ -46,6 +49,48 @@ class TestToolsPackage(unittest.TestCase):
         self.assertIsInstance(search_tool, FunctionTool)
         self.assertEqual(search_tool.metadata.tool_type, ToolType.QUERY)
 
+    def test_vectara_tool_validation(self):
+        vectara_corpus_key = "corpus_key"
+        vectara_api_key = "api_key"
+        vec_factory = VectaraToolFactory(vectara_corpus_key, vectara_api_key)
+
+        class QueryToolArgs(BaseModel):
+            query: str = Field(description="The user query")
+            year: int = Field(
+                description="The year of the document",
+                example=2023,
+            )
+            ticker: str = Field(
+                description="The stock ticker",
+                example="AAPL",
+            )
+
+        query_tool = vec_factory.create_rag_tool(
+            tool_name="rag_tool",
+            tool_description="""
+            Returns a response (str) to the user query based on the data in this corpus.
+            """,
+            tool_args_schema=QueryToolArgs,
+        )
+        res = query_tool(
+            query="What is the stock price?",
+            the_year=2023,
+        )
+        self.assertIn("Malfunction", str(res))
+
+        search_tool = vec_factory.create_search_tool(
+            tool_name="search_tool",
+            tool_description="""
+            Returns a list of documents (str) that match the user query.
+            """,
+            tool_args_schema=QueryToolArgs,
+        )
+        res = search_tool(
+            query="What is the stock price?",
+            the_year=2023,
+        )
+        self.assertIn("Malfunction", str(res))
+
     def test_tool_factory(self):
         def mult(x: float, y: float) -> float:
             return x * y
@@ -59,17 +104,21 @@ class TestToolsPackage(unittest.TestCase):
     def test_llama_index_tools(self):
         tools_factory = ToolsFactory()
 
-
-            tool_package_name="arxiv",
-
-        )
-
-        arxiv_tool = llama_tools[0]
+        arxiv_tool = tools_factory.get_llama_index_tools(
+            tool_package_name="arxiv", tool_spec_name="ArxivToolSpec"
+        )[0]
 
         self.assertIsInstance(arxiv_tool, VectaraTool)
         self.assertIsInstance(arxiv_tool, FunctionTool)
         self.assertEqual(arxiv_tool.metadata.tool_type, ToolType.QUERY)
 
+        yfinance_tool = tools_factory.get_llama_index_tools(
+            tool_package_name="yahoo_finance", tool_spec_name="YahooFinanceToolSpec"
+        )[0]
+        self.assertIsInstance(yfinance_tool, VectaraTool)
+        self.assertIsInstance(yfinance_tool, FunctionTool)
+        self.assertEqual(yfinance_tool.metadata.tool_type, ToolType.QUERY)
+
     def test_public_repo(self):
         vectara_corpus_key = "vectara-docs_1"
         vectara_api_key = "zqt_UXrBcnI2UXINZkrv4g1tQPhzj02vfdtqYJIDiA"
@@ -80,10 +129,12 @@ class TestToolsPackage(unittest.TestCase):
             tool_name="ask_vectara",
             data_description="data from Vectara website",
             assistant_specialty="RAG as a service",
-            vectara_summarizer="mockingbird-1.0-2024-07-16"
+            vectara_summarizer="mockingbird-1.0-2024-07-16",
        )
 
-        self.assertIn(
+        self.assertIn(
+            "Vectara is an end-to-end platform", str(agent.chat("What is Vectara?"))
+        )
 
     def test_class_method_as_tool(self):
         class TestClass:
@@ -102,7 +153,7 @@ class TestToolsPackage(unittest.TestCase):
             tools=tools,
             topic=topic,
             custom_instructions=instructions,
-            agent_config=config
+            agent_config=config,
         )
 
         self.assertEqual(
vectara_agentic/_callback.py
CHANGED
@@ -154,7 +154,7 @@ class AgentCallbackHandler(BaseCallbackHandler):
         elif event_type == CBEventType.AGENT_STEP:
             self._handle_agent_step(payload, event_id)
         elif event_type == CBEventType.EXCEPTION:
-            print(f"Exception: {payload.get(EventPayload.EXCEPTION)}")
+            print(f"Exception in handle_event: {payload.get(EventPayload.EXCEPTION)}")
         else:
             print(f"Unknown event type: {event_type}, payload={payload}")
 
@@ -168,7 +168,7 @@ class AgentCallbackHandler(BaseCallbackHandler):
         elif event_type == CBEventType.AGENT_STEP:
             await self._ahandle_agent_step(payload, event_id)
         elif event_type == CBEventType.EXCEPTION:
-            print(f"Exception: {payload.get(EventPayload.EXCEPTION)}")
+            print(f"Exception in ahandle_event: {payload.get(EventPayload.EXCEPTION)}")
         else:
             print(f"Unknown event type: {event_type}, payload={payload}")
 
vectara_agentic/_observability.py
CHANGED

@@ -8,7 +8,7 @@ import pandas as pd
 from .types import ObserverType
 from .agent_config import AgentConfig
 
-def setup_observer(config: AgentConfig) -> bool:
+def setup_observer(config: AgentConfig, verbose: bool) -> bool:
     '''
     Setup the observer.
     '''
@@ -31,7 +31,8 @@ def setup_observer(config: AgentConfig) -> bool:
         tracer_provider = register(endpoint=phoenix_endpoint, project_name="vectara-agentic")
         LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
         return True
-
+    if verbose:
+        print("No observer set.")
     return False
 
 
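`setup_observer` now takes a `verbose` flag and only prints "No observer set." when it is true; `Agent` passes its own `self.verbose` through (see the agent.py hunk below). A hedged sketch of calling the new signature directly, assuming the module is imported by its internal path and using a placeholder config:

```python
from vectara_agentic.agent_config import AgentConfig
from vectara_agentic._observability import setup_observer  # internal module, shown in this diff

config = AgentConfig()  # placeholder; observer settings come from the config/environment
enabled = setup_observer(config, verbose=True)  # prints "No observer set." if nothing is configured
print(f"observability enabled: {enabled}")
```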
vectara_agentic/_prompts.py
CHANGED
@@ -37,10 +37,11 @@ GENERAL_INSTRUCTIONS = """
 - If including latex equations in the markdown response, make sure the equations are on a separate line and enclosed in double dollar signs.
 - Always respond in the language of the question, and in text (no images, videos or code).
 - If you are provided with database tools use them for analytical queries (such as counting, calculating max, min, average, sum, or other statistics).
-  For each database, the database tools include: x_list_tables, x_load_data, x_describe_tables, and x_load_sample_data, where 'x' in the database name.
-
-  Before using the x_load_data with a SQL query, always follow these steps:
-  -
+  For each database, the database tools include: x_list_tables, x_load_data, x_describe_tables, x_load_unique_values, and x_load_sample_data, where 'x' in the database name.
+  for example, if the database name is "ev", the tools are: ev_list_tables, ev_load_data, ev_describe_tables, ev_load_unique_values, and ev_load_sample_data.
+  Before using the x_load_data with a SQL query, always follow these discovery steps:
+  - call the x_list_tables tool to list of available tables in the x database.
+  - Call the x_describe_tables tool to understand the schema of each table you want to query data from.
   - Use the x_load_unique_values tool to understand the unique values in each column.
     Sometimes the user may ask for a specific column value, but the actual value in the table may be different, and you will need to use the correct value.
   - Use the x_load_sample_data tool to understand the column names, and typical values in each column.
vectara_agentic/_version.py
CHANGED
vectara_agentic/agent.py
CHANGED
@@ -252,7 +252,7 @@ class Agent:
 
         # Setup observability
         try:
-            self.observability_enabled = setup_observer(self.agent_config)
+            self.observability_enabled = setup_observer(self.agent_config, self.verbose)
         except Exception as e:
             print(f"Failed to set up observer ({e}), ignoring")
             self.observability_enabled = False
vectara_agentic/db_tools.py
CHANGED
@@ -15,7 +15,6 @@ from sqlalchemy.engine import Engine
 from sqlalchemy.exc import NoSuchTableError
 from sqlalchemy.schema import CreateTable
 
-from llama_index.core.readers.base import BaseReader
 from llama_index.core.utilities.sql_wrapper import SQLDatabase
 from llama_index.core.schema import Document
 from llama_index.core.tools.function_tool import FunctionTool
@@ -25,7 +24,7 @@ from llama_index.core.tools.utils import create_schema_from_function
 
 AsyncCallable = Callable[..., Awaitable[Any]]
 
-class DatabaseTools
+class DatabaseTools:
     """Database tools for vectara-agentic
     This class provides a set of tools to interact with a database.
     It allows you to load data, list tables, describe tables, and load unique values.
@@ -49,9 +48,11 @@ class DatabaseTools(BaseReader):
         user: Optional[str] = None,
         password: Optional[str] = None,
         dbname: Optional[str] = None,
+        tool_name_prefix: str = "db",
         **kwargs: Any,
     ) -> None:
         self.max_rows = max_rows
+        self.tool_name_prefix = tool_name_prefix
 
         if sql_database:
             self.sql_database = sql_database
@@ -86,35 +87,34 @@ class DatabaseTools(BaseReader):
             func = getattr(self, fn_name)
         except AttributeError:
             return None
-        name = fn_name
+        name = self.tool_name_prefix + "_" + fn_name if self.tool_name_prefix else fn_name
         docstring = func.__doc__ or ""
         description = f"{name}{signature(func)}\n{docstring}"
         fn_schema = create_schema_from_function(fn_name, getattr(self, fn_name))
         return ToolMetadata(name=name, description=description, fn_schema=fn_schema)
 
-    def _load_data(self,
+    def _load_data(self, sql_query: str) -> List[Document]:
         documents = []
         with self.sql_database.engine.connect() as connection:
-            if
+            if sql_query is None:
                 raise ValueError("A query parameter is necessary to filter the data")
-            result = connection.execute(text(
+            result = connection.execute(text(sql_query))
             for item in result.fetchall():
                 doc_str = ", ".join([str(entry) for entry in item])
                 documents.append(Document(text=doc_str))
         return documents
 
-    def load_data(self,
+    def load_data(self, sql_query: str) -> List[str]:
         """Query and load data from the Database, returning a list of Documents.
         Args:
-
+            sql_query (str): an SQL query to filter tables and rows.
         Returns:
-            List[
+            List[str]: a list of Document objects from the database.
         """
-
-        if query is None:
+        if sql_query is None:
             raise ValueError("A query parameter is necessary to filter the data")
 
-        count_query = f"SELECT COUNT(*) FROM ({
+        count_query = f"SELECT COUNT(*) FROM ({sql_query})"
         try:
             count_rows = self._load_data(count_query)
         except Exception as e:
@@ -126,9 +126,9 @@ class DatabaseTools(BaseReader):
                 "Please refactor your query to make it return less rows. "
             ]
         try:
-            res = self._load_data(
+            res = self._load_data(sql_query)
         except Exception as e:
-            return [f"Error ({str(e)}) occurred while executing the query {
+            return [f"Error ({str(e)}) occurred while executing the query {sql_query}"]
         return [d.text for d in res]
 
     def load_sample_data(self, table_name: str, num_rows: int = 25) -> Any:
@@ -141,6 +141,11 @@ class DatabaseTools(BaseReader):
         Returns:
             Any: The result of the database query.
         """
+        if table_name not in self.list_tables():
+            return (
+                f"Table {table_name} does not exist in the database."
+                f"Valid table names are: {self.list_tables()}"
+            )
         try:
             res = self._load_data(f"SELECT * FROM {table_name} LIMIT {num_rows}")
         except Exception as e:
@@ -162,6 +167,15 @@ class DatabaseTools(BaseReader):
             str: A string representation of the table schemas.
         """
         table_names = tables or [table.name for table in self._metadata.sorted_tables]
+        if len(table_names) == 0:
+            return "You must specify at least one table name to describe."
+        for table_name in table_names:
+            if table_name not in self.list_tables():
+                return (
+                    f"Table {table_name} does not exist in the database."
+                    f"Valid table names are: {self.list_tables()}"
+                )
+
         table_schemas = []
         for table_name in table_names:
             table = next(
@@ -186,6 +200,12 @@ class DatabaseTools(BaseReader):
         Returns:
             Any: the result of the database query
         """
+        if table_name not in self.list_tables():
+            return (
+                f"Table {table_name} does not exist in the database."
+                f"Valid table names are: {self.list_tables()}"
+            )
+
         res = {}
         try:
             for column in columns:
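The main behavioral change here is that tool-name prefixing moved into `DatabaseTools` itself via the new `tool_name_prefix` argument (default "db"), so generated tool metadata is already named `<prefix>_<method>`; `ToolsFactory.database_tools` now passes the prefix and `max_rows` through instead of renaming tools afterwards (see the tools.py hunks further down). A hedged sketch of the factory path, with placeholder connection details and argument names as they appear in this diff:

```python
from vectara_agentic.tools import ToolsFactory

tools = ToolsFactory().database_tools(
    tool_name_prefix="ev",               # tools come back named ev_list_tables, ev_load_data, ...
    scheme="sqlite",                     # placeholder connection; other supported schemes work too
    dbname="ev.db",
    content_description="electric vehicle sales",  # appended to each tool description
    max_rows=500,
)
print([t.metadata.name for t in tools])
```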
vectara_agentic/sub_query_workflow.py
CHANGED

@@ -2,6 +2,8 @@
 This module contains the SubQuestionQueryEngine workflow, which is a workflow
 that takes a user question and a list of tools, and outputs a list of sub-questions.
 """
+
+import re
 import json
 from pydantic import BaseModel
 
@@ -14,6 +16,7 @@ from llama_index.core.workflow import (
     StopEvent,
 )
 
+
 class SubQuestionQueryWorkflow(Workflow):
     """
     Workflow for sub-question query engine.
@@ -24,21 +27,25 @@ class SubQuestionQueryWorkflow(Workflow):
         """
         Inputs for the workflow.
         """
+
         query: str
 
     class OutputsModel(BaseModel):
         """
         Outputs for the workflow.
         """
+
         response: str
 
     # Workflow Event types
     class QueryEvent(Event):
         """Event for a query."""
+
         question: str
 
     class AnswerEvent(Event):
         """Event for an answer."""
+
         question: str
         answer: str
 
@@ -51,35 +58,29 @@ class SubQuestionQueryWorkflow(Workflow):
         """
         if not hasattr(ev, "inputs"):
             raise ValueError("No inputs provided to workflow Start Event.")
-        if
+        if not isinstance(ev.inputs, self.InputsModel):
             raise ValueError(f"Expected inputs to be of type {self.InputsModel}")
-        if hasattr(ev, "inputs"):
-            query = ev.inputs.query
-            await ctx.set("original_query", query)
-            print(f"Query is {await ctx.get('original_query')}")
 
-
-
-
-            raise ValueError("Agent not provided to workflow Start Event.")
-        chat_history = [str(msg) for msg in ev.agent.memory.get()]
+        query = ev.inputs.query
+        await ctx.set("original_query", query)
+        print(f"Query is {query}")
 
-
-
-
-
+        required_attrs = ["agent", "llm", "tools"]
+        for attr in required_attrs:
+            if not hasattr(ev, attr):
+                raise ValueError(
+                    f"{attr.capitalize()} not provided to workflow Start Event."
+                )
 
-
-
-
-
+        await ctx.set("agent", ev.agent)
+        await ctx.set("llm", ev.llm)
+        await ctx.set("tools", ev.tools)
+        await ctx.set("verbose", getattr(ev, "verbose", False))
 
-
-            await ctx.set("verbose", ev.verbose)
-        else:
-            await ctx.set("verbose", False)
+        chat_history = [str(msg) for msg in ev.agent.memory.get()]
 
         llm = await ctx.get("llm")
+        original_query = await ctx.get("original_query")
         response = llm.complete(
             f"""
             Given a user question, and a list of tools, output a list of
@@ -100,7 +101,7 @@ class SubQuestionQueryWorkflow(Workflow):
             the sub-questions could be:
             - What is the largest city within 50 miles of San Francisco? (answer is San Jose)
             - What is the name of the mayor of San Jose?
-            Here is the user question: {
+            Here is the user question: {original_query}.
             Here are previous chat messages: {chat_history}.
             And here is the list of tools: {ev.tools}
             """,
@@ -109,17 +110,35 @@ class SubQuestionQueryWorkflow(Workflow):
         if await ctx.get("verbose"):
             print(f"Sub-questions are {response}")
 
-
-
+        response_str = str(response)
+        if not response_str:
+            raise ValueError(
+                f"No response from LLM when generating sub-questions for query {original_query}"
+            )
+        try:
+            data = json.loads(response_str)
+        except json.JSONDecodeError as e1:
+            match = re.search(r"\{.*\}", response_str, re.DOTALL)
+            if not match:
+                raise ValueError(f"Invalid LLM response format: {response_str}") from e1
+            try:
+                data = json.loads(match.group(0))
+            except json.JSONDecodeError as e2:
+                raise ValueError(f"Invalid LLM response format: {response_str}") from e2
+
+        sub_questions = data.get("sub_questions")
+        if sub_questions is None:
+            raise ValueError(f"Invalid LLM response format: {response_str}")
+        if not sub_questions:
+            raise ValueError("LLM returned empty sub-questions list")
 
         await ctx.set("sub_question_count", len(sub_questions))
-
         for question in sub_questions:
             ctx.send_event(self.QueryEvent(question=question))
 
         return None
 
-    @step(num_workers=
+    @step(num_workers=4)
     async def sub_question(self, ctx: Context, ev: QueryEvent) -> AnswerEvent:
         """
         Given a sub-question, return the answer to the sub-question, using the agent.
@@ -131,9 +150,7 @@ class SubQuestionQueryWorkflow(Workflow):
         return self.AnswerEvent(question=ev.question, answer=str(response))
 
     @step
-    async def combine_answers(
-        self, ctx: Context, ev: AnswerEvent
-    ) -> StopEvent | None:
+    async def combine_answers(self, ctx: Context, ev: AnswerEvent) -> StopEvent | None:
         """
         Given a list of answers to sub-questions, combine them into a single answer.
         """
@@ -144,10 +161,7 @@ class SubQuestionQueryWorkflow(Workflow):
             return None
 
         answers = "\n\n".join(
-
-            f"Question: {event.question}: \n Answer: {event.answer}"
-            for event in ready
-            ]
+            f"Question: {event.question}\nAnswer: {event.answer}" for event in ready
         )
 
         prompt = f"""
@@ -169,8 +183,8 @@ class SubQuestionQueryWorkflow(Workflow):
         if await ctx.get("verbose"):
             print("Final response is", response)
 
-
-
+        return StopEvent(result=self.OutputsModel(response=str(response)))
+
 
 class SequentialSubQuestionsWorkflow(Workflow):
     """
@@ -182,17 +196,20 @@ class SequentialSubQuestionsWorkflow(Workflow):
         """
         Inputs for the workflow.
         """
+
         query: str
 
     class OutputsModel(BaseModel):
         """
         Outputs for the workflow.
         """
+
         response: str
 
     # Workflow Event types
     class QueryEvent(Event):
         """Event for a query."""
+
         question: str
         prev_answer: str
         num: int
@@ -232,11 +249,12 @@ class SequentialSubQuestionsWorkflow(Workflow):
             await ctx.set("verbose", ev.verbose)
         else:
             await ctx.set("verbose", False)
+
+        original_query = await ctx.get("original_query")
         if ev.verbose:
-            print(f"Query is {
+            print(f"Query is {original_query}")
 
         llm = await ctx.get("llm")
-        orig_query = await ctx.get("original_query")
         response = llm.complete(
             f"""
             Given a user question, and a list of tools, output a list of
@@ -257,14 +275,32 @@ class SequentialSubQuestionsWorkflow(Workflow):
             - Who is the mayor of this city?
             The answer to the first question is San Jose, which is given as context to the second question.
             The answer to the second question is Matt Mahan.
-            Here is the user question: {
+            Here is the user question: {original_query}.
             Here are previous chat messages: {chat_history}.
             And here is the list of tools: {ev.tools}
             """,
         )
 
-
-
+        if not str(response):
+            raise ValueError(f"No response from LLM for query {original_query}")
+
+        response_str = str(response)
+        try:
+            response_obj = json.loads(response_str)
+        except json.JSONDecodeError as e1:
+            match = re.search(r"\{.*\}", response_str, re.DOTALL)
+            if not match:
+                raise ValueError(
+                    f"Failed to extract JSON object with subquestions from LLM response: {response_str}"
+                ) from e1
+            try:
+                response_obj = json.loads(match.group(0))
+            except json.JSONDecodeError as e2:
+                raise ValueError(
+                    f"Failed to extract JSON object with subquestions from LLM response: {response_str}"
+                ) from e2
+
+        sub_questions = response_obj.get("sub_questions")
 
         await ctx.set("sub_questions", sub_questions)
         if await ctx.get("verbose"):
@@ -273,7 +309,9 @@ class SequentialSubQuestionsWorkflow(Workflow):
         return self.QueryEvent(question=sub_questions[0], prev_answer="", num=0)
 
     @step
-    async def sub_question(
+    async def sub_question(
+        self, ctx: Context, ev: QueryEvent
+    ) -> StopEvent | QueryEvent:
         """
         Given a sub-question, return the answer to the sub-question, using the agent.
         """
@@ -293,12 +331,11 @@ class SequentialSubQuestionsWorkflow(Workflow):
         if await ctx.get("verbose"):
             print(f"Answer is {response}")
 
-        sub_questions = await ctx.get("sub_questions")
         if ev.num + 1 < len(sub_questions):
             return self.QueryEvent(
                 question=sub_questions[ev.num + 1],
-                prev_answer
-                num=ev.num + 1
+                prev_answer=response.response,
+                num=ev.num + 1,
             )
 
         output = self.OutputsModel(response=response.response)
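Both workflows now parse the LLM's sub-question list defensively: try `json.loads` on the raw completion, and if that fails, fall back to a regex that pulls the first `{...}` block out of the surrounding text before giving up. The same pattern in isolation, as a standalone sketch:

```python
import json
import re

def extract_sub_questions(response_str: str) -> list[str]:
    """Parse {"sub_questions": [...]} from an LLM completion, tolerating extra prose."""
    try:
        data = json.loads(response_str)
    except json.JSONDecodeError as e1:
        # Fall back to the first JSON-looking block in the text, as the workflows do.
        match = re.search(r"\{.*\}", response_str, re.DOTALL)
        if not match:
            raise ValueError(f"Invalid LLM response format: {response_str}") from e1
        data = json.loads(match.group(0))  # may still raise; callers treat that as invalid output
    sub_questions = data.get("sub_questions")
    if not sub_questions:
        raise ValueError("LLM returned no sub-questions")
    return sub_questions

# Example: the fallback strips the surrounding prose before parsing.
print(extract_sub_questions('Sure! {"sub_questions": ["What is X?", "Who made X?"]}'))
```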
vectara_agentic/tools.py
CHANGED
@@ -30,6 +30,7 @@ LI_packages = {
     "arxiv": ToolType.QUERY,
     "tavily_research": ToolType.QUERY,
     "exa": ToolType.QUERY,
+    "brave": ToolType.QUERY,
     "neo4j": ToolType.QUERY,
     "kuzu": ToolType.QUERY,
     "google": {
@@ -54,13 +55,15 @@ LI_packages = {
         "send_message": ToolType.ACTION,
         "fetch_channel": ToolType.QUERY,
         }
-    }
+    },
 }
 
+
 class VectaraToolMetadata(ToolMetadata):
     """
     A subclass of ToolMetadata adding the tool_type attribute.
     """
+
     tool_type: ToolType
 
     def __init__(self, tool_type: ToolType, **kwargs):
@@ -87,7 +90,9 @@ class VectaraTool(FunctionTool):
         fn: Optional[Callable[..., Any]] = None,
         async_fn: Optional[AsyncCallable] = None,
     ) -> None:
-        metadata_dict =
+        metadata_dict = (
+            metadata.dict() if hasattr(metadata, "dict") else metadata.__dict__
+        )
         vm = VectaraToolMetadata(tool_type=tool_type, **metadata_dict)
         super().__init__(fn, vm, async_fn)
 
@@ -106,19 +111,26 @@ class VectaraTool(FunctionTool):
         tool_type: ToolType = ToolType.QUERY,
     ) -> "VectaraTool":
         tool = FunctionTool.from_defaults(
-            fn,
-
+            fn,
+            name,
+            description,
+            return_direct,
+            fn_schema,
+            async_fn,
+            tool_metadata,
+            callback,
+            async_callback,
         )
         vectara_tool = cls(
-            tool_type=tool_type,
+            tool_type=tool_type,
+            fn=tool.fn,
+            metadata=tool.metadata,
+            async_fn=tool.async_fn,
         )
         return vectara_tool
 
     def __str__(self) -> str:
-        return (
-            f"Tool(name={self.metadata.name}, "
-            f"Tool metadata={self.metadata})"
-        )
+        return f"Tool(name={self.metadata.name}, " f"Tool metadata={self.metadata})"
 
     def __repr__(self) -> str:
         return str(self)
@@ -136,19 +148,20 @@ class VectaraTool(FunctionTool):
             # If schema is a dict-like object, compare the dict representation
             try:
                 # Try to get schema as dict if possible
-                if hasattr(self.metadata.fn_schema,
+                if hasattr(self.metadata.fn_schema, "schema"):
                     self_schema = self.metadata.fn_schema.schema
                     other_schema = other.metadata.fn_schema.schema
 
                     # Compare only properties and required fields
-                    self_props = self_schema.get(
-                    other_props = other_schema.get(
+                    self_props = self_schema.get("properties", {})
+                    other_props = other_schema.get("properties", {})
 
-                    self_required = self_schema.get(
-                    other_required = other_schema.get(
+                    self_required = self_schema.get("required", [])
+                    other_required = other_schema.get("required", [])
 
-                    return
-
+                    return self_props.keys() == other_props.keys() and set(
+                        self_required
+                    ) == set(other_required)
             except Exception:
                 # If any exception occurs during schema comparison, fall back to name comparison
                 pass
@@ -163,7 +176,7 @@ class VectaraTool(FunctionTool):
         except Exception as e:
             err_output = ToolOutput(
                 tool_name=self.metadata.name,
-                content=f"Tool Malfunction: {str(e)}",
+                content=f"Tool {self.metadata.name} Malfunction: {str(e)}",
                 raw_input={"args": args, "kwargs": kwargs},
                 raw_output={"response": str(e)},
             )
@@ -177,12 +190,13 @@ class VectaraTool(FunctionTool):
         except Exception as e:
             err_output = ToolOutput(
                 tool_name=self.metadata.name,
-                content=f"Tool Malfunction: {str(e)}",
+                content=f"Tool {self.metadata.name} Malfunction: {str(e)}",
                 raw_input={"args": args, "kwargs": kwargs},
                 raw_output={"response": str(e)},
             )
             return err_output
 
+
 def _create_tool_from_dynamic_function(
     function: Callable[..., ToolOutput],
     tool_name: str,
@@ -196,11 +210,17 @@ def _create_tool_from_dynamic_function(
     """
     fields = {}
     for param in base_params:
-        default_value =
+        default_value = (
+            param.default if param.default != inspect.Parameter.empty else ...
+        )
         fields[param.name] = (param.annotation, default_value)
     for field_name, field_info in tool_args_schema.model_fields.items():
         if field_name not in fields:
-            default_value =
+            default_value = (
+                field_info.default
+                if field_info.default is not PydanticUndefined
+                else ...
+            )
             fields[field_name] = (field_info.annotation, default_value)
     fn_schema = create_model(f"{tool_name}", **fields)
 
@@ -208,8 +228,16 @@ def _create_tool_from_dynamic_function(
         inspect.Parameter(
             name=field_name,
             kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
-            default=
-
+            default=(
+                field_info.default
+                if field_info.default is not PydanticUndefined
+                else inspect.Parameter.empty
+            ),
+            annotation=(
+                field_info.annotation
+                if hasattr(field_info, "annotation")
+                else field_info
+            ),
         )
         for field_name, field_info in tool_args_schema.model_fields.items()
         if field_name not in [p.name for p in base_params]
@@ -217,17 +245,21 @@ def _create_tool_from_dynamic_function(
     all_params = base_params + schema_params
 
     required_params = [p for p in all_params if p.default is inspect.Parameter.empty]
-    optional_params = [
+    optional_params = [
+        p for p in all_params if p.default is not inspect.Parameter.empty
+    ]
     sig = inspect.Signature(required_params + optional_params)
    function.__signature__ = sig
    function.__annotations__["return"] = dict[str, Any]
-    function.__name__ =
+    function.__name__ = re.sub(r"[^A-Za-z0-9_]", "_", tool_name)
 
    # Create the tool function signature string
    param_strs = []
    for param in all_params:
        annotation = param.annotation
-        type_name =
+        type_name = (
+            annotation.__name__ if hasattr(annotation, "__name__") else str(annotation)
+        )
        param_strs.append(f"{param.name}: {type_name}")
    args_str = ", ".join(param_strs)
    function_str = f"{tool_name}({args_str}) -> str"
@@ -242,7 +274,10 @@ def _create_tool_from_dynamic_function(
    )
    return tool
 
-
+
+def _build_filter_string(
+    kwargs: Dict[str, Any], tool_args_type: Dict[str, dict], fixed_filter: str
+) -> str:
     """
     Build filter string for Vectara from kwargs
     """
@@ -256,9 +291,9 @@ def _build_filter_string(kwargs: Dict[str, Any], tool_args_type: Dict[str, dict]
 
         # Determine the prefix for the key. Valid values are "doc" or "part"
         # default to 'doc' if not specified
-        tool_args_dict = tool_args_type.get(key, {
+        tool_args_dict = tool_args_type.get(key, {"type": "doc", "is_list": False})
         prefix = tool_args_dict.get(key, "doc")
-        is_list = tool_args_dict.get(
+        is_list = tool_args_dict.get("is_list", False)
 
         if prefix not in ["doc", "part"]:
             raise ValueError(
@@ -311,12 +346,10 @@ def _build_filter_string(kwargs: Dict[str, Any], tool_args_type: Dict[str, dict]
                     range_conditions.append(f"{prefix}.{key} {operator} {end_val}")
 
                 # Join the range conditions with AND
-                filter_parts.append(
+                filter_parts.append("( " + " AND ".join(range_conditions) + " )")
                 continue
 
-            raise ValueError(
-                f"Range operator requires two values for {key}: {value}"
-            )
+            raise ValueError(f"Range operator requires two values for {key}: {value}")
 
         # Check if value contains a known comparison operator at the start
         matched_operator = None
@@ -328,7 +361,7 @@ def _build_filter_string(kwargs: Dict[str, Any], tool_args_type: Dict[str, dict]
             # Break down operator from value
             # e.g. val_str = ">2022" --> operator = ">", rhs = "2022"
             if matched_operator:
-                rhs = val_str[len(matched_operator):].strip()
+                rhs = val_str[len(matched_operator) :].strip()
 
                 if matched_operator in numeric_only_ops:
                     # Must be numeric
@@ -342,7 +375,9 @@ def _build_filter_string(kwargs: Dict[str, Any], tool_args_type: Dict[str, dict]
                     if rhs.isdigit() or is_float(rhs):
                         filter_parts.append(f"{prefix}.{key}{matched_operator}{rhs}")
                     elif rhs.lower() in ["true", "false"]:
-                        filter_parts.append(
+                        filter_parts.append(
+                            f"{prefix}.{key}{matched_operator}{rhs.lower()}"
+                        )
                     else:
                         # For string operands, wrap them in quotes
                         filter_parts.append(f"{prefix}.{key}{matched_operator}'{rhs}'")
@@ -372,6 +407,7 @@ def _build_filter_string(kwargs: Dict[str, Any], tool_args_type: Dict[str, dict]
     else:
         return fixed_filter or filter_str
 
+
 class VectaraToolFactory:
     """
     A factory class for creating Vectara RAG tools.
@@ -479,7 +515,9 @@ class VectaraToolFactory:
             top_k = kwargs.pop("top_k", 10)
             summarize = kwargs.pop("summarize", True)
             try:
-                filter_string = _build_filter_string(
+                filter_string = _build_filter_string(
+                    kwargs, tool_args_type, fixed_filter
+                )
             except ValueError as e:
                 return ToolOutput(
                     tool_name=search_function.__name__,
@@ -492,7 +530,11 @@ class VectaraToolFactory:
                 summary_enabled=False,
                 similarity_top_k=top_k,
                 reranker=reranker,
-                rerank_k=
+                rerank_k=(
+                    rerank_k
+                    if rerank_k * self.num_corpora <= 100
+                    else int(100 / self.num_corpora)
+                ),
                 rerank_limit=rerank_limit,
                 rerank_cutoff=rerank_cutoff,
                 mmr_diversity_bias=mmr_diversity_bias,
@@ -530,9 +572,7 @@ class VectaraToolFactory:
             if summarize:
                 summaries_dict = asyncio.run(
                     summarize_documents(
-                        self.vectara_corpus_key,
-                        self.vectara_api_key,
-                        list(unique_ids)
+                        self.vectara_corpus_key, self.vectara_api_key, list(unique_ids)
                     )
                 )
                 for doc_id, metadata in docs:
@@ -540,7 +580,9 @@ class VectaraToolFactory:
                     tool_output += f"document_id: '{doc_id}'\nmetadata: '{metadata}'\nsummary: '{summary}'\n\n"
             else:
                 for doc_id, metadata in docs:
-                    tool_output +=
+                    tool_output += (
+                        f"document_id: '{doc_id}'\nmetadata: '{metadata}'\n\n"
+                    )
 
             out = ToolOutput(
                 tool_name=search_function.__name__,
@@ -551,16 +593,29 @@ class VectaraToolFactory:
             return out
 
         base_params = [
-            inspect.Parameter(
-
-
+            inspect.Parameter(
+                "query", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=str
+            ),
+            inspect.Parameter(
+                "top_k", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=int
+            ),
+            inspect.Parameter(
+                "summarize",
+                inspect.Parameter.POSITIONAL_OR_KEYWORD,
+                default=True,
+                annotation=bool,
+            ),
        ]
-        search_tool_extra_desc =
+        search_tool_extra_desc = (
+            tool_description
+            + "\n"
+            + """
        This tool is meant to perform a search for relevant documents, it is not meant for asking questions.
        The response includes metadata about each relevant document.
        If summarize=True, it also includes a summary of each document, but takes a lot longer to respond,
        so avoid using it unless necessary.
        """
+        )
 
        tool = _create_tool_from_dynamic_function(
            search_function,
@@ -683,7 +738,9 @@ class VectaraToolFactory:
 
            query = kwargs.pop("query")
            try:
-                filter_string = _build_filter_string(
+                filter_string = _build_filter_string(
+                    kwargs, tool_args_type, fixed_filter
+                )
            except ValueError as e:
                return ToolOutput(
                    tool_name=rag_function.__name__,
@@ -700,7 +757,11 @@ class VectaraToolFactory:
                summary_prompt_name=vectara_summarizer,
                prompt_text=vectara_prompt_text,
                reranker=reranker,
-                rerank_k=
+                rerank_k=(
+                    rerank_k
+                    if rerank_k * self.num_corpora <= 100
+                    else int(100 / self.num_corpora)
+                ),
                rerank_limit=rerank_limit,
                rerank_cutoff=rerank_cutoff,
                mmr_diversity_bias=mmr_diversity_bias,
@@ -792,7 +853,9 @@ class VectaraToolFactory:
            return out
 
        base_params = [
-            inspect.Parameter(
+            inspect.Parameter(
+                "query", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=str
+            ),
        ]
        tool = _create_tool_from_dynamic_function(
            rag_function,
@@ -803,6 +866,7 @@ class VectaraToolFactory:
        )
        return tool
 
+
 class ToolsFactory:
     """
     A factory class for creating agent tools.
@@ -811,7 +875,9 @@ class ToolsFactory:
     def __init__(self, agent_config: AgentConfig = None) -> None:
         self.agent_config = agent_config
 
-    def create_tool(
+    def create_tool(
+        self, function: Callable, tool_type: ToolType = ToolType.QUERY
+    ) -> VectaraTool:
         """
         Create a tool from a function.
 
@@ -845,7 +911,9 @@ class ToolsFactory:
         """
         # Dynamically install and import the module
         if tool_package_name not in LI_packages:
-            raise ValueError(
+            raise ValueError(
+                f"Tool package {tool_package_name} from LlamaIndex not supported by Vectara-agentic."
+            )
 
         module_name = f"llama_index.tools.{tool_package_name}"
         module = importlib.import_module(module_name)
@@ -860,11 +928,18 @@ class ToolsFactory:
                 tool.metadata.name = tool_name_prefix + "_" + tool.metadata.name
             if isinstance(func_type, dict):
                 if tool_spec_name not in func_type.keys():
-                    raise ValueError(
+                    raise ValueError(
+                        f"Tool spec {tool_spec_name} not found in package {tool_package_name}."
+                    )
                 tool_type = func_type[tool_spec_name]
             else:
                 tool_type = func_type
-            vtool = VectaraTool(
+            vtool = VectaraTool(
+                tool_type=tool_type,
+                fn=tool.fn,
+                metadata=tool.metadata,
+                async_fn=tool.async_fn,
+            )
             vtools.append(vtool)
         return vtools
 
@@ -873,7 +948,10 @@ class ToolsFactory:
         Create a list of standard tools.
         """
         tc = ToolsCatalog(self.agent_config)
-        return [
+        return [
+            self.create_tool(tool)
+            for tool in [tc.summarize_text, tc.rephrase_text, tc.critique_text]
+        ]
 
     def guardrail_tools(self) -> List[FunctionTool]:
         """
@@ -885,7 +963,9 @@ class ToolsFactory:
         """
         Create a list of financial tools.
         """
-        return self.get_llama_index_tools(
+        return self.get_llama_index_tools(
+            tool_package_name="yahoo_finance", tool_spec_name="YahooFinanceToolSpec"
+        )
 
     def legal_tools(self) -> List[FunctionTool]:
         """
@@ -917,7 +997,9 @@ class ToolsFactory:
             """,
         )
 
-        return [
+        return [
+            self.create_tool(tool) for tool in [summarize_legal_text, critique_as_judge]
+        ]
 
     def database_tools(
         self,
@@ -954,16 +1036,22 @@ class ToolsFactory:
             List[VectaraTool]: A list of VectaraTool objects.
         """
         if sql_database:
-            dbt = DatabaseTools(
+            dbt = DatabaseTools(
+                tool_name_prefix=tool_name_prefix,
+                sql_database=sql_database,
+                max_rows=max_rows,
+            )
         else:
             if scheme in ["postgresql", "mysql", "sqlite", "mssql", "oracle"]:
                 dbt = DatabaseTools(
+                    tool_name_prefix=tool_name_prefix,
                     scheme=scheme,
                     host=host,
                     port=port,
                     user=user,
                     password=password,
                     dbname=dbname,
+                    max_rows=max_rows,
                 )
             else:
                 raise ValueError(
@@ -977,14 +1065,14 @@ class ToolsFactory:
         for tool in tools:
             if content_description:
                 tool.metadata.description = (
-                    tool.metadata.description
+                    tool.metadata.description
+                    + f"The database tables include data about {content_description}."
                 )
-            if len(tool_name_prefix) > 0:
-                tool.metadata.name = tool_name_prefix + "_" + tool.metadata.name
             vtool = VectaraTool(
                 tool_type=ToolType.QUERY,
-                fn=tool.fn,
-
+                fn=tool.fn,
+                async_fn=tool.async_fn,
+                metadata=tool.metadata,
             )
             vtools.append(vtool)
         return vtools
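A recurring edit in both the RAG and search tool factories is the cap on `rerank_k`: the requested value is used only while `rerank_k * num_corpora <= 100`, otherwise it drops to `int(100 / num_corpora)`, presumably to keep the combined rerank budget across corpora at or under 100. A sketch of the expression in isolation (`num_corpora` normally comes from the factory instance):

```python
def effective_rerank_k(rerank_k: int, num_corpora: int) -> int:
    # Mirrors the inline expression used in create_rag_tool and create_search_tool.
    return rerank_k if rerank_k * num_corpora <= 100 else int(100 / num_corpora)

print(effective_rerank_k(50, 1))   # 50 -- under the budget, kept as requested
print(effective_rerank_k(50, 3))   # 33 -- capped so the combined total stays <= 100
```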
{vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectara_agentic
-Version: 0.2.9
+Version: 0.2.11
 Summary: A Python package for creating AI Assistants and AI Agents with Vectara
 Home-page: https://github.com/vectara/py-vectara-agentic
 Author: Ofer Mendelevitch
@@ -16,31 +16,31 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: llama-index==0.12.
+Requires-Dist: llama-index==0.12.29
 Requires-Dist: llama-index-indices-managed-vectara==0.4.2
 Requires-Dist: llama-index-agent-llm-compiler==0.3.0
 Requires-Dist: llama-index-agent-lats==0.3.0
 Requires-Dist: llama-index-agent-openai==0.4.6
-Requires-Dist: llama-index-llms-openai==0.3.
+Requires-Dist: llama-index-llms-openai==0.3.32
 Requires-Dist: llama-index-llms-anthropic==0.6.10
 Requires-Dist: llama-index-llms-together==0.3.1
 Requires-Dist: llama-index-llms-groq==0.3.1
 Requires-Dist: llama-index-llms-fireworks==0.3.2
 Requires-Dist: llama-index-llms-cohere==0.4.0
-Requires-Dist: llama-index-llms-gemini==0.4.
-Requires-Dist: llama-index-llms-bedrock==0.3.
+Requires-Dist: llama-index-llms-gemini==0.4.14
+Requires-Dist: llama-index-llms-bedrock==0.3.8
 Requires-Dist: llama-index-tools-yahoo-finance==0.3.0
 Requires-Dist: llama-index-tools-arxiv==0.3.0
 Requires-Dist: llama-index-tools-database==0.3.0
 Requires-Dist: llama-index-tools-google==0.3.0
 Requires-Dist: llama-index-tools-tavily_research==0.3.0
+Requires-Dist: llama_index.tools.brave_search==0.3.0
 Requires-Dist: llama-index-tools-neo4j==0.3.0
-Requires-Dist: llama-index-graph-stores-kuzu==0.
+Requires-Dist: llama-index-graph-stores-kuzu==0.7.0
 Requires-Dist: llama-index-tools-slack==0.3.0
 Requires-Dist: llama-index-tools-exa==0.3.0
-Requires-Dist: tavily-python==0.5.
-Requires-Dist: exa-py==1.
-Requires-Dist: yahoo-finance==1.4.0
+Requires-Dist: tavily-python==0.5.4
+Requires-Dist: exa-py==1.9.1
 Requires-Dist: openinference-instrumentation-llama-index==3.3.3
 Requires-Dist: opentelemetry-proto==1.31.0
 Requires-Dist: arize-phoenix==8.14.1
@@ -176,7 +176,7 @@ query_financial_reports_tool = vec_factory.create_rag_tool(
 )
 ```
 
-See the [docs](https://vectara.github.io/vectara-agentic
+See the [docs](https://vectara.github.io/py-vectara-agentic/latest/) for additional arguments to customize your Vectara RAG tool.
 
 ### 3. Create other tools (optional)
 
@@ -205,7 +205,7 @@ agent = Agent(
 )
 ```
 
-See the [docs](https://vectara.github.io/vectara-agentic
+See the [docs](https://vectara.github.io/py-vectara-agentic/latest/) for additional arguments, including `agent_progress_callback` and `query_logging_callback`.
 
 ### 5. Run a chat interaction
 
@@ -376,7 +376,7 @@ specified in the Agent configuration.
 - `load_unique_values`: returns the top unique values for a given column
 
 In addition, we include various other tools from LlamaIndex ToolSpecs:
-* Tavily search
+* Tavily search, EXA.AI and Brave Search
 * arxiv
 * neo4j & Kuzu for Graph DB integration
 * Google tools (including gmail, calendar, and search)
{vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/RECORD
CHANGED

@@ -6,24 +6,24 @@ tests/test_agent_type.py,sha256=JM0Q2GBGHSADoBacz_DW551zWSfbpf7qa8xXqtyWsc4,5671
 tests/test_fallback.py,sha256=M5YD7NHZ0joVU1frYIr9_OiRAIje5mrXrYVcekzlyGs,2829
 tests/test_private_llm.py,sha256=CY-_rCpxGUuxnZ3ypkodw5Jj-sJCNdh6rLbCvULwuJI,2247
 tests/test_serialization.py,sha256=Ed23GN2zhSJNdPFrVK4aqLkOhJKviczR_o0t-r9TuRI,4762
-tests/test_tools.py,sha256=
+tests/test_tools.py,sha256=as0rEAKAs6ekvqFDCcq1smRWKhQm5EaH2PUWT8hg1qQ,5726
 tests/test_workflow.py,sha256=lVyrVHdRO5leYNbYtHTmKqMX0c8_xehCpUA7cXQKVsc,2175
 vectara_agentic/__init__.py,sha256=2GLDS3U6KckK-dBRl9v_x1kSV507gEhjOfuMmmu0Qxg,850
-vectara_agentic/_callback.py,sha256=
-vectara_agentic/_observability.py,sha256=
-vectara_agentic/_prompts.py,sha256=
-vectara_agentic/_version.py,sha256=
-vectara_agentic/agent.py,sha256=
+vectara_agentic/_callback.py,sha256=ron49t1t-ox-736WaXzrZ99vhN4NI9bMiHFyj0iIPqg,13062
+vectara_agentic/_observability.py,sha256=BA2zhwa5930aaDUJxHefPlmIPt8kZOuLHVBc9PtYNuU,3839
+vectara_agentic/_prompts.py,sha256=CKbsFrosoM6bPH02t2R5_K3jzVzaxJAl85qO3mEAQ3U,9439
+vectara_agentic/_version.py,sha256=uneBdCHiroBsYz5R-8jYqrJU6UuA9Nu4vCqLtO0VSsE,66
+vectara_agentic/agent.py,sha256=KX0VYQuGFkK_CELjUFdxXWYHng32GFjsLdRdH-gR7aM,43970
 vectara_agentic/agent_config.py,sha256=E-rtYMcpoGxnEAyy8231bizo2n0uGQ2qWxuSgTEfwdQ,4327
 vectara_agentic/agent_endpoint.py,sha256=QIMejCLlpW2qzXxeDAxv3anF46XMDdVMdKGWhJh3azY,1996
-vectara_agentic/db_tools.py,sha256=
-vectara_agentic/sub_query_workflow.py,sha256=
-vectara_agentic/tools.py,sha256=
+vectara_agentic/db_tools.py,sha256=zhP1KIRNiE6BKD69VGmUdcjeKSZ6g0kTIsJdTDNCuv4,11141
+vectara_agentic/sub_query_workflow.py,sha256=xjySd2qjLAKwK6XuS0R0PTyk2uXraHCgCbDP1xDoFVI,12175
+vectara_agentic/tools.py,sha256=n06CwlEqOHlawEJj6BX8xHM5-kMrBQO48Jo68GKRKes,43874
 vectara_agentic/tools_catalog.py,sha256=oiw3wAfbpFhh0_6rMvZsyPqWV6QIzHqhZCNzqRxuyV8,4818
 vectara_agentic/types.py,sha256=HcS7vR8P2v2xQTlOc6ZFV2vvlr3OpzSNWhtcLMxqUZc,1792
 vectara_agentic/utils.py,sha256=4vA5MyNoG47_7eHuLFQByiG_FHWbrQ6ZJDsdqHUwiJA,7720
-vectara_agentic-0.2.
-vectara_agentic-0.2.
-vectara_agentic-0.2.
-vectara_agentic-0.2.
-vectara_agentic-0.2.
+vectara_agentic-0.2.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vectara_agentic-0.2.11.dist-info/METADATA,sha256=c0ue2vnkkIwgNueoHQILYkui5eTDbXB7-SnBBnbWK0A,25088
+vectara_agentic-0.2.11.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+vectara_agentic-0.2.11.dist-info/top_level.txt,sha256=Y7TQTFdOYGYodQRltUGRieZKIYuzeZj2kHqAUpfCUfg,22
+vectara_agentic-0.2.11.dist-info/RECORD,,
{vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/WHEEL
File without changes

{vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/licenses/LICENSE
File without changes

{vectara_agentic-0.2.9.dist-info → vectara_agentic-0.2.11.dist-info}/top_level.txt
File without changes