vectara-agentic 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic. Click here for more details.
- tests/test_agent.py +2 -2
- tests/test_agent_planning.py +1 -1
- tests/test_groq.py +3 -1
- tests/test_tools.py +119 -17
- tests/test_workflow.py +67 -0
- vectara_agentic/_observability.py +1 -1
- vectara_agentic/_prompts.py +4 -2
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +147 -69
- vectara_agentic/db_tools.py +2 -2
- vectara_agentic/llm_utils.py +10 -6
- vectara_agentic/sub_query_workflow.py +5 -2
- vectara_agentic/tool_utils.py +112 -84
- vectara_agentic/tools.py +3 -1
- vectara_agentic/tools_catalog.py +1 -0
- vectara_agentic/utils.py +1 -1
- {vectara_agentic-0.2.15.dist-info → vectara_agentic-0.2.17.dist-info}/METADATA +62 -19
- vectara_agentic-0.2.17.dist-info/RECORD +34 -0
- {vectara_agentic-0.2.15.dist-info → vectara_agentic-0.2.17.dist-info}/WHEEL +1 -1
- vectara_agentic-0.2.15.dist-info/RECORD +0 -34
- {vectara_agentic-0.2.15.dist-info → vectara_agentic-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.2.15.dist-info → vectara_agentic-0.2.17.dist-info}/top_level.txt +0 -0
vectara_agentic/agent.py
CHANGED
|
@@ -14,14 +14,11 @@ import importlib
|
|
|
14
14
|
from collections import Counter
|
|
15
15
|
import inspect
|
|
16
16
|
from inspect import Signature, Parameter, ismethod
|
|
17
|
-
|
|
17
|
+
from pydantic import Field, create_model, ValidationError, BaseModel
|
|
18
18
|
import cloudpickle as pickle
|
|
19
19
|
|
|
20
20
|
from dotenv import load_dotenv
|
|
21
21
|
|
|
22
|
-
from pydantic import Field, create_model, ValidationError
|
|
23
|
-
|
|
24
|
-
|
|
25
22
|
from llama_index.core.memory import ChatMemoryBuffer
|
|
26
23
|
from llama_index.core.llms import ChatMessage, MessageRole
|
|
27
24
|
from llama_index.core.tools import FunctionTool
|
|
@@ -38,7 +35,7 @@ from llama_index.core.callbacks.base_handler import BaseCallbackHandler
|
|
|
38
35
|
from llama_index.agent.openai import OpenAIAgent
|
|
39
36
|
from llama_index.core.agent.runner.base import AgentRunner
|
|
40
37
|
from llama_index.core.agent.types import BaseAgent
|
|
41
|
-
from llama_index.core.workflow import Workflow
|
|
38
|
+
from llama_index.core.workflow import Workflow, Context
|
|
42
39
|
|
|
43
40
|
from .types import (
|
|
44
41
|
AgentType,
|
|
@@ -145,21 +142,53 @@ def get_field_type(field_schema: dict) -> Any:
|
|
|
145
142
|
"array": list,
|
|
146
143
|
"object": dict,
|
|
147
144
|
"number": float,
|
|
145
|
+
"null": type(None),
|
|
148
146
|
}
|
|
147
|
+
if not field_schema: # Handles empty schema {}
|
|
148
|
+
return Any
|
|
149
|
+
|
|
149
150
|
if "anyOf" in field_schema:
|
|
150
151
|
types = []
|
|
151
|
-
for
|
|
152
|
-
#
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
else:
|
|
156
|
-
types.append(Any)
|
|
157
|
-
# Return a Union of the types. For example, Union[str, int]
|
|
152
|
+
for option_schema in field_schema["anyOf"]:
|
|
153
|
+
types.append(get_field_type(option_schema)) # Recursive call
|
|
154
|
+
if not types:
|
|
155
|
+
return Any
|
|
158
156
|
return Union[tuple(types)]
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
157
|
+
|
|
158
|
+
if "type" in field_schema and isinstance(field_schema["type"], list):
|
|
159
|
+
types = []
|
|
160
|
+
for type_name in field_schema["type"]:
|
|
161
|
+
if type_name == "array":
|
|
162
|
+
item_schema = field_schema.get("items", {})
|
|
163
|
+
types.append(List[get_field_type(item_schema)])
|
|
164
|
+
elif type_name in json_type_to_python:
|
|
165
|
+
types.append(json_type_to_python[type_name])
|
|
166
|
+
else:
|
|
167
|
+
types.append(Any) # Fallback for unknown types in the list
|
|
168
|
+
if not types:
|
|
169
|
+
return Any
|
|
170
|
+
return Union[tuple(types)] # type: ignore
|
|
171
|
+
|
|
172
|
+
if "type" in field_schema:
|
|
173
|
+
schema_type_name = field_schema["type"]
|
|
174
|
+
if schema_type_name == "array":
|
|
175
|
+
item_schema = field_schema.get(
|
|
176
|
+
"items", {}
|
|
177
|
+
) # Default to Any if "items" is missing
|
|
178
|
+
return List[get_field_type(item_schema)]
|
|
179
|
+
|
|
180
|
+
return json_type_to_python.get(schema_type_name, Any)
|
|
181
|
+
|
|
182
|
+
# If only "items" is present (implies array by some conventions, but less standard)
|
|
183
|
+
# Or if it's a schema with other keywords like 'properties' (implying object)
|
|
184
|
+
# For simplicity, if no "type" or "anyOf" at this point, default to Any or add more specific handling.
|
|
185
|
+
# If 'properties' in field_schema or 'additionalProperties' in field_schema, it's likely an object.
|
|
186
|
+
if "properties" in field_schema or "additionalProperties" in field_schema:
|
|
187
|
+
# This path might need to reconstruct a nested Pydantic model if you encounter such schemas.
|
|
188
|
+
# For now, treating as 'dict' or 'Any' might be a simpler placeholder.
|
|
189
|
+
return dict # Or Any, or more sophisticated object reconstruction.
|
|
190
|
+
|
|
191
|
+
return Any
|
|
163
192
|
|
|
164
193
|
|
|
165
194
|
class Agent:
|
|
@@ -169,7 +198,7 @@ class Agent:
|
|
|
169
198
|
|
|
170
199
|
def __init__(
|
|
171
200
|
self,
|
|
172
|
-
tools:
|
|
201
|
+
tools: List[FunctionTool],
|
|
173
202
|
topic: str = "general",
|
|
174
203
|
custom_instructions: str = "",
|
|
175
204
|
general_instructions: str = GENERAL_INSTRUCTIONS,
|
|
@@ -184,7 +213,7 @@ class Agent:
|
|
|
184
213
|
fallback_agent_config: Optional[AgentConfig] = None,
|
|
185
214
|
chat_history: Optional[list[Tuple[str, str]]] = None,
|
|
186
215
|
validate_tools: bool = False,
|
|
187
|
-
workflow_cls: Workflow = None,
|
|
216
|
+
workflow_cls: Optional[Workflow] = None,
|
|
188
217
|
workflow_timeout: int = 120,
|
|
189
218
|
) -> None:
|
|
190
219
|
"""
|
|
@@ -258,13 +287,15 @@ class Agent:
|
|
|
258
287
|
A tool is invalid if it is mentioned in the instructions but not in the tools list.
|
|
259
288
|
A tool's name must have at least two characters.
|
|
260
289
|
Your response should be a comma-separated list of the invalid tools.
|
|
261
|
-
If
|
|
290
|
+
If no invalid tools exist, respond with "<OKAY>" (and nothing else).
|
|
262
291
|
"""
|
|
263
292
|
llm = get_llm(LLMRole.MAIN, config=self.agent_config)
|
|
264
|
-
bad_tools_str = llm.complete(prompt).text
|
|
293
|
+
bad_tools_str = llm.complete(prompt).text.strip('\n')
|
|
265
294
|
if bad_tools_str and bad_tools_str != "<OKAY>":
|
|
266
295
|
bad_tools = [tool.strip() for tool in bad_tools_str.split(",")]
|
|
267
|
-
numbered = ", ".join(
|
|
296
|
+
numbered = ", ".join(
|
|
297
|
+
f"({i}) {tool}" for i, tool in enumerate(bad_tools, 1)
|
|
298
|
+
)
|
|
268
299
|
raise ValueError(
|
|
269
300
|
f"The Agent custom instructions mention these invalid tools: {numbered}"
|
|
270
301
|
)
|
|
@@ -612,7 +643,7 @@ class Agent:
|
|
|
612
643
|
validate_tools: bool = False,
|
|
613
644
|
fallback_agent_config: Optional[AgentConfig] = None,
|
|
614
645
|
chat_history: Optional[list[Tuple[str, str]]] = None,
|
|
615
|
-
workflow_cls: Workflow = None,
|
|
646
|
+
workflow_cls: Optional[Workflow] = None,
|
|
616
647
|
workflow_timeout: int = 120,
|
|
617
648
|
) -> "Agent":
|
|
618
649
|
"""
|
|
@@ -681,8 +712,8 @@ class Agent:
|
|
|
681
712
|
vectara_rerank_limit: Optional[int] = None,
|
|
682
713
|
vectara_rerank_cutoff: Optional[float] = None,
|
|
683
714
|
vectara_diversity_bias: float = 0.2,
|
|
684
|
-
vectara_udf_expression: str = None,
|
|
685
|
-
vectara_rerank_chain: List[Dict] = None,
|
|
715
|
+
vectara_udf_expression: Optional[str] = None,
|
|
716
|
+
vectara_rerank_chain: Optional[List[Dict]] = None,
|
|
686
717
|
vectara_n_sentences_before: int = 2,
|
|
687
718
|
vectara_n_sentences_after: int = 2,
|
|
688
719
|
vectara_summary_num_results: int = 10,
|
|
@@ -1016,7 +1047,7 @@ class Agent:
|
|
|
1016
1047
|
time.sleep(1)
|
|
1017
1048
|
attempt += 1
|
|
1018
1049
|
|
|
1019
|
-
return
|
|
1050
|
+
return AgentStreamingResponse(
|
|
1020
1051
|
response=(
|
|
1021
1052
|
f"For {orig_llm} LLM - failure can't be resolved after "
|
|
1022
1053
|
f"{max_attempts} attempts ({last_error})."
|
|
@@ -1028,7 +1059,11 @@ class Agent:
|
|
|
1028
1059
|
# workflow will always get these arguments in the StartEvent: agent, tools, llm, verbose
|
|
1029
1060
|
# the inputs argument comes from the call to run()
|
|
1030
1061
|
#
|
|
1031
|
-
async def run(
|
|
1062
|
+
async def run(
|
|
1063
|
+
self,
|
|
1064
|
+
inputs: Any,
|
|
1065
|
+
verbose: bool = False,
|
|
1066
|
+
) -> Any:
|
|
1032
1067
|
"""
|
|
1033
1068
|
Run a workflow using the agent.
|
|
1034
1069
|
workflow class must be provided in the agent constructor.
|
|
@@ -1036,7 +1071,7 @@ class Agent:
|
|
|
1036
1071
|
inputs (Any): The inputs to the workflow.
|
|
1037
1072
|
verbose (bool, optional): Whether to print verbose output. Defaults to False.
|
|
1038
1073
|
Returns:
|
|
1039
|
-
Any: The output of the workflow.
|
|
1074
|
+
Any: The output or context of the workflow.
|
|
1040
1075
|
"""
|
|
1041
1076
|
# Create workflow
|
|
1042
1077
|
if self.workflow_cls:
|
|
@@ -1048,20 +1083,38 @@ class Agent:
|
|
|
1048
1083
|
if not isinstance(inputs, self.workflow_cls.InputsModel):
|
|
1049
1084
|
raise ValueError(f"Inputs must be an instance of {workflow.InputsModel}.")
|
|
1050
1085
|
|
|
1051
|
-
|
|
1052
|
-
result = await workflow.run(
|
|
1053
|
-
agent=self,
|
|
1054
|
-
tools=self.tools,
|
|
1055
|
-
llm=self.llm,
|
|
1056
|
-
verbose=verbose,
|
|
1057
|
-
inputs=inputs,
|
|
1058
|
-
)
|
|
1059
|
-
|
|
1060
|
-
# return output in the form of workflow.OutputsModel
|
|
1086
|
+
workflow_context = Context(workflow=workflow)
|
|
1061
1087
|
try:
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1088
|
+
# run workflow
|
|
1089
|
+
result = await workflow.run(
|
|
1090
|
+
ctx=workflow_context,
|
|
1091
|
+
agent=self,
|
|
1092
|
+
tools=self.tools,
|
|
1093
|
+
llm=self.llm,
|
|
1094
|
+
verbose=verbose,
|
|
1095
|
+
inputs=inputs,
|
|
1096
|
+
)
|
|
1097
|
+
|
|
1098
|
+
# return output in the form of workflow.OutputsModel(BaseModel)
|
|
1099
|
+
try:
|
|
1100
|
+
output = workflow.OutputsModel.model_validate(result)
|
|
1101
|
+
except ValidationError as e:
|
|
1102
|
+
raise ValueError(f"Failed to map workflow output to model: {e}") from e
|
|
1103
|
+
|
|
1104
|
+
except Exception as e:
|
|
1105
|
+
outputs_model_on_fail_cls = getattr(workflow.__class__, "OutputModelOnFail", None)
|
|
1106
|
+
if outputs_model_on_fail_cls:
|
|
1107
|
+
model_fields = outputs_model_on_fail_cls.model_fields
|
|
1108
|
+
input_dict = {
|
|
1109
|
+
key: await workflow_context.get(key, None)
|
|
1110
|
+
for key in model_fields
|
|
1111
|
+
}
|
|
1112
|
+
|
|
1113
|
+
# return output in the form of workflow.OutputModelOnFail(BaseModel)
|
|
1114
|
+
output = outputs_model_on_fail_cls.model_validate(input_dict)
|
|
1115
|
+
else:
|
|
1116
|
+
print(f"Vectara Agentic: Workflow failed with unexpected error: {e}")
|
|
1117
|
+
raise type(e)(str(e)).with_traceback(e.__traceback__)
|
|
1065
1118
|
|
|
1066
1119
|
return output
|
|
1067
1120
|
|
|
@@ -1086,12 +1139,12 @@ class Agent:
|
|
|
1086
1139
|
fn_schema_serialized = {
|
|
1087
1140
|
"schema": (
|
|
1088
1141
|
fn_schema_cls.model_json_schema()
|
|
1089
|
-
if hasattr(fn_schema_cls, "model_json_schema")
|
|
1142
|
+
if fn_schema_cls and hasattr(fn_schema_cls, "model_json_schema")
|
|
1090
1143
|
else None
|
|
1091
1144
|
),
|
|
1092
1145
|
"metadata": {
|
|
1093
|
-
"module": fn_schema_cls.__module__,
|
|
1094
|
-
"class": fn_schema_cls.__name__,
|
|
1146
|
+
"module": fn_schema_cls.__module__ if fn_schema_cls else None,
|
|
1147
|
+
"class": fn_schema_cls.__name__ if fn_schema_cls else None,
|
|
1095
1148
|
},
|
|
1096
1149
|
}
|
|
1097
1150
|
else:
|
|
@@ -1140,44 +1193,69 @@ class Agent:
|
|
|
1140
1193
|
if data.get("fallback_agent_config")
|
|
1141
1194
|
else None
|
|
1142
1195
|
)
|
|
1143
|
-
tools = []
|
|
1196
|
+
tools: list[FunctionTool] = []
|
|
1144
1197
|
|
|
1145
1198
|
for tool_data in data["tools"]:
|
|
1146
|
-
|
|
1199
|
+
query_args_model = None
|
|
1147
1200
|
if tool_data.get("fn_schema"):
|
|
1148
1201
|
schema_info = tool_data["fn_schema"]
|
|
1149
1202
|
try:
|
|
1150
1203
|
module_name = schema_info["metadata"]["module"]
|
|
1151
1204
|
class_name = schema_info["metadata"]["class"]
|
|
1152
1205
|
mod = importlib.import_module(module_name)
|
|
1153
|
-
|
|
1154
|
-
|
|
1206
|
+
candidate_cls = getattr(mod, class_name)
|
|
1207
|
+
if inspect.isclass(candidate_cls) and issubclass(
|
|
1208
|
+
candidate_cls, BaseModel
|
|
1209
|
+
):
|
|
1210
|
+
query_args_model = candidate_cls
|
|
1211
|
+
else:
|
|
1212
|
+
# It's not the Pydantic model class we expected (e.g., it's the function itself)
|
|
1213
|
+
# Force fallback to JSON schema reconstruction by raising an error.
|
|
1214
|
+
raise ImportError(
|
|
1215
|
+
f"Retrieved '{class_name}' from '{module_name}' is not a Pydantic BaseModel class. "
|
|
1216
|
+
"Falling back to JSON schema reconstruction."
|
|
1217
|
+
)
|
|
1155
1218
|
except Exception:
|
|
1156
1219
|
# Fallback: rebuild using the JSON schema
|
|
1157
1220
|
field_definitions = {}
|
|
1158
|
-
|
|
1159
|
-
|
|
1221
|
+
json_schema_to_rebuild = schema_info.get("schema")
|
|
1222
|
+
if json_schema_to_rebuild and isinstance(
|
|
1223
|
+
json_schema_to_rebuild, dict
|
|
1160
1224
|
):
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1225
|
+
for field, values in json_schema_to_rebuild.get(
|
|
1226
|
+
"properties", {}
|
|
1227
|
+
).items():
|
|
1228
|
+
field_type = get_field_type(values)
|
|
1229
|
+
field_description = values.get(
|
|
1230
|
+
"description"
|
|
1231
|
+
) # Defaults to None
|
|
1232
|
+
if "default" in values:
|
|
1233
|
+
field_definitions[field] = (
|
|
1234
|
+
field_type,
|
|
1235
|
+
Field(
|
|
1236
|
+
description=field_description,
|
|
1237
|
+
default=values["default"],
|
|
1238
|
+
),
|
|
1239
|
+
)
|
|
1240
|
+
else:
|
|
1241
|
+
field_definitions[field] = (
|
|
1242
|
+
field_type,
|
|
1243
|
+
Field(description=field_description),
|
|
1244
|
+
)
|
|
1245
|
+
query_args_model = create_model(
|
|
1246
|
+
json_schema_to_rebuild.get(
|
|
1247
|
+
"title", f"{tool_data['name']}_QueryArgs"
|
|
1248
|
+
),
|
|
1249
|
+
**field_definitions,
|
|
1250
|
+
)
|
|
1251
|
+
else: # If schema part is missing or not a dict, create a default empty model
|
|
1252
|
+
query_args_model = create_model(
|
|
1253
|
+
f"{tool_data['name']}_QueryArgs"
|
|
1254
|
+
)
|
|
1255
|
+
|
|
1256
|
+
# If fn_schema was not in tool_data or reconstruction failed badly, default to empty pydantic model
|
|
1257
|
+
if query_args_model is None:
|
|
1258
|
+
query_args_model = create_model(f"{tool_data['name']}_QueryArgs")
|
|
1181
1259
|
|
|
1182
1260
|
fn = (
|
|
1183
1261
|
pickle.loads(tool_data["fn"].encode("latin-1"))
|
vectara_agentic/db_tools.py
CHANGED
|
@@ -112,7 +112,7 @@ class DatabaseTools:
|
|
|
112
112
|
List[str]: a list of Document objects from the database.
|
|
113
113
|
"""
|
|
114
114
|
if sql_query is None:
|
|
115
|
-
raise ValueError("A query parameter is necessary to filter the data")
|
|
115
|
+
raise ValueError("A query parameter is necessary to filter the data.")
|
|
116
116
|
|
|
117
117
|
count_query = f"SELECT COUNT(*) FROM ({sql_query})"
|
|
118
118
|
try:
|
|
@@ -123,7 +123,7 @@ class DatabaseTools:
|
|
|
123
123
|
if num_rows > self.max_rows:
|
|
124
124
|
return [
|
|
125
125
|
f"The query is expected to return more than {self.max_rows} rows. "
|
|
126
|
-
"Please refactor your query to make it return less rows. "
|
|
126
|
+
"Please refactor your query to make it return less rows and try again. "
|
|
127
127
|
]
|
|
128
128
|
try:
|
|
129
129
|
res = self._load_data(sql_query)
|
vectara_agentic/llm_utils.py
CHANGED
|
@@ -69,12 +69,16 @@ def get_tokenizer_for_model(
|
|
|
69
69
|
"""
|
|
70
70
|
Get the tokenizer for the specified model, as determined by the role & config.
|
|
71
71
|
"""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
72
|
+
try:
|
|
73
|
+
model_provider, model_name = _get_llm_params_for_role(role, config)
|
|
74
|
+
if model_provider == ModelProvider.OPENAI:
|
|
75
|
+
# This might raise an exception if the model_name is unknown to tiktoken
|
|
76
|
+
return tiktoken.encoding_for_model(model_name).encode
|
|
77
|
+
if model_provider == ModelProvider.ANTHROPIC:
|
|
78
|
+
return Anthropic().tokenizer
|
|
79
|
+
except Exception:
|
|
80
|
+
print(f"Error getting tokenizer for model {model_name}, ignoring")
|
|
81
|
+
return None
|
|
78
82
|
return None
|
|
79
83
|
|
|
80
84
|
|
|
vectara_agentic/sub_query_workflow.py
CHANGED
|
@@ -50,7 +50,7 @@ class SubQuestionQueryWorkflow(Workflow):
|
|
|
50
50
|
answer: str
|
|
51
51
|
|
|
52
52
|
@step
|
|
53
|
-
async def query(self, ctx: Context, ev: StartEvent) -> QueryEvent:
|
|
53
|
+
async def query(self, ctx: Context, ev: StartEvent) -> QueryEvent | None:
|
|
54
54
|
"""
|
|
55
55
|
Given a user question, and a list of tools, output a list of relevant
|
|
56
56
|
sub-questions, such that the answers to all the sub-questions put together
|
|
@@ -130,7 +130,10 @@ class SubQuestionQueryWorkflow(Workflow):
|
|
|
130
130
|
if sub_questions is None:
|
|
131
131
|
raise ValueError(f"Invalid LLM response format: {response_str}")
|
|
132
132
|
if not sub_questions:
|
|
133
|
-
|
|
133
|
+
# If the LLM returns an empty list, we need to handle it gracefully
|
|
134
|
+
# We use the original query as a single question fallback
|
|
135
|
+
print("LLM returned empty sub-questions list")
|
|
136
|
+
sub_questions = [original_query]
|
|
134
137
|
|
|
135
138
|
await ctx.set("sub_question_count", len(sub_questions))
|
|
136
139
|
for question in sub_questions:
|