vectara-agentic 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

vectara_agentic/agent.py CHANGED
@@ -14,14 +14,11 @@ import importlib
 from collections import Counter
 import inspect
 from inspect import Signature, Parameter, ismethod
-
+from pydantic import Field, create_model, ValidationError, BaseModel
 import cloudpickle as pickle
 
 from dotenv import load_dotenv
 
-from pydantic import Field, create_model, ValidationError
-
-
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.core.tools import FunctionTool
@@ -38,7 +35,7 @@ from llama_index.core.callbacks.base_handler import BaseCallbackHandler
 from llama_index.agent.openai import OpenAIAgent
 from llama_index.core.agent.runner.base import AgentRunner
 from llama_index.core.agent.types import BaseAgent
-from llama_index.core.workflow import Workflow
+from llama_index.core.workflow import Workflow, Context
 
 from .types import (
     AgentType,
@@ -145,21 +142,53 @@ def get_field_type(field_schema: dict) -> Any:
         "array": list,
         "object": dict,
         "number": float,
+        "null": type(None),
     }
+    if not field_schema:  # Handles empty schema {}
+        return Any
+
     if "anyOf" in field_schema:
         types = []
-        for option in field_schema["anyOf"]:
-            # If the option has a type, convert it; otherwise, use Any.
-            if "type" in option:
-                types.append(json_type_to_python.get(option["type"], Any))
-            else:
-                types.append(Any)
-        # Return a Union of the types. For example, Union[str, int]
+        for option_schema in field_schema["anyOf"]:
+            types.append(get_field_type(option_schema))  # Recursive call
+        if not types:
+            return Any
         return Union[tuple(types)]
-    elif "type" in field_schema:
-        return json_type_to_python.get(field_schema["type"], Any)
-    else:
-        return Any
+
+    if "type" in field_schema and isinstance(field_schema["type"], list):
+        types = []
+        for type_name in field_schema["type"]:
+            if type_name == "array":
+                item_schema = field_schema.get("items", {})
+                types.append(List[get_field_type(item_schema)])
+            elif type_name in json_type_to_python:
+                types.append(json_type_to_python[type_name])
+            else:
+                types.append(Any)  # Fallback for unknown types in the list
+        if not types:
+            return Any
+        return Union[tuple(types)]  # type: ignore
+
+    if "type" in field_schema:
+        schema_type_name = field_schema["type"]
+        if schema_type_name == "array":
+            item_schema = field_schema.get(
+                "items", {}
+            )  # Default to Any if "items" is missing
+            return List[get_field_type(item_schema)]
+
+        return json_type_to_python.get(schema_type_name, Any)
+
+    # If only "items" is present (implies array by some conventions, but less standard)
+    # Or if it's a schema with other keywords like 'properties' (implying object)
+    # For simplicity, if no "type" or "anyOf" at this point, default to Any or add more specific handling.
+    # If 'properties' in field_schema or 'additionalProperties' in field_schema, it's likely an object.
+    if "properties" in field_schema or "additionalProperties" in field_schema:
+        # This path might need to reconstruct a nested Pydantic model if you encounter such schemas.
+        # For now, treating as 'dict' or 'Any' might be a simpler placeholder.
+        return dict  # Or Any, or more sophisticated object reconstruction.
+
+    return Any
 
 
 class Agent:
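
Note on the get_field_type rewrite above: the function is now recursive (anyOf options and array item schemas are resolved by calling get_field_type again), it accepts a JSON-schema "type" given as a list, and it maps an empty schema to Any. A minimal sketch of the resulting behavior, assuming get_field_type is imported from vectara_agentic.agent:

    from typing import Any, List, Optional
    from vectara_agentic.agent import get_field_type

    assert get_field_type({"type": "string"}) is str
    # "array" schemas now produce typed lists via the recursive call on "items"
    assert get_field_type({"type": "array", "items": {"type": "integer"}}) == List[int]
    # "anyOf" options are resolved recursively, so nullable fields collapse to Optional[...]
    assert get_field_type({"anyOf": [{"type": "string"}, {"type": "null"}]}) == Optional[str]
    assert get_field_type({}) is Any  # empty schema falls back to Any
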
@@ -169,7 +198,7 @@ class Agent:
 
     def __init__(
         self,
-        tools: list[FunctionTool],
+        tools: List[FunctionTool],
         topic: str = "general",
         custom_instructions: str = "",
         general_instructions: str = GENERAL_INSTRUCTIONS,
@@ -184,7 +213,7 @@
         fallback_agent_config: Optional[AgentConfig] = None,
         chat_history: Optional[list[Tuple[str, str]]] = None,
        validate_tools: bool = False,
-        workflow_cls: Workflow = None,
+        workflow_cls: Optional[Workflow] = None,
         workflow_timeout: int = 120,
     ) -> None:
         """
@@ -258,13 +287,15 @@
             A tool is invalid if it is mentioned in the instructions but not in the tools list.
             A tool's name must have at least two characters.
             Your response should be a comma-separated list of the invalid tools.
-            If not invalid tools exist, respond with "<OKAY>".
+            If no invalid tools exist, respond with "<OKAY>" (and nothing else).
             """
             llm = get_llm(LLMRole.MAIN, config=self.agent_config)
-            bad_tools_str = llm.complete(prompt).text
+            bad_tools_str = llm.complete(prompt).text.strip('\n')
             if bad_tools_str and bad_tools_str != "<OKAY>":
                 bad_tools = [tool.strip() for tool in bad_tools_str.split(",")]
-                numbered = ", ".join(f"({i}) {tool}" for i, tool in enumerate(bad_tools, 1))
+                numbered = ", ".join(
+                    f"({i}) {tool}" for i, tool in enumerate(bad_tools, 1)
+                )
                 raise ValueError(
                     f"The Agent custom instructions mention these invalid tools: {numbered}"
                 )
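
The two changes above tighten the tool-validation contract: the prompt now tells the LLM to emit nothing besides the sentinel, and the response is stripped of newlines before the exact comparison against "<OKAY>". Illustrative values (not taken from the diff):

    response_text = "<OKAY>\n"                     # a trailing newline used to fail the exact match
    assert response_text.strip('\n') == "<OKAY>"   # now passes validation
    response_text = "search_tool, summarize_tool"  # hypothetical invalid tool names
    bad_tools = [t.strip() for t in response_text.split(",")]
    # raises: ValueError: The Agent custom instructions mention these invalid
    # tools: (1) search_tool, (2) summarize_tool
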
@@ -612,7 +643,7 @@
         validate_tools: bool = False,
         fallback_agent_config: Optional[AgentConfig] = None,
         chat_history: Optional[list[Tuple[str, str]]] = None,
-        workflow_cls: Workflow = None,
+        workflow_cls: Optional[Workflow] = None,
         workflow_timeout: int = 120,
     ) -> "Agent":
         """
@@ -681,8 +712,8 @@
         vectara_rerank_limit: Optional[int] = None,
         vectara_rerank_cutoff: Optional[float] = None,
         vectara_diversity_bias: float = 0.2,
-        vectara_udf_expression: str = None,
-        vectara_rerank_chain: List[Dict] = None,
+        vectara_udf_expression: Optional[str] = None,
+        vectara_rerank_chain: Optional[List[Dict]] = None,
         vectara_n_sentences_before: int = 2,
         vectara_n_sentences_after: int = 2,
         vectara_summary_num_results: int = 10,
@@ -1016,7 +1047,7 @@
             time.sleep(1)
             attempt += 1
 
-        return AgentResponse(
+        return AgentStreamingResponse(
             response=(
                 f"For {orig_llm} LLM - failure can't be resolved after "
                 f"{max_attempts} attempts ({last_error})."
@@ -1028,7 +1059,11 @@
     # workflow will always get these arguments in the StartEvent: agent, tools, llm, verbose
     # the inputs argument comes from the call to run()
     #
-    async def run(self, inputs: Any, verbose: bool = False) -> Any:
+    async def run(
+        self,
+        inputs: Any,
+        verbose: bool = False,
+    ) -> Any:
         """
         Run a workflow using the agent.
         workflow class must be provided in the agent constructor.
@@ -1036,7 +1071,7 @@
             inputs (Any): The inputs to the workflow.
             verbose (bool, optional): Whether to print verbose output. Defaults to False.
         Returns:
-            Any: The output of the workflow.
+            Any: The output or context of the workflow.
         """
         # Create workflow
         if self.workflow_cls:
@@ -1048,20 +1083,38 @@
         if not isinstance(inputs, self.workflow_cls.InputsModel):
             raise ValueError(f"Inputs must be an instance of {workflow.InputsModel}.")
 
-        # run workflow
-        result = await workflow.run(
-            agent=self,
-            tools=self.tools,
-            llm=self.llm,
-            verbose=verbose,
-            inputs=inputs,
-        )
-
-        # return output in the form of workflow.OutputsModel
+        workflow_context = Context(workflow=workflow)
         try:
-            output = workflow.OutputsModel.model_validate(result)
-        except ValidationError as e:
-            raise ValueError(f"Failed to map workflow output to model: {e}") from e
+            # run workflow
+            result = await workflow.run(
+                ctx=workflow_context,
+                agent=self,
+                tools=self.tools,
+                llm=self.llm,
+                verbose=verbose,
+                inputs=inputs,
+            )
+
+            # return output in the form of workflow.OutputsModel(BaseModel)
+            try:
+                output = workflow.OutputsModel.model_validate(result)
+            except ValidationError as e:
+                raise ValueError(f"Failed to map workflow output to model: {e}") from e
+
+        except Exception as e:
+            outputs_model_on_fail_cls = getattr(workflow.__class__, "OutputModelOnFail", None)
+            if outputs_model_on_fail_cls:
+                model_fields = outputs_model_on_fail_cls.model_fields
+                input_dict = {
+                    key: await workflow_context.get(key, None)
+                    for key in model_fields
+                }
+
+                # return output in the form of workflow.OutputModelOnFail(BaseModel)
+                output = outputs_model_on_fail_cls.model_validate(input_dict)
+            else:
+                print(f"Vectara Agentic: Workflow failed with unexpected error: {e}")
+                raise type(e)(str(e)).with_traceback(e.__traceback__)
 
         return output
 
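
The rewritten run() creates a llama-index Context up front, passes it into workflow.run(), and on any failure looks for an optional OutputModelOnFail pydantic model on the workflow class; if one is declared, its fields are read back from the Context so partial results can be returned instead of re-raising. A sketch of a workflow that opts in (all names below are illustrative, not part of the diff):

    from typing import Optional
    from pydantic import BaseModel
    from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step

    class MyWorkflow(Workflow):
        class InputsModel(BaseModel):
            query: str

        class OutputsModel(BaseModel):
            answer: str

        class OutputModelOnFail(BaseModel):
            # each field name must match a key the workflow stored in the Context
            partial_answer: Optional[str] = None

        @step
        async def answer(self, ctx: Context, ev: StartEvent) -> StopEvent:
            await ctx.set("partial_answer", "draft")  # recoverable if a later step fails
            return StopEvent(result={"answer": "final"})

    # agent = Agent(tools=[...], topic="general", workflow_cls=MyWorkflow)
    # output = await agent.run(inputs=MyWorkflow.InputsModel(query="..."))
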
@@ -1086,12 +1139,12 @@ class Agent:
                 fn_schema_serialized = {
                     "schema": (
                         fn_schema_cls.model_json_schema()
-                        if hasattr(fn_schema_cls, "model_json_schema")
+                        if fn_schema_cls and hasattr(fn_schema_cls, "model_json_schema")
                         else None
                     ),
                     "metadata": {
-                        "module": fn_schema_cls.__module__,
-                        "class": fn_schema_cls.__name__,
+                        "module": fn_schema_cls.__module__ if fn_schema_cls else None,
+                        "class": fn_schema_cls.__name__ if fn_schema_cls else None,
                     },
                 }
             else:
@@ -1140,44 +1193,69 @@ class Agent:
             if data.get("fallback_agent_config")
             else None
         )
-        tools = []
+        tools: list[FunctionTool] = []
 
         for tool_data in data["tools"]:
-            # Recreate the dynamic model using the schema info
+            query_args_model = None
             if tool_data.get("fn_schema"):
                 schema_info = tool_data["fn_schema"]
                 try:
                     module_name = schema_info["metadata"]["module"]
                     class_name = schema_info["metadata"]["class"]
                     mod = importlib.import_module(module_name)
-                    fn_schema_cls = getattr(mod, class_name)
-                    query_args_model = fn_schema_cls
+                    candidate_cls = getattr(mod, class_name)
+                    if inspect.isclass(candidate_cls) and issubclass(
+                        candidate_cls, BaseModel
+                    ):
+                        query_args_model = candidate_cls
+                    else:
+                        # It's not the Pydantic model class we expected (e.g., it's the function itself)
+                        # Force fallback to JSON schema reconstruction by raising an error.
+                        raise ImportError(
+                            f"Retrieved '{class_name}' from '{module_name}' is not a Pydantic BaseModel class. "
+                            "Falling back to JSON schema reconstruction."
+                        )
                 except Exception:
                     # Fallback: rebuild using the JSON schema
                     field_definitions = {}
-                    for field, values in (
-                        schema_info.get("schema", {}).get("properties", {}).items()
+                    json_schema_to_rebuild = schema_info.get("schema")
+                    if json_schema_to_rebuild and isinstance(
+                        json_schema_to_rebuild, dict
                     ):
-                        field_type = get_field_type(values)
-                        if "default" in values:
-                            field_definitions[field] = (
-                                field_type,
-                                Field(
-                                    description=values.get("description", ""),
-                                    default=values["default"],
-                                ),
-                            )
-                        else:
-                            field_definitions[field] = (
-                                field_type,
-                                Field(description=values.get("description", "")),
-                            )
-                    query_args_model = create_model(
-                        schema_info.get("schema", {}).get("title", "QueryArgs"),
-                        **field_definitions,
-                    )
-            else:
-                query_args_model = create_model("QueryArgs")
+                        for field, values in json_schema_to_rebuild.get(
+                            "properties", {}
+                        ).items():
+                            field_type = get_field_type(values)
+                            field_description = values.get(
+                                "description"
+                            )  # Defaults to None
+                            if "default" in values:
+                                field_definitions[field] = (
+                                    field_type,
+                                    Field(
+                                        description=field_description,
+                                        default=values["default"],
+                                    ),
+                                )
+                            else:
+                                field_definitions[field] = (
+                                    field_type,
+                                    Field(description=field_description),
+                                )
+                        query_args_model = create_model(
+                            json_schema_to_rebuild.get(
+                                "title", f"{tool_data['name']}_QueryArgs"
+                            ),
+                            **field_definitions,
+                        )
+                    else:  # If schema part is missing or not a dict, create a default empty model
+                        query_args_model = create_model(
+                            f"{tool_data['name']}_QueryArgs"
+                        )
+
+            # If fn_schema was not in tool_data or reconstruction failed badly, default to empty pydantic model
+            if query_args_model is None:
+                query_args_model = create_model(f"{tool_data['name']}_QueryArgs")
 
             fn = (
                 pickle.loads(tool_data["fn"].encode("latin-1"))
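
The fallback path above rebuilds a tool's argument model from its serialized JSON schema with pydantic.create_model, now using a per-tool default model name and None (rather than "") for missing descriptions. A condensed sketch of the same mechanism on a made-up schema:

    from pydantic import Field, create_model
    from vectara_agentic.agent import get_field_type  # the helper shown earlier

    schema = {  # made-up example of a serialized fn_schema
        "title": "my_tool_QueryArgs",
        "properties": {
            "query": {"type": "string", "description": "free-text query"},
            "top_k": {"type": "integer", "default": 10},
        },
    }
    field_definitions = {}
    for name, spec in schema["properties"].items():
        field_type = get_field_type(spec)
        if "default" in spec:
            field_definitions[name] = (
                field_type,
                Field(description=spec.get("description"), default=spec["default"]),
            )
        else:
            field_definitions[name] = (
                field_type,
                Field(description=spec.get("description")),
            )
    QueryArgs = create_model(schema["title"], **field_definitions)
    print(QueryArgs(query="hello"))  # query='hello' top_k=10
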

vectara_agentic/db_tools.py CHANGED
@@ -112,7 +112,7 @@ class DatabaseTools:
             List[str]: a list of Document objects from the database.
         """
         if sql_query is None:
-            raise ValueError("A query parameter is necessary to filter the data")
+            raise ValueError("A query parameter is necessary to filter the data.")
 
         count_query = f"SELECT COUNT(*) FROM ({sql_query})"
         try:
@@ -123,7 +123,7 @@ class DatabaseTools:
         if num_rows > self.max_rows:
             return [
                 f"The query is expected to return more than {self.max_rows} rows. "
-                "Please refactor your query to make it return less rows. "
+                "Please refactor your query to make it return less rows and try again. "
             ]
         try:
             res = self._load_data(sql_query)

vectara_agentic/utils.py CHANGED
@@ -69,12 +69,16 @@ def get_tokenizer_for_model(
     """
     Get the tokenizer for the specified model, as determined by the role & config.
     """
-    model_provider, model_name = _get_llm_params_for_role(role, config)
-    if model_provider == ModelProvider.OPENAI:
-        # This might raise an exception if the model_name is unknown to tiktoken
-        return tiktoken.encoding_for_model(model_name).encode
-    if model_provider == ModelProvider.ANTHROPIC:
-        return Anthropic().tokenizer
+    try:
+        model_provider, model_name = _get_llm_params_for_role(role, config)
+        if model_provider == ModelProvider.OPENAI:
+            # This might raise an exception if the model_name is unknown to tiktoken
+            return tiktoken.encoding_for_model(model_name).encode
+        if model_provider == ModelProvider.ANTHROPIC:
+            return Anthropic().tokenizer
+    except Exception:
+        print(f"Error getting tokenizer for model {model_name}, ignoring")
+        return None
     return None
 
 
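
With the try/except in place, a model name that tiktoken (or the Anthropic SDK) cannot resolve now degrades to returning None instead of raising, so callers simply skip token counting. A hedged usage sketch (import locations assumed):

    from vectara_agentic.utils import get_tokenizer_for_model  # assumed module path
    from vectara_agentic.types import LLMRole

    tokenizer = get_tokenizer_for_model(role=LLMRole.MAIN)
    if tokenizer is not None:  # None now signals "no tokenizer available"
        print(len(tokenizer("how many tokens is this?")))  # OpenAI path returns .encode
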

vectara_agentic/sub_query_workflow.py CHANGED
@@ -50,7 +50,7 @@ class SubQuestionQueryWorkflow(Workflow):
         answer: str
 
     @step
-    async def query(self, ctx: Context, ev: StartEvent) -> QueryEvent:
+    async def query(self, ctx: Context, ev: StartEvent) -> QueryEvent | None:
         """
         Given a user question, and a list of tools, output a list of relevant
         sub-questions, such that the answers to all the sub-questions put together
@@ -130,7 +130,10 @@ class SubQuestionQueryWorkflow(Workflow):
         if sub_questions is None:
             raise ValueError(f"Invalid LLM response format: {response_str}")
         if not sub_questions:
-            raise ValueError("LLM returned empty sub-questions list")
+            # If the LLM returns an empty list, we need to handle it gracefully
+            # We use the original query as a single question fallback
+            print("LLM returned empty sub-questions list")
+            sub_questions = [original_query]
 
         await ctx.set("sub_question_count", len(sub_questions))
         for question in sub_questions:
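
The guard above means a well-formed but empty sub-question list from the LLM no longer aborts the workflow; the original user query is used as the single sub-question instead. Restated in isolation (illustrative helper, not part of the diff):

    def plan_sub_questions(sub_questions, original_query):
        if sub_questions is None:
            raise ValueError("Invalid LLM response format")
        if not sub_questions:
            print("LLM returned empty sub-questions list")
            return [original_query]  # fall back to the original query
        return sub_questions

    assert plan_sub_questions([], "What is Vectara?") == ["What is Vectara?"]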