vectara-agentic 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vectara-agentic might be problematic. Click here for more details.

vectara_agentic/tools.py CHANGED
@@ -19,9 +19,10 @@ from llama_index.core.tools.types import ToolMetadata, ToolOutput
19
19
 
20
20
 
21
21
  from .types import ToolType
22
- from .tools_catalog import summarize_text, rephrase_text, critique_text, get_bad_topics
22
+ from .tools_catalog import ToolsCatalog, get_bad_topics
23
23
  from .db_tools import DBLoadSampleData, DBLoadUniqueValues, DBLoadData
24
24
  from .utils import is_float
25
+ from .agent_config import AgentConfig
25
26
 
26
27
  LI_packages = {
27
28
  "yahoo_finance": ToolType.QUERY,
@@ -126,6 +127,122 @@ class VectaraTool(FunctionTool):
126
127
  break
127
128
  return is_equal
128
129
 
130
+ def _build_filter_string(kwargs: Dict[str, Any], tool_args_type: Dict[str, str], fixed_filter: str) -> str:
131
+ """
132
+ Build filter string for Vectara from kwargs
133
+ """
134
+ filter_parts = []
135
+ comparison_operators = [">=", "<=", "!=", ">", "<", "="]
136
+ numeric_only_ops = {">", "<", ">=", "<="}
137
+
138
+ for key, value in kwargs.items():
139
+ if value is None or value == "":
140
+ continue
141
+
142
+ # Determine the prefix for the key. Valid values are "doc" or "part"
143
+ # default to 'doc' if not specified
144
+ prefix = tool_args_type.get(key, "doc")
145
+
146
+ if prefix not in ["doc", "part"]:
147
+ raise ValueError(
148
+ f'Unrecognized prefix {prefix}. Please make sure to use either "doc" or "part" for the prefix.'
149
+ )
150
+
151
+ if value is PydanticUndefined:
152
+ raise ValueError(
153
+ f"Value of argument {key} is undefined, and this is invalid. "
154
+ "Please form proper arguments and try again."
155
+ )
156
+
157
+ # value of the argument
158
+ val_str = str(value).strip()
159
+
160
+ # Special handling for range operator
161
+ if val_str.startswith(("[", "(")) and val_str.endswith(("]", ")")):
162
+ # Extract the boundary types
163
+ start_inclusive = val_str.startswith("[")
164
+ end_inclusive = val_str.endswith("]")
165
+
166
+ # Remove the boundaries and strip whitespace
167
+ val_str = val_str[1:-1].strip()
168
+
169
+ if "," in val_str:
170
+ val_str = val_str.split(",")
171
+ if len(val_str) != 2:
172
+ raise ValueError(
173
+ f"Range operator requires two values for {key}: {value}"
174
+ )
175
+
176
+ # Validate both bounds as numeric or empty (for unbounded ranges)
177
+ start_val, end_val = val_str[0].strip(), val_str[1].strip()
178
+ if start_val and not (start_val.isdigit() or is_float(start_val)):
179
+ raise ValueError(
180
+ f"Range operator requires numeric operands for {key}: {value}"
181
+ )
182
+ if end_val and not (end_val.isdigit() or is_float(end_val)):
183
+ raise ValueError(
184
+ f"Range operator requires numeric operands for {key}: {value}"
185
+ )
186
+
187
+ # Build the SQL condition
188
+ range_conditions = []
189
+ if start_val:
190
+ operator = ">=" if start_inclusive else ">"
191
+ range_conditions.append(f"{prefix}.{key} {operator} {start_val}")
192
+ if end_val:
193
+ operator = "<=" if end_inclusive else "<"
194
+ range_conditions.append(f"{prefix}.{key} {operator} {end_val}")
195
+
196
+ # Join the range conditions with AND
197
+ filter_parts.append('( ' + " AND ".join(range_conditions) + ' )')
198
+ continue
199
+
200
+ raise ValueError(
201
+ f"Range operator requires two values for {key}: {value}"
202
+ )
203
+
204
+ # Check if value contains a known comparison operator at the start
205
+ matched_operator = None
206
+ for op in comparison_operators:
207
+ if val_str.startswith(op):
208
+ matched_operator = op
209
+ break
210
+
211
+ # Break down operator from value
212
+ # e.g. val_str = ">2022" --> operator = ">", rhs = "2022"
213
+ if matched_operator:
214
+ rhs = val_str[len(matched_operator):].strip()
215
+
216
+ if matched_operator in numeric_only_ops:
217
+ # Must be numeric
218
+ if not (rhs.isdigit() or is_float(rhs)):
219
+ raise ValueError(
220
+ f"Operator {matched_operator} requires a numeric operand for {key}: {val_str}"
221
+ )
222
+ filter_parts.append(f"{prefix}.{key}{matched_operator}{rhs}")
223
+ else:
224
+ # = and != operators can be numeric or string
225
+ if rhs.isdigit() or is_float(rhs):
226
+ filter_parts.append(f"{prefix}.{key}{matched_operator}{rhs}")
227
+ elif rhs.lower() in ["true", "false"]:
228
+ filter_parts.append(f"{prefix}.{key}{matched_operator}{rhs.lower()}")
229
+ else:
230
+ # For string operands, wrap them in quotes
231
+ filter_parts.append(f"{prefix}.{key}{matched_operator}'{rhs}'")
232
+ else:
233
+ if val_str.isdigit() or is_float(val_str):
234
+ filter_parts.append(f"{prefix}.{key}={val_str}")
235
+ elif val_str.lower() in ["true", "false"]:
236
+ # This is to handle boolean values.
237
+ # This is not complete solution - the best solution would be to test if the field is boolean
238
+ # That can be done after we move to APIv2
239
+ filter_parts.append(f"{prefix}.{key}={val_str.lower()}")
240
+ else:
241
+ filter_parts.append(f"{prefix}.{key}='{val_str}'")
242
+
243
+ filter_str = " AND ".join(filter_parts)
244
+ return f"({fixed_filter}) AND ({filter_str})" if fixed_filter else filter_str
245
+
129
246
  class VectaraToolFactory:
130
247
  """
131
248
  A factory class for creating Vectara RAG tools.
@@ -149,13 +266,159 @@ class VectaraToolFactory:
149
266
  self.vectara_api_key = vectara_api_key
150
267
  self.num_corpora = len(vectara_corpus_id.split(","))
151
268
 
269
+ def create_search_tool(
270
+ self,
271
+ tool_name: str,
272
+ tool_description: str,
273
+ tool_args_schema: type[BaseModel],
274
+ tool_args_type: Dict[str, str] = {},
275
+ fixed_filter: str = "",
276
+ lambda_val: float = 0.005,
277
+ reranker: str = "mmr",
278
+ rerank_k: int = 50,
279
+ mmr_diversity_bias: float = 0.2,
280
+ udf_expression: str = None,
281
+ rerank_chain: List[Dict] = None,
282
+ verbose: bool = False,
283
+ ) -> VectaraTool:
284
+ """
285
+ Creates a Vectara search/retrieval tool
286
+
287
+ Args:
288
+ tool_name (str): The name of the tool.
289
+ tool_description (str): The description of the tool.
290
+ tool_args_schema (BaseModel): The schema for the tool arguments.
291
+ tool_args_type (Dict[str, str], optional): The type of each argument (doc or part).
292
+ fixed_filter (str, optional): A fixed Vectara filter condition to apply to all queries.
293
+ lambda_val (float, optional): Lambda value for the Vectara query.
294
+ reranker (str, optional): The reranker mode.
295
+ rerank_k (int, optional): Number of top-k documents for reranking.
296
+ mmr_diversity_bias (float, optional): MMR diversity bias.
297
+ udf_expression (str, optional): the user defined expression for reranking results.
298
+ rerank_chain (List[Dict], optional): A list of rerankers to be applied sequentially.
299
+ Each dictionary should specify the "type" of reranker (mmr, slingshot, udf)
300
+ and any other parameters (e.g. "limit" or "cutoff" for any type,
301
+ "diversity_bias" for mmr, and "user_function" for udf).
302
+ If using slingshot/multilingual_reranker_v1, it must be first in the list.
303
+ verbose (bool, optional): Whether to print verbose output.
304
+
305
+ Returns:
306
+ VectaraTool: A VectaraTool object.
307
+ """
308
+
309
+ vectara = VectaraIndex(
310
+ vectara_api_key=self.vectara_api_key,
311
+ vectara_customer_id=self.vectara_customer_id,
312
+ vectara_corpus_id=self.vectara_corpus_id,
313
+ x_source_str="vectara-agentic",
314
+ )
315
+
316
+ # Dynamically generate the search function
317
+ def search_function(*args, **kwargs) -> ToolOutput:
318
+ """
319
+ Dynamically generated function for semantic search Vectara.
320
+ """
321
+ # Convert args to kwargs using the function signature
322
+ sig = inspect.signature(search_function)
323
+ bound_args = sig.bind_partial(*args, **kwargs)
324
+ bound_args.apply_defaults()
325
+ kwargs = bound_args.arguments
326
+
327
+ query = kwargs.pop("query")
328
+ top_k = kwargs.pop("top_k", 10)
329
+ try:
330
+ filter_string = _build_filter_string(kwargs, tool_args_type, fixed_filter)
331
+ except ValueError as e:
332
+ return ToolOutput(
333
+ tool_name=search_function.__name__,
334
+ content=str(e),
335
+ raw_input={"args": args, "kwargs": kwargs},
336
+ raw_output={"response": str(e)},
337
+ )
338
+
339
+ vectara_retriever = vectara.as_retriever(
340
+ summary_enabled=False,
341
+ similarity_top_k=top_k,
342
+ reranker=reranker,
343
+ rerank_k=rerank_k if rerank_k * self.num_corpora <= 100 else int(100 / self.num_corpora),
344
+ mmr_diversity_bias=mmr_diversity_bias,
345
+ udf_expression=udf_expression,
346
+ rerank_chain=rerank_chain,
347
+ lambda_val=lambda_val,
348
+ filter=filter_string,
349
+ x_source_str="vectara-agentic",
350
+ verbose=verbose,
351
+ )
352
+ response = vectara_retriever.retrieve(query)
353
+
354
+ if len(response) == 0:
355
+ msg = "Vectara Tool failed to retreive any results for the query."
356
+ return ToolOutput(
357
+ tool_name=search_function.__name__,
358
+ content=msg,
359
+ raw_input={"args": args, "kwargs": kwargs},
360
+ raw_output={"response": msg},
361
+ )
362
+ tool_output = "Matching documents:\n"
363
+ unique_ids = set()
364
+ for doc in response:
365
+ if doc.id_ in unique_ids:
366
+ continue
367
+ unique_ids.add(doc.id_)
368
+ tool_output += f"document '{doc.id_}' metadata: {doc.metadata}\n"
369
+ out = ToolOutput(
370
+ tool_name=search_function.__name__,
371
+ content=tool_output,
372
+ raw_input={"args": args, "kwargs": kwargs},
373
+ raw_output=response,
374
+ )
375
+ return out
376
+
377
+ fields = tool_args_schema.model_fields
378
+ params = [
379
+ inspect.Parameter(
380
+ name=field_name,
381
+ kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
382
+ default=field_info.default,
383
+ annotation=field_info,
384
+ )
385
+ for field_name, field_info in fields.items()
386
+ ]
387
+
388
+ # Create a new signature using the extracted parameters
389
+ sig = inspect.Signature(params)
390
+ search_function.__signature__ = sig
391
+ search_function.__annotations__["return"] = dict[str, Any]
392
+ search_function.__name__ = "_" + re.sub(r"[^A-Za-z0-9_]", "_", tool_name)
393
+
394
+ # Create the tool function signature string
395
+ fields = []
396
+ for name, field in tool_args_schema.__fields__.items():
397
+ annotation = field.annotation
398
+ type_name = annotation.__name__ if hasattr(annotation, '__name__') else str(annotation)
399
+ fields.append(f"{name}: {type_name}")
400
+ args_str = ", ".join(fields)
401
+ function_str = f"{tool_name}({args_str}) -> str"
402
+
403
+ # Create the tool
404
+ tool = VectaraTool.from_defaults(
405
+ fn=search_function,
406
+ name=tool_name,
407
+ description=function_str + ". " + tool_description,
408
+ fn_schema=tool_args_schema,
409
+ tool_type=ToolType.QUERY,
410
+ )
411
+ return tool
412
+
152
413
  def create_rag_tool(
153
414
  self,
154
415
  tool_name: str,
155
416
  tool_description: str,
156
417
  tool_args_schema: type[BaseModel],
157
418
  tool_args_type: Dict[str, str] = {},
419
+ fixed_filter: str = "",
158
420
  vectara_summarizer: str = "vectara-summary-ext-24-05-sml",
421
+ vectara_prompt_text: str = None,
159
422
  summary_num_results: int = 5,
160
423
  summary_response_lang: str = "eng",
161
424
  n_sentences_before: int = 2,
@@ -178,7 +441,9 @@ class VectaraToolFactory:
178
441
  tool_description (str): The description of the tool.
179
442
  tool_args_schema (BaseModel): The schema for the tool arguments.
180
443
  tool_args_type (Dict[str, str], optional): The type of each argument (doc or part).
444
+ fixed_filter (str, optional): A fixed Vectara filter condition to apply to all queries.
181
445
  vectara_summarizer (str, optional): The Vectara summarizer to use.
446
+ vectara_prompt_text (str, optional): The prompt text for the Vectara summarizer.
182
447
  summary_num_results (int, optional): The number of summary results.
183
448
  summary_response_lang (str, optional): The response language for the summary.
184
449
  n_sentences_before (int, optional): Number of sentences before the summary.
@@ -210,118 +475,6 @@ class VectaraToolFactory:
210
475
  x_source_str="vectara-agentic",
211
476
  )
212
477
 
213
- def _build_filter_string(kwargs: Dict[str, Any], tool_args_type: Dict[str, str]) -> str:
214
- filter_parts = []
215
- comparison_operators = [">=", "<=", "!=", ">", "<", "="]
216
- numeric_only_ops = {">", "<", ">=", "<="}
217
-
218
- for key, value in kwargs.items():
219
- if value is None or value == "":
220
- continue
221
-
222
- # Determine the prefix for the key. Valid values are "doc" or "part"
223
- # default to 'doc' if not specified
224
- prefix = tool_args_type.get(key, "doc")
225
-
226
- if prefix not in ["doc", "part"]:
227
- raise ValueError(
228
- f'Unrecognized prefix {prefix}. Please make sure to use either "doc" or "part" for the prefix.'
229
- )
230
-
231
- if value is PydanticUndefined:
232
- raise ValueError(
233
- f"Value of argument {key} is undefined, and this is invalid. "
234
- "Please form proper arguments and try again."
235
- )
236
-
237
- # value of the arrgument
238
- val_str = str(value).strip()
239
-
240
- # Special handling for range operator
241
- if val_str.startswith(("[", "(")) and val_str.endswith(("]", ")")):
242
- # Extract the boundary types
243
- start_inclusive = val_str.startswith("[")
244
- end_inclusive = val_str.endswith("]")
245
-
246
- # Remove the boundaries and strip whitespace
247
- val_str = val_str[1:-1].strip()
248
-
249
- if "," in val_str:
250
- val_str = val_str.split(",")
251
- if len(val_str) != 2:
252
- raise ValueError(
253
- f"Range operator requires two values for {key}: {value}"
254
- )
255
-
256
- # Validate both bounds as numeric or empty (for unbounded ranges)
257
- start_val, end_val = val_str[0].strip(), val_str[1].strip()
258
- if start_val and not (start_val.isdigit() or is_float(start_val)):
259
- raise ValueError(
260
- f"Range operator requires numeric operands for {key}: {value}"
261
- )
262
- if end_val and not (end_val.isdigit() or is_float(end_val)):
263
- raise ValueError(
264
- f"Range operator requires numeric operands for {key}: {value}"
265
- )
266
-
267
- # Build the SQL condition
268
- range_conditions = []
269
- if start_val:
270
- operator = ">=" if start_inclusive else ">"
271
- range_conditions.append(f"{prefix}.{key} {operator} {start_val}")
272
- if end_val:
273
- operator = "<=" if end_inclusive else "<"
274
- range_conditions.append(f"{prefix}.{key} {operator} {end_val}")
275
-
276
- # Join the range conditions with AND
277
- filter_parts.append('( ' + " AND ".join(range_conditions) + ' )')
278
- continue
279
-
280
- raise ValueError(
281
- f"Range operator requires two values for {key}: {value}"
282
- )
283
-
284
- # Check if value contains a known comparison operator at the start
285
- matched_operator = None
286
- for op in comparison_operators:
287
- if val_str.startswith(op):
288
- matched_operator = op
289
- break
290
-
291
- # Break down operator from value
292
- # e.g. val_str = ">2022" --> operator = ">", rhs = "2022"
293
- if matched_operator:
294
- rhs = val_str[len(matched_operator):].strip()
295
-
296
- if matched_operator in numeric_only_ops:
297
- # Must be numeric
298
- if not (rhs.isdigit() or is_float(rhs)):
299
- raise ValueError(
300
- f"Operator {matched_operator} requires a numeric operand for {key}: {val_str}"
301
- )
302
- filter_parts.append(f"{prefix}.{key}{matched_operator}{rhs}")
303
- else:
304
- # = and != operators can be numeric or string
305
- if rhs.isdigit() or is_float(rhs):
306
- filter_parts.append(f"{prefix}.{key}{matched_operator}{rhs}")
307
- elif rhs.lower() in ["true", "false"]:
308
- filter_parts.append(f"{prefix}.{key}{matched_operator}{rhs.lower()}")
309
- else:
310
- # For string operands, wrap them in quotes
311
- filter_parts.append(f"{prefix}.{key}{matched_operator}'{rhs}'")
312
- else:
313
- if val_str.isdigit() or is_float(val_str):
314
- filter_parts.append(f"{prefix}.{key}={val_str}")
315
- elif val_str.lower() in ["true", "false"]:
316
- # This is to handle boolean values.
317
- # This is not complete solution - the best solution would be to test if the field is boolean
318
- # That can be done after we move to APIv2
319
- filter_parts.append(f"{prefix}.{key}={val_str.lower()}")
320
- else:
321
- filter_parts.append(f"{prefix}.{key}='{val_str}'")
322
-
323
- return " AND ".join(filter_parts)
324
-
325
478
  # Dynamically generate the RAG function
326
479
  def rag_function(*args, **kwargs) -> ToolOutput:
327
480
  """
@@ -335,7 +488,7 @@ class VectaraToolFactory:
335
488
 
336
489
  query = kwargs.pop("query")
337
490
  try:
338
- filter_string = _build_filter_string(kwargs, tool_args_type)
491
+ filter_string = _build_filter_string(kwargs, tool_args_type, fixed_filter)
339
492
  except ValueError as e:
340
493
  return ToolOutput(
341
494
  tool_name=rag_function.__name__,
@@ -349,6 +502,7 @@ class VectaraToolFactory:
349
502
  summary_num_results=summary_num_results,
350
503
  summary_response_lang=summary_response_lang,
351
504
  summary_prompt_name=vectara_summarizer,
505
+ prompt_text=vectara_prompt_text,
352
506
  reranker=reranker,
353
507
  rerank_k=rerank_k if rerank_k * self.num_corpora <= 100 else int(100 / self.num_corpora),
354
508
  mmr_diversity_bias=mmr_diversity_bias,
@@ -471,6 +625,9 @@ class ToolsFactory:
471
625
  A factory class for creating agent tools.
472
626
  """
473
627
 
628
+ def __init__(self, agent_config: AgentConfig = None) -> None:
629
+ self.agent_config = agent_config
630
+
474
631
  def create_tool(self, function: Callable, tool_type: ToolType = ToolType.QUERY) -> VectaraTool:
475
632
  """
476
633
  Create a tool from a function.
@@ -533,7 +690,8 @@ class ToolsFactory:
533
690
  """
534
691
  Create a list of standard tools.
535
692
  """
536
- return [self.create_tool(tool) for tool in [summarize_text, rephrase_text]]
693
+ tc = ToolsCatalog(self.agent_config)
694
+ return [self.create_tool(tool) for tool in [tc.summarize_text, tc.rephrase_text, tc.critique_text]]
537
695
 
538
696
  def guardrail_tools(self) -> List[FunctionTool]:
539
697
  """
@@ -558,7 +716,8 @@ class ToolsFactory:
558
716
  """
559
717
  Use this tool to summarize legal text with no more than summary_max_length characters.
560
718
  """
561
- return summarize_text(text, expertise="law")
719
+ tc = ToolsCatalog(self.agent_config)
720
+ return tc.summarize_text(text, expertise="law")
562
721
 
563
722
  def critique_as_judge(
564
723
  text: str = Field(description="the original text."),
@@ -566,7 +725,8 @@ class ToolsFactory:
566
725
  """
567
726
  Critique the legal document.
568
727
  """
569
- return critique_text(
728
+ tc = ToolsCatalog(self.agent_config)
729
+ return tc.critique_text(
570
730
  text,
571
731
  role="judge",
572
732
  point_of_view="""