vectara-agentic 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic.
- tests/benchmark_models.py +12 -12
- tests/test_agent.py +4 -3
- tests/test_bedrock.py +101 -0
- tests/test_gemini.py +94 -8
- tests/test_groq.py +97 -16
- tests/test_openai.py +101 -0
- tests/test_react_streaming.py +26 -2
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +19 -30
- vectara_agentic/agent_core/factory.py +11 -4
- vectara_agentic/agent_core/prompts.py +64 -8
- vectara_agentic/agent_core/serialization.py +3 -3
- vectara_agentic/agent_core/streaming.py +174 -197
- vectara_agentic/agent_core/utils/hallucination.py +33 -1
- vectara_agentic/db_tools.py +4 -0
- vectara_agentic/llm_utils.py +55 -2
- vectara_agentic/sub_query_workflow.py +31 -31
- vectara_agentic/tools.py +0 -2
- vectara_agentic/utils.py +35 -10
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.9.dist-info}/METADATA +32 -32
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.9.dist-info}/RECORD +24 -24
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.9.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.9.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.9.dist-info}/top_level.txt +0 -0
vectara_agentic/agent_core/streaming.py
CHANGED

@@ -42,6 +42,35 @@ def get_event_id(event) -> str:
 
     return str(uuid.uuid4())
 
+
+def is_tool_related_event(event) -> bool:
+    """
+    Determine if an event is actually tool-related and should be tracked.
+
+    This should only return True for events that represent actual tool calls or tool outputs,
+    not for streaming text deltas or other LLM response events.
+
+    Args:
+        event: The stream event to check
+
+    Returns:
+        bool: True if this event should be tracked for tool purposes
+    """
+    # Track explicit tool events from LlamaIndex workflow
+    if isinstance(event, (ToolCall, ToolCallResult)):
+        return True
+
+    has_tool_id = getattr(event, "tool_id", None)
+    has_tool_name = getattr(event, "tool_name", None)
+    has_delta = getattr(event, "delta", None)
+
+    # Some providers don't emit ToolCall/ToolCallResult; avoid treating deltas as tool events
+    if (has_tool_id or has_tool_name) and not has_delta:
+        return True
+
+    return False
+
+
 class StreamingResponseAdapter:
     """
     Adapter class that provides a LlamaIndex-compatible streaming response interface.
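A rough illustration of how the new module-level gate behaves. The SimpleNamespace objects below are made-up stand-ins for demonstration only; the real handler receives LlamaIndex workflow events, and the helper name looks_tool_related is hypothetical.

from types import SimpleNamespace

# Hypothetical stand-in events; real code receives LlamaIndex ToolCall/AgentStream objects.
tool_event = SimpleNamespace(tool_id="call_1", tool_name="search", delta=None)
delta_event = SimpleNamespace(tool_id=None, tool_name=None, delta="partial text")

def looks_tool_related(event) -> bool:
    # Same heuristic as the fallback branch above: tool_id/tool_name without a delta.
    has_tool = getattr(event, "tool_id", None) or getattr(event, "tool_name", None)
    return bool(has_tool) and not getattr(event, "delta", None)

print(looks_tool_related(tool_event))   # True
print(looks_tool_related(delta_event))  # False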
@@ -90,25 +119,6 @@ class StreamingResponseAdapter:
         """
         return AgentResponse(response=self.response, metadata=self.metadata)
 
-    def wait_for_completion(self) -> None:
-        """
-        Wait for post-processing to complete and update metadata.
-        This should be called after streaming finishes but before accessing metadata.
-        """
-        if self.post_process_task and not self.post_process_task.done():
-            return
-        if self.post_process_task and self.post_process_task.done():
-            try:
-                final_response = self.post_process_task.result()
-                if hasattr(final_response, "metadata") and final_response.metadata:
-                    # Update our metadata from the completed task
-                    self.metadata.update(final_response.metadata)
-            except Exception as e:
-                logging.error(
-                    f"Error during post-processing: {e}. "
-                    "Ensure the post-processing task is correctly implemented."
-                )
-
 
 def extract_response_text_from_chat_message(response_text: Any) -> str:
     """
@@ -123,15 +133,15 @@ def extract_response_text_from_chat_message(response_text: Any) -> str:
         str: Extracted text content
     """
     # Handle case where response is a ChatMessage object
-    if hasattr(response_text, "content"):
-        return response_text.content
-    elif hasattr(response_text, "blocks"):
+    if hasattr(response_text, "blocks"):
         # Extract text from ChatMessage blocks
         text_parts = []
         for block in response_text.blocks:
             if hasattr(block, "text"):
                 text_parts.append(block.text)
         return "".join(text_parts)
+    elif hasattr(response_text, "content"):
+        return response_text.content
     elif not isinstance(response_text, str):
         return str(response_text)
 
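The reordering matters when a ChatMessage-like object exposes both blocks and content: checking blocks first prefers the structured text. A small sketch with hypothetical objects (not the real ChatMessage class) showing the same extraction order:

from types import SimpleNamespace

# Hypothetical message: "content" is empty, the real text lives in blocks.
msg = SimpleNamespace(
    content="",
    blocks=[SimpleNamespace(text="Hello, "), SimpleNamespace(text="world")],
)

# Same ordering as the updated extractor: blocks first, then content, then str().
if hasattr(msg, "blocks"):
    text = "".join(b.text for b in msg.blocks if hasattr(b, "text"))
elif hasattr(msg, "content"):
    text = msg.content
else:
    text = str(msg)

print(text)  # -> Hello, world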
@@ -234,9 +244,8 @@ def create_stream_post_processing_task(
     async def _safe_post_process():
         try:
             return await _post_process()
-        except Exception:
-            traceback.
-            # Return empty response on error
+        except Exception as e:
+            logging.error(f"Error {e} occurred during post-processing: {traceback.format_exc()}")
             return AgentResponse(response="", metadata={})
 
     return asyncio.create_task(_safe_post_process())
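The pattern in this hunk (a wrapper coroutine scheduled as a task that logs the full traceback and returns a fallback instead of propagating) can be sketched in isolation; the function names and fallback value below are illustrative, not the package's:

import asyncio
import logging
import traceback

async def _post_process() -> str:
    # Stand-in for the real post-processing coroutine.
    return "processed"

async def _safe_post_process() -> str:
    try:
        return await _post_process()
    except Exception as e:  # deliberately broad, mirroring the diff
        logging.error("Error %s during post-processing: %s", e, traceback.format_exc())
        return ""  # fallback value instead of re-raising

async def main() -> None:
    task = asyncio.create_task(_safe_post_process())
    print(await task)  # -> "processed"

asyncio.run(main())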
@@ -244,205 +253,203 @@ def create_stream_post_processing_task(
 
 class FunctionCallingStreamHandler:
     """
-
+    Streaming handler for function-calling agents with strict "no leaks" gating.
+
+    Core ideas:
+      - Buffer tokens PER LLM STEP.
+      - Commit the buffer ONLY if that step ends with AgentOutput.tool_calls == [].
+      - Drop the buffer if the step triggers tool calls (planning/tool-selection).
+      - Track pending tool results; handle multi-round (tool -> read -> tool -> ...) loops.
+      - Support return_direct tools (tool output is the final answer, no synthesis step).
+      - Two streaming modes:
+        - final_only: Buffer all tokens and commit only after step completes with no tool calls
+        - optimistic_live: Stream tokens live after all tool calls are complete
     """
 
-    def __init__(
+    def __init__(
+        self,
+        agent_instance,
+        handler,
+        prompt: str,
+        *,
+        stream_policy: str = "optimistic_live",  # "final_only" | "optimistic_live"
+    ):
         self.agent_instance = agent_instance
-        self.handler = handler
+        self.handler = handler  # awaitable; also has .stream_events()
         self.prompt = prompt
+
+        self.stream_policy = stream_policy
+
+        # Plumbing for your existing adapter/post-processing
         self.final_response_container = {"resp": None}
         self.stream_complete_event = asyncio.Event()
 
     async def process_stream_events(self) -> AsyncIterator[str]:
         """
-        Process streaming events and yield
+        Process streaming events and yield only valid, final tokens.
 
-
-
+        Contract:
+          - Never surface "planning" tokens (tool arguments, scratchpads, etc).
+          - Only surface tokens produced in the last, post-tool LLM step,
+            or a return_direct tool's output.
         """
-
-
+        # Step-scoped state
+        step_buffer: list[str] = []
+        step_has_tool_calls = False
+
+        # Run-scoped state
+        pending_tools = 0
+        committed_any_text = False
+
+        def _reset_step():
+            nonlocal step_has_tool_calls
+            step_buffer.clear()
+            step_has_tool_calls = False
 
         async for ev in self.handler.stream_events():
-            #
+            # ---- 1) Capture tool outputs for downstream logging/telemetry ----
             if isinstance(ev, ToolCallResult):
                 if hasattr(self.agent_instance, "_add_tool_output"):
                     # pylint: disable=W0212
-                    self.agent_instance._add_tool_output(
-
-
+                    self.agent_instance._add_tool_output(ev.tool_name, str(ev.tool_output))
+
+                pending_tools = max(0, pending_tools - 1)
+
+                # Return-direct short-circuit: surface tool output as the final answer
+                if getattr(ev, "return_direct", False):
+                    yield str(ev.tool_output)
+                    committed_any_text = True
+                # Do not early-break; keep draining events safely.
 
-            #
+            # ---- 2) Progress callback plumbing (safe and optional) ----
             if self.agent_instance.agent_progress_callback:
-
-                if self._is_tool_related_event(ev):
+                if is_tool_related_event(ev):
                     try:
                         event_id = get_event_id(ev)
                         await self._handle_progress_callback(ev, event_id)
-                    except
-                        logging.warning(f"
-
+                    except Exception as e:
+                        logging.warning(f"[progress-callback] skipping event: {e}")
+
+            # ---- 3) Step boundaries & gating logic ----
+            # New step starts: clear per-step state
+            if isinstance(ev, AgentInput):
+                _reset_step()
+                continue
 
-            #
+            # Streaming deltas (provisional)
             if hasattr(ev, "__class__") and "AgentStream" in str(ev.__class__):
-
-
-
-                    hasattr(ev, "tool_calls")
-                    and not ev.tool_calls
-                    and had_tool_calls
-                    and not transitioned_to_prose
-                ):
-                    yield "\n\n"
-                    transitioned_to_prose = True
-                if hasattr(ev, "delta"):
-                    yield ev.delta
-                elif (
-                    hasattr(ev, "tool_calls")
-                    and not ev.tool_calls
-                    and hasattr(ev, "delta")
-                    and transitioned_to_prose
-                ):
-                    yield ev.delta
+                # If the model is constructing a function call, LlamaIndex will attach tool_calls here
+                if getattr(ev, "tool_calls", None):
+                    step_has_tool_calls = True
 
-
+                delta = getattr(ev, "delta", None)
+                if not delta:
+                    continue
+
+                # Always buffer first
+                step_buffer.append(delta)
+
+                # Stream live only after all tools are complete
+                if self.stream_policy == "optimistic_live" and pending_tools == 0:
+                    yield delta
+
+                continue
+
+            # Step end: decide to commit or drop
+            if isinstance(ev, AgentOutput):
+                n_calls = len(getattr(ev, "tool_calls", []) or [])
+
+                if n_calls == 0:
+                    # Final text step -> commit
+                    if self.stream_policy == "final_only":
+                        # We held everything; now stream it out in order.
+                        for chunk in step_buffer:
+                            yield chunk
+                    # In optimistic mode, tokens were streamed live after tools completed.
+
+                    committed_any_text = committed_any_text or bool(step_buffer)
+                    _reset_step()
+
+                else:
+                    # Planning/tool step -> drop buffer
+                    _reset_step()
+                    pending_tools += n_calls
+
+                continue
+
+        # ---- 4) Finish: await the underlying handler for the final result ----
         try:
            self.final_response_container["resp"] = await self.handler
        except Exception as e:
             error_str = str(e).lower()
             if "rate limit" in error_str or "429" in error_str:
-                logging.error(f"[RATE_LIMIT_ERROR]
+                logging.error(f"[RATE_LIMIT_ERROR] {e}")
                 self.final_response_container["resp"] = AgentResponse(
                     response="Rate limit exceeded. Please try again later.",
                     source_nodes=[],
                     metadata={"error_type": "rate_limit", "original_error": str(e)},
                 )
             else:
-                logging.error(f"[STREAM_ERROR]
-                logging.error(
-                    f"[STREAM_ERROR] Full traceback: {traceback.format_exc()}"
-                )
+                logging.error(f"[STREAM_ERROR] {e}")
                 self.final_response_container["resp"] = AgentResponse(
                     response="Response completion Error",
                     source_nodes=[],
                     metadata={"error_type": "general", "original_error": str(e)},
                 )
         finally:
-            #
+            # If nothing was ever committed and we ended right after a tool,
+            # assume that tool's output is the "final answer" (common with return_direct).
             self.stream_complete_event.set()
 
-    def _is_tool_related_event(self, event) -> bool:
+    async def _handle_progress_callback(self, event, event_id: str):
         """
-        Determine if an event is actually tool-related and should be tracked.
-
-        This should only return True for events that represent actual tool calls or tool outputs,
-        not for streaming text deltas or other LLM response events.
-
-        Args:
-            event: The stream event to check
-
-        Returns:
-            bool: True if this event should be tracked for tool purposes
+        Fan out progress events to the user's callback (sync or async). Mirrors your existing logic.
         """
-        # Track explicit tool events from LlamaIndex workflow
-        if isinstance(event, (ToolCall, ToolCallResult)):
-            return True
-
-        has_tool_id = hasattr(event, "tool_id") and event.tool_id
-        has_delta = hasattr(event, "delta") and event.delta
-        has_tool_name = hasattr(event, "tool_name") and event.tool_name
+        cb = self.agent_instance.agent_progress_callback
+        is_async = asyncio.iscoroutinefunction(cb)
 
-        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
-        # but still avoid streaming deltas
-        if (has_tool_id or has_tool_name) and not has_delta:
-            return True
-
-        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
-        # should NOT be tracked as tool events
-        return False
-
-    async def _handle_progress_callback(self, event, event_id: str):
-        """Handle progress callback events for different event types with proper context propagation."""
         try:
             if isinstance(event, ToolCall):
-
-
-
-
-
-
-                        msg={
-                            "tool_name": event.tool_name,
-                            "arguments": json.dumps(event.tool_kwargs),
-                        },
-                        event_id=event_id,
-                    )
+                payload = {
+                    "tool_name": event.tool_name,
+                    "arguments": json.dumps(getattr(event, "tool_kwargs", {})),
+                }
+                if is_async:
+                    await cb(status_type=AgentStatusType.TOOL_CALL, msg=payload, event_id=event_id)
                 else:
-
-                    self.agent_instance.agent_progress_callback(
-                        status_type=AgentStatusType.TOOL_CALL,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "arguments": json.dumps(event.tool_kwargs),
-                        },
-                        event_id=event_id,
-                    )
+                    cb(status_type=AgentStatusType.TOOL_CALL, msg=payload, event_id=event_id)
 
             elif isinstance(event, ToolCallResult):
-
-
-
-
-
-
-                        msg={
-                            "tool_name": event.tool_name,
-                            "content": str(event.tool_output),
-                        },
-                        event_id=event_id,
-                    )
+                payload = {
+                    "tool_name": event.tool_name,
+                    "content": str(event.tool_output),
+                }
+                if is_async:
+                    await cb(status_type=AgentStatusType.TOOL_OUTPUT, msg=payload, event_id=event_id)
                 else:
-
-                        status_type=AgentStatusType.TOOL_OUTPUT,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "content": str(event.tool_output),
-                        },
-                        event_id=event_id,
-                    )
+                    cb(status_type=AgentStatusType.TOOL_OUTPUT, msg=payload, event_id=event_id)
 
             elif isinstance(event, AgentInput):
-
-
-                    msg=
-
-
+                payload = {"content": f"Agent input: {getattr(event, 'input', '')}"}
+                if is_async:
+                    await cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
+                else:
+                    cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
 
             elif isinstance(event, AgentOutput):
-
-
-                    msg=
-
-
+                payload = {"content": f"Agent output: {getattr(event, 'response', '')}"}
+                if is_async:
+                    await cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
+                else:
+                    cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
 
         except Exception as e:
+            logging.error(f"[progress-callback] Exception: {e}")
+            logging.error(traceback.format_exc())
 
-
-            logging.error(f"Traceback: {traceback.format_exc()}")
-            # Continue execution despite callback errors
-
-    def create_streaming_response(
-        self, user_metadata: Dict[str, Any]
-    ) -> "StreamingResponseAdapter":
+    def create_streaming_response(self, user_metadata: Dict[str, Any]) -> "StreamingResponseAdapter":
         """
-
-
-        Args:
-            user_metadata: User metadata dictionary to update
-
-        Returns:
-            StreamingResponseAdapter: Configured streaming adapter
+        Build the adapter with post-processing wired in.
         """
         post_process_task = create_stream_post_processing_task(
             self.stream_complete_event,
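A condensed, standalone sketch of the per-step gating idea introduced above (buffer deltas, drop the buffer when the step ends with tool calls, commit it when the step ends without any). The event dicts and the gated_stream helper are simplified assumptions, not the actual workflow event classes:

def gated_stream(events):
    """Yield only tokens from steps that end without tool calls (final-only policy)."""
    buffer = []
    for ev in events:
        if ev["type"] == "delta":
            buffer.append(ev["text"])      # provisional: may be planning tokens
        elif ev["type"] == "step_end":
            if ev["tool_calls"] == 0:
                yield from buffer          # final answer step -> commit
            buffer.clear()                 # planning/tool step -> drop

events = [
    {"type": "delta", "text": "calling search("},   # planning tokens
    {"type": "step_end", "tool_calls": 1},            # step triggered a tool -> dropped
    {"type": "delta", "text": "The answer "},
    {"type": "delta", "text": "is 42."},
    {"type": "step_end", "tool_calls": 0},            # final step -> committed
]
print("".join(gated_stream(events)))  # -> "The answer is 42."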
@@ -454,8 +461,8 @@ class FunctionCallingStreamHandler:
 
         return StreamingResponseAdapter(
             async_response_gen=self.process_stream_events,
-            response="",
-            metadata={},
+            response="",  # will be set by post-processing
+            metadata={},  # will be set by post-processing
             post_process_task=post_process_task,
         )
 
@@ -493,7 +500,7 @@ class ReActStreamHandler:
             # Handle progress callbacks if available - this is the key missing piece!
             if self.agent_instance.agent_progress_callback:
                 # Only track events that are actual tool-related events
-                if self._is_tool_related_event(event):
+                if is_tool_related_event(event):
                     try:
                         # Get event ID from LlamaIndex event
                         event_id = get_event_id(event)
@@ -603,36 +610,6 @@ class ReActStreamHandler:
         # Signal that stream processing is complete
         self.stream_complete_event.set()
 
-    def _is_tool_related_event(self, event) -> bool:
-        """
-        Determine if an event is actually tool-related and should be tracked.
-
-        This should only return True for events that represent actual tool calls or tool outputs,
-        not for streaming text deltas or other LLM response events.
-
-        Args:
-            event: The stream event to check
-
-        Returns:
-            bool: True if this event should be tracked for tool purposes
-        """
-        # Track explicit tool events from LlamaIndex workflow
-        if isinstance(event, (ToolCall, ToolCallResult)):
-            return True
-
-        has_tool_id = hasattr(event, "tool_id") and event.tool_id
-        has_delta = hasattr(event, "delta") and event.delta
-        has_tool_name = hasattr(event, "tool_name") and event.tool_name
-
-        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
-        # but still avoid streaming deltas
-        if (has_tool_id or has_tool_name) and not has_delta:
-            return True
-
-        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
-        # should NOT be tracked as tool events
-        return False
-
     def create_streaming_response(
         self, user_metadata: Dict[str, Any]
     ) -> "StreamingResponseAdapter":
vectara_agentic/agent_core/utils/hallucination.py
CHANGED

@@ -1,12 +1,41 @@
 """Vectara Hallucination Detection and Correction client."""
 
 import logging
+import re
 from typing import List, Optional, Tuple
 import requests
 
 from llama_index.core.llms import MessageRole
 
 
+# Compiled regex patterns for better performance
+_MARKDOWN_LINK_PATTERN = re.compile(r'\[([^\]]*)\]\([^)]*\)')
+_WHITESPACE_CLEANUP_PATTERN = re.compile(r'\s+')
+
+
+def clean_urls_from_text(text: str) -> str:
+    """
+    Remove markdown URLs [text](URL) from text, preserving the link text.
+    This prevents interference with hallucination detection while keeping useful text content.
+
+    Args:
+        text (str): The input text potentially containing markdown URLs
+
+    Returns:
+        str: Text with markdown URLs replaced by their text content
+    """
+    if not text:
+        return text
+
+    # Replace markdown links [text](url) with just the text part
+    cleaned_text = _MARKDOWN_LINK_PATTERN.sub(r'\1', text)
+
+    # Clean up any extra whitespace that might result from the replacement
+    cleaned_text = _WHITESPACE_CLEANUP_PATTERN.sub(' ', cleaned_text).strip()
+
+    return cleaned_text
+
+
 class Hallucination:
     """Vectara Hallucination Correction."""
 
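The markdown-link stripping can be exercised on its own; the sample string below is made up, but the two regexes match the ones added in this hunk:

import re

_MARKDOWN_LINK_PATTERN = re.compile(r'\[([^\]]*)\]\([^)]*\)')
_WHITESPACE_CLEANUP_PATTERN = re.compile(r'\s+')

sample = "See [the docs](https://example.com/docs)  for details."
cleaned = _MARKDOWN_LINK_PATTERN.sub(r'\1', sample)        # keep the link text, drop the URL
cleaned = _WHITESPACE_CLEANUP_PATTERN.sub(' ', cleaned).strip()
print(cleaned)  # -> "See the docs for details."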
@@ -143,9 +172,12 @@ def analyze_hallucinations(
         return None, []
 
     try:
+        # Clean URLs from agent response to prevent interference with hallucination detection
+        cleaned_agent_response = clean_urls_from_text(agent_response)
+
         h = Hallucination(vectara_api_key)
         corrected_text, corrections = h.compute(
-            query=query, context=context, hypothesis=agent_response
+            query=query, context=context, hypothesis=cleaned_agent_response
         )
         return corrected_text, corrections
 
vectara_agentic/db_tools.py
CHANGED

@@ -305,3 +305,7 @@ def patch_sync(func_async: AsyncCallable) -> Callable:
         return loop.run_until_complete(func_async(*args, **kwargs))
 
     return patched_sync
+
+
+# Tool name suffixes for pattern matching (with underscore prefix)
+DB_TOOL_SUFFIXES = {f"_{func}" for func in DatabaseTools.spec_functions}
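Presumably the suffix set is intended for recognizing generated database tool names by their trailing function name. A hedged sketch of that kind of check; the literal suffixes, tool names, and the endswith test are assumptions for illustration, not taken from the package:

# Hypothetical suffix set; the real one is built from DatabaseTools.spec_functions.
db_tool_suffixes = {"_load_data", "_describe_tables", "_list_tables"}

def is_db_tool(tool_name: str) -> bool:
    # A prefixed tool name such as "my_db_load_data" ends with one of the suffixes.
    return any(tool_name.endswith(suffix) for suffix in db_tool_suffixes)

print(is_db_tool("my_db_load_data"))  # True
print(is_db_tool("summarize_text"))   # False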
vectara_agentic/llm_utils.py
CHANGED

@@ -18,7 +18,7 @@ from .agent_config import AgentConfig
 
 provider_to_default_model_name = {
     ModelProvider.OPENAI: "gpt-4.1-mini",
-    ModelProvider.ANTHROPIC: "claude-sonnet-4-
+    ModelProvider.ANTHROPIC: "claude-sonnet-4-5",
     ModelProvider.TOGETHER: "deepseek-ai/DeepSeek-V3",
     ModelProvider.GROQ: "openai/gpt-oss-20b",
     ModelProvider.BEDROCK: "us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -34,6 +34,7 @@ models_to_max_tokens = {
     "gpt-4.1-mini": 32768,
     "claude-sonnet-4-20250514": 64000,
     "claude-sonnet-4-0": 64000,
+    "claude-sonnet-4-5": 64000,
     "deepseek-ai/deepseek-v3": 8192,
     "models/gemini-2.5-flash": 65536,
     "models/gemini-2.5-flash-lite": 65536,
@@ -117,6 +118,57 @@ def _get_llm_params_for_role(
     return model_provider, model_name
 
 
+def _cleanup_gemini_clients() -> None:
+    """Helper function to cleanup Gemini client sessions."""
+    for llm in _llm_cache.values():
+        try:
+            # Check if this is a GoogleGenAI instance with internal client structure
+            if not hasattr(llm, '_client'):
+                continue
+
+            client = getattr(llm, '_client', None)
+            if not client:
+                continue
+
+            api_client = getattr(client, '_api_client', None)
+            if not api_client:
+                continue
+
+            async_session = getattr(api_client, '_async_session', None)
+            if not async_session:
+                continue
+
+            # Close the aiohttp session if it exists
+            try:
+                import asyncio
+                loop = asyncio.get_event_loop()
+                if not loop.is_closed():
+                    loop.run_until_complete(async_session.close())
+            except Exception:
+                pass
+        except Exception:
+            pass
+
+
+def clear_llm_cache(provider: Optional[ModelProvider] = None) -> None:
+    """
+    Clear the LLM cache, optionally for a specific provider only.
+
+    Args:
+        provider: If specified, only clear cache entries for this provider.
+                  If None, clear the entire cache.
+    """
+    # Before clearing, try to cleanup any Gemini clients
+    _cleanup_gemini_clients()
+
+    if provider is None:
+        # Clear entire cache
+        _llm_cache.clear()
+    else:
+        # For simplicity, just clear all when provider is specified
+        _llm_cache.clear()
+
+
 def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
     """
     Get the LLM for the specified role, using the provided config
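A minimal usage sketch for the new cache helper, assuming clear_llm_cache is importable from vectara_agentic.llm_utils and that ModelProvider lives in vectara_agentic.types (both import paths are inferred, not confirmed by this diff):

from vectara_agentic.llm_utils import clear_llm_cache
from vectara_agentic.types import ModelProvider  # assumed location of ModelProvider

# Drop every cached LLM instance (Gemini client sessions are closed first).
clear_llm_cache()

# Passing a provider is accepted, but the current implementation still clears everything.
clear_llm_cache(provider=ModelProvider.GEMINI)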
@@ -159,6 +211,7 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
                 "google_genai not available. Install with: pip install llama-index-llms-google-genai"
             ) from e
         import google.genai.types as google_types
+
         generation_config = google_types.GenerateContentConfig(
             temperature=0.0,
             seed=123,
@@ -182,7 +235,7 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
         ) from e
         additional_kwargs = {"seed": 42}
         if model_name in [
-            "deepseek-ai/DeepSeek-V3.1",
+            "deepseek-ai/DeepSeek-V3.1",
             "deepseek-ai/DeepSeek-R1", "Qwen/Qwen3-235B-A22B-Thinking-2507"
             "openai/gpt-oss-120b", "openai/gpt-oss-20b",
         ]: