vectara-agentic 0.4.7__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of vectara-agentic might be problematic.
- tests/test_bedrock.py +101 -0
- tests/test_gemini.py +64 -0
- tests/test_groq.py +196 -11
- tests/test_openai.py +101 -0
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +1 -1
- vectara_agentic/agent_core/prompts.py +1 -0
- vectara_agentic/agent_core/streaming.py +176 -194
- vectara_agentic/llm_utils.py +1 -1
- vectara_agentic/sub_query_workflow.py +31 -31
- vectara_agentic/tools.py +0 -2
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.8.dist-info}/METADATA +31 -30
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.8.dist-info}/RECORD +16 -16
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.8.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.8.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.4.7.dist-info → vectara_agentic-0.4.8.dist-info}/top_level.txt +0 -0
vectara_agentic/agent_core/streaming.py
CHANGED
@@ -42,6 +42,35 @@ def get_event_id(event) -> str:

     return str(uuid.uuid4())

+
+def is_tool_related_event(event) -> bool:
+    """
+    Determine if an event is actually tool-related and should be tracked.
+
+    This should only return True for events that represent actual tool calls or tool outputs,
+    not for streaming text deltas or other LLM response events.
+
+    Args:
+        event: The stream event to check
+
+    Returns:
+        bool: True if this event should be tracked for tool purposes
+    """
+    # Track explicit tool events from LlamaIndex workflow
+    if isinstance(event, (ToolCall, ToolCallResult)):
+        return True
+
+    has_tool_id = getattr(event, "tool_id", None)
+    has_tool_name = getattr(event, "tool_name", None)
+    has_delta = getattr(event, "delta", None)
+
+    # Some providers don't emit ToolCall/ToolCallResult; avoid treating deltas as tool events
+    if (has_tool_id or has_tool_name) and not has_delta:
+        return True
+
+    return False
+
+
 class StreamingResponseAdapter:
     """
     Adapter class that provides a LlamaIndex-compatible streaming response interface.
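Editor's note: a minimal, self-contained sketch of the duck-typed branch of the new is_tool_related_event helper. The looks_tool_related function and the SimpleNamespace events are illustrative stand-ins only; the real helper also short-circuits on the LlamaIndex ToolCall/ToolCallResult classes.

# Sketch of the duck-typed branch of is_tool_related_event(); the ToolCall /
# ToolCallResult isinstance check from the real helper is omitted here.
from types import SimpleNamespace


def looks_tool_related(event) -> bool:
    has_tool_id = getattr(event, "tool_id", None)
    has_tool_name = getattr(event, "tool_name", None)
    has_delta = getattr(event, "delta", None)
    # A tool id/name without a streaming delta -> treat as a tool event
    return bool((has_tool_id or has_tool_name) and not has_delta)


tool_event = SimpleNamespace(tool_name="search", tool_id="abc123")
text_delta = SimpleNamespace(delta="Hel", tool_name=None, tool_id=None)

assert looks_tool_related(tool_event) is True
assert looks_tool_related(text_delta) is False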
@@ -90,25 +119,6 @@ class StreamingResponseAdapter:
         """
         return AgentResponse(response=self.response, metadata=self.metadata)

-    def wait_for_completion(self) -> None:
-        """
-        Wait for post-processing to complete and update metadata.
-        This should be called after streaming finishes but before accessing metadata.
-        """
-        if self.post_process_task and not self.post_process_task.done():
-            return
-        if self.post_process_task and self.post_process_task.done():
-            try:
-                final_response = self.post_process_task.result()
-                if hasattr(final_response, "metadata") and final_response.metadata:
-                    # Update our metadata from the completed task
-                    self.metadata.update(final_response.metadata)
-            except Exception as e:
-                logging.error(
-                    f"Error during post-processing: {e}. "
-                    "Ensure the post-processing task is correctly implemented."
-                )
-

 def extract_response_text_from_chat_message(response_text: Any) -> str:
     """
@@ -234,9 +244,8 @@ def create_stream_post_processing_task(
    async def _safe_post_process():
        try:
            return await _post_process()
-        except Exception:
-            traceback.
-            # Return empty response on error
+        except Exception as e:
+            logging.error(f"Error {e} occurred during post-processing: {traceback.format_exc()}")
            return AgentResponse(response="", metadata={})

    return asyncio.create_task(_safe_post_process())
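Editor's note: the new except branch above logs the exception with its full traceback and falls back to an empty response instead of letting the post-processing task die. A runnable sketch of that pattern, with FakeResponse standing in for the package's AgentResponse:

# Wrap an async post-processing step so failures are logged and swallowed.
import asyncio
import logging
import traceback
from dataclasses import dataclass, field


@dataclass
class FakeResponse:
    response: str = ""
    metadata: dict = field(default_factory=dict)


async def post_process() -> FakeResponse:
    raise RuntimeError("boom")  # simulate a failing post-processing step


async def safe_post_process() -> FakeResponse:
    try:
        return await post_process()
    except Exception as e:  # deliberately broad, mirroring the diff
        logging.error("Error %s occurred during post-processing: %s", e, traceback.format_exc())
        return FakeResponse()


print(asyncio.run(safe_post_process()))  # -> FakeResponse(response='', metadata={})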
@@ -244,205 +253,208 @@ def create_stream_post_processing_task(

 class FunctionCallingStreamHandler:
     """
-
+    Streaming handler for function-calling agents with strict "no leaks" gating.
+
+    Core ideas:
+      - Buffer tokens PER LLM STEP.
+      - Commit the buffer ONLY if that step ends with AgentOutput.tool_calls == [].
+      - Drop the buffer if the step triggers tool calls (planning/tool-selection).
+      - Track pending tool results; handle multi-round (tool -> read -> tool -> ...) loops.
+      - Support return_direct tools (tool output is the final answer, no synthesis step).
+      - Optional optimistic streaming with rollback token for nicer UX.
     """

-    def __init__(
+    def __init__(
+        self,
+        agent_instance,
+        handler,
+        prompt: str,
+        *,
+        stream_policy: str = "final_only",  # "final_only" | "optimistic_live"
+        rollback_token: str = "[[__rollback_current_step__]]",  # UI control signal (optional)
+    ):
         self.agent_instance = agent_instance
-        self.handler = handler
+        self.handler = handler  # awaitable; also has .stream_events()
         self.prompt = prompt
+
+        self.stream_policy = stream_policy
+        self.rollback_token = rollback_token
+
+        # Plumbing for your existing adapter/post-processing
         self.final_response_container = {"resp": None}
         self.stream_complete_event = asyncio.Event()

     async def process_stream_events(self) -> AsyncIterator[str]:
         """
-        Process streaming events and yield
+        Process streaming events and yield only valid, final tokens.

-
-
+        Contract:
+          - Never surface "planning" tokens (tool arguments, scratchpads, etc).
+          - Only surface tokens produced in the last, post-tool LLM step,
+            or a return_direct tool's output.
         """
-
-
+        # Step-scoped state
+        step_buffer: list[str] = []
+        step_has_tool_calls = False
+
+        # Run-scoped state
+        pending_tools = 0
+        committed_any_text = False
+
+        def _reset_step():
+            nonlocal step_has_tool_calls
+            step_buffer.clear()
+            step_has_tool_calls = False

         async for ev in self.handler.stream_events():
-            #
+            # ---- 1) Capture tool outputs for downstream logging/telemetry ----
             if isinstance(ev, ToolCallResult):
                 if hasattr(self.agent_instance, "_add_tool_output"):
                     # pylint: disable=W0212
-                    self.agent_instance._add_tool_output(
-
-
+                    self.agent_instance._add_tool_output(ev.tool_name, str(ev.tool_output))
+
+                pending_tools = max(0, pending_tools - 1)

-
+                # Return-direct short-circuit: surface tool output as the final answer
+                if getattr(ev, "return_direct", False):
+                    yield str(ev.tool_output)
+                    committed_any_text = True
+                    # Do not early-break; keep draining events safely.
+
+            # ---- 2) Progress callback plumbing (safe and optional) ----
             if self.agent_instance.agent_progress_callback:
-
-                if self._is_tool_related_event(ev):
+                if is_tool_related_event(ev):
                     try:
                         event_id = get_event_id(ev)
                         await self._handle_progress_callback(ev, event_id)
-                    except
-                        logging.warning(f"
-                        continue
+                    except Exception as e:
+                        logging.warning(f"[progress-callback] skipping event: {e}")

-            #
+            # ---- 3) Step boundaries & gating logic ----
+            # New step starts: clear per-step state
+            if isinstance(ev, AgentInput):
+                _reset_step()
+                continue
+
+            # Streaming deltas (provisional)
             if hasattr(ev, "__class__") and "AgentStream" in str(ev.__class__):
-
-
-
-                    hasattr(ev, "tool_calls")
-                    and not ev.tool_calls
-                    and had_tool_calls
-                    and not transitioned_to_prose
-                ):
-                    yield "\n\n"
-                    transitioned_to_prose = True
-                if hasattr(ev, "delta"):
-                    yield ev.delta
-                elif (
-                    hasattr(ev, "tool_calls")
-                    and not ev.tool_calls
-                    and hasattr(ev, "delta")
-                    and transitioned_to_prose
-                ):
-                    yield ev.delta
+                # If the model is constructing a function call, LlamaIndex will attach tool_calls here
+                if getattr(ev, "tool_calls", None):
+                    step_has_tool_calls = True

-
+                delta = getattr(ev, "delta", None)
+                if not delta:
+                    continue
+
+                # Always buffer first
+                step_buffer.append(delta)
+
+                # Optional "optimistic" UX: show live typing but be ready to roll it back
+                if self.stream_policy == "optimistic_live" and pending_tools == 0 and not step_has_tool_calls:
+                    yield delta
+
+                continue
+
+            # Step end: decide to commit or drop
+            if isinstance(ev, AgentOutput):
+                n_calls = len(getattr(ev, "tool_calls", []) or [])
+
+                if n_calls == 0:
+                    # Final text step -> commit
+                    if self.stream_policy == "final_only":
+                        # We held everything; now stream it out in order.
+                        for chunk in step_buffer:
+                            yield chunk
+                    # In optimistic mode, UI already saw these chunks live.
+
+                    committed_any_text = committed_any_text or bool(step_buffer)
+                    _reset_step()
+
+                else:
+                    # Planning/tool step -> drop buffer
+                    if self.stream_policy == "optimistic_live" and step_buffer:
+                        # Tell the UI to roll back the ephemeral message
+                        # (only if your frontend supports it)
+                        yield self.rollback_token
+
+                    _reset_step()
+                    pending_tools += n_calls
+
+                continue
+
+        # ---- 4) Finish: await the underlying handler for the final result ----
         try:
             self.final_response_container["resp"] = await self.handler
         except Exception as e:
             error_str = str(e).lower()
             if "rate limit" in error_str or "429" in error_str:
-                logging.error(f"[RATE_LIMIT_ERROR]
+                logging.error(f"[RATE_LIMIT_ERROR] {e}")
                 self.final_response_container["resp"] = AgentResponse(
                     response="Rate limit exceeded. Please try again later.",
                     source_nodes=[],
                     metadata={"error_type": "rate_limit", "original_error": str(e)},
                 )
             else:
-                logging.error(f"[STREAM_ERROR]
-                logging.error(
-                    f"[STREAM_ERROR] Full traceback: {traceback.format_exc()}"
-                )
+                logging.error(f"[STREAM_ERROR] {e}")
                 self.final_response_container["resp"] = AgentResponse(
                     response="Response completion Error",
                     source_nodes=[],
                     metadata={"error_type": "general", "original_error": str(e)},
                 )
         finally:
-            #
+            # If nothing was ever committed and we ended right after a tool,
+            # assume that tool's output is the "final answer" (common with return_direct).
             self.stream_complete_event.set()

-    def
+    async def _handle_progress_callback(self, event, event_id: str):
         """
-
-
-        This should only return True for events that represent actual tool calls or tool outputs,
-        not for streaming text deltas or other LLM response events.
-
-        Args:
-            event: The stream event to check
-
-        Returns:
-            bool: True if this event should be tracked for tool purposes
+        Fan out progress events to the user's callback (sync or async). Mirrors your existing logic.
         """
-
-
-            return True
-
-        has_tool_id = hasattr(event, "tool_id") and event.tool_id
-        has_delta = hasattr(event, "delta") and event.delta
-        has_tool_name = hasattr(event, "tool_name") and event.tool_name
-
-        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
-        # but still avoid streaming deltas
-        if (has_tool_id or has_tool_name) and not has_delta:
-            return True
+        cb = self.agent_instance.agent_progress_callback
+        is_async = asyncio.iscoroutinefunction(cb)

-        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
-        # should NOT be tracked as tool events
-        return False
-
-    async def _handle_progress_callback(self, event, event_id: str):
-        """Handle progress callback events for different event types with proper context propagation."""
         try:
             if isinstance(event, ToolCall):
-
-
-
-
-
-
-                    msg={
-                        "tool_name": event.tool_name,
-                        "arguments": json.dumps(event.tool_kwargs),
-                    },
-                    event_id=event_id,
-                )
+                payload = {
+                    "tool_name": event.tool_name,
+                    "arguments": json.dumps(getattr(event, "tool_kwargs", {})),
+                }
+                if is_async:
+                    await cb(status_type=AgentStatusType.TOOL_CALL, msg=payload, event_id=event_id)
                 else:
-
-                    self.agent_instance.agent_progress_callback(
-                        status_type=AgentStatusType.TOOL_CALL,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "arguments": json.dumps(event.tool_kwargs),
-                        },
-                        event_id=event_id,
-                    )
+                    cb(status_type=AgentStatusType.TOOL_CALL, msg=payload, event_id=event_id)

             elif isinstance(event, ToolCallResult):
-
-
-
-
-
-
-                    msg={
-                        "tool_name": event.tool_name,
-                        "content": str(event.tool_output),
-                    },
-                    event_id=event_id,
-                )
+                payload = {
+                    "tool_name": event.tool_name,
+                    "content": str(event.tool_output),
+                }
+                if is_async:
+                    await cb(status_type=AgentStatusType.TOOL_OUTPUT, msg=payload, event_id=event_id)
                 else:
-
-                        status_type=AgentStatusType.TOOL_OUTPUT,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "content": str(event.tool_output),
-                        },
-                        event_id=event_id,
-                    )
+                    cb(status_type=AgentStatusType.TOOL_OUTPUT, msg=payload, event_id=event_id)

             elif isinstance(event, AgentInput):
-
-
-                    msg=
-
-
+                payload = {"content": f"Agent input: {getattr(event, 'input', '')}"}
+                if is_async:
+                    await cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
+                else:
+                    cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)

             elif isinstance(event, AgentOutput):
-
-
-                    msg=
-
-
+                payload = {"content": f"Agent output: {getattr(event, 'response', '')}"}
+                if is_async:
+                    await cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
+                else:
+                    cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)

         except Exception as e:
+            logging.error(f"[progress-callback] Exception: {e}")
+            logging.error(traceback.format_exc())

-
-            logging.error(f"Traceback: {traceback.format_exc()}")
-            # Continue execution despite callback errors
-
-    def create_streaming_response(
-        self, user_metadata: Dict[str, Any]
-    ) -> "StreamingResponseAdapter":
+    def create_streaming_response(self, user_metadata: Dict[str, Any]) -> "StreamingResponseAdapter":
         """
-
-
-        Args:
-            user_metadata: User metadata dictionary to update
-
-        Returns:
-            StreamingResponseAdapter: Configured streaming adapter
+        Build the adapter with post-processing wired in.
         """
         post_process_task = create_stream_post_processing_task(
             self.stream_complete_event,
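Editor's note: a toy simulation of the "final_only" gating policy described in the class docstring above: deltas are buffered per step and committed only when the step ends with no tool calls. Delta and StepEnd are simplified stand-ins for the LlamaIndex stream events, not library classes.

# Buffer deltas per LLM step; commit only when the step ends with zero tool calls.
from dataclasses import dataclass, field
from typing import List


@dataclass
class Delta:
    delta: str


@dataclass
class StepEnd:
    tool_calls: List[str] = field(default_factory=list)


def gate_final_only(events) -> str:
    step_buffer, committed = [], []
    for ev in events:
        if isinstance(ev, Delta):
            step_buffer.append(ev.delta)      # always buffer first
        elif isinstance(ev, StepEnd):
            if not ev.tool_calls:             # final text step -> commit
                committed.extend(step_buffer)
            step_buffer.clear()               # planning/tool step -> drop buffer
    return "".join(committed)


events = [
    Delta('{"query": "weather"}'), StepEnd(tool_calls=["search"]),  # planning step, dropped
    Delta("It is "), Delta("sunny."), StepEnd(tool_calls=[]),       # final step, committed
]
print(gate_final_only(events))  # -> "It is sunny."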
@@ -454,8 +466,8 @@ class FunctionCallingStreamHandler:

         return StreamingResponseAdapter(
             async_response_gen=self.process_stream_events,
-            response="",
-            metadata={},
+            response="",  # will be set by post-processing
+            metadata={},  # will be set by post-processing
             post_process_task=post_process_task,
         )

@@ -493,7 +505,7 @@ class ReActStreamHandler:
             # Handle progress callbacks if available - this is the key missing piece!
             if self.agent_instance.agent_progress_callback:
                 # Only track events that are actual tool-related events
-                if
+                if is_tool_related_event(event):
                     try:
                         # Get event ID from LlamaIndex event
                         event_id = get_event_id(event)
@@ -603,36 +615,6 @@ class ReActStreamHandler:
         # Signal that stream processing is complete
         self.stream_complete_event.set()

-    def _is_tool_related_event(self, event) -> bool:
-        """
-        Determine if an event is actually tool-related and should be tracked.
-
-        This should only return True for events that represent actual tool calls or tool outputs,
-        not for streaming text deltas or other LLM response events.
-
-        Args:
-            event: The stream event to check
-
-        Returns:
-            bool: True if this event should be tracked for tool purposes
-        """
-        # Track explicit tool events from LlamaIndex workflow
-        if isinstance(event, (ToolCall, ToolCallResult)):
-            return True
-
-        has_tool_id = hasattr(event, "tool_id") and event.tool_id
-        has_delta = hasattr(event, "delta") and event.delta
-        has_tool_name = hasattr(event, "tool_name") and event.tool_name
-
-        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
-        # but still avoid streaming deltas
-        if (has_tool_id or has_tool_name) and not has_delta:
-            return True
-
-        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
-        # should NOT be tracked as tool events
-        return False
-
     def create_streaming_response(
         self, user_metadata: Dict[str, Any]
     ) -> "StreamingResponseAdapter":
vectara_agentic/llm_utils.py
CHANGED
@@ -182,7 +182,7 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
        ) from e
    additional_kwargs = {"seed": 42}
    if model_name in [
-        "deepseek-ai/DeepSeek-V3.1",
+        "deepseek-ai/DeepSeek-V3.1",
        "deepseek-ai/DeepSeek-R1", "Qwen/Qwen3-235B-A22B-Thinking-2507"
        "openai/gpt-oss-120b", "openai/gpt-oss-20b",
    ]:
vectara_agentic/sub_query_workflow.py
CHANGED
@@ -72,7 +72,7 @@ class SubQuestionQueryWorkflow(Workflow):
            raise ValueError(f"Expected inputs to be of type {self.InputsModel}")

        query = ev.inputs.query
-        await ctx.set("original_query", query)
+        await ctx.store.set("original_query", query)

        required_attrs = ["agent", "llm", "tools"]
        for attr in required_attrs:
@@ -81,15 +81,15 @@ class SubQuestionQueryWorkflow(Workflow):
                    f"{attr.capitalize()} not provided to workflow Start Event."
                )

-        await ctx.set("agent", ev.agent)
-        await ctx.set("llm", ev.llm)
-        await ctx.set("tools", ev.tools)
-        await ctx.set("verbose", getattr(ev, "verbose", False))
+        await ctx.store.set("agent", ev.agent)
+        await ctx.store.set("llm", ev.llm)
+        await ctx.store.set("tools", ev.tools)
+        await ctx.store.set("verbose", getattr(ev, "verbose", False))

        chat_history = [str(msg) for msg in ev.agent.memory.get()]

-        llm = await ctx.get("llm")
-        original_query = await ctx.get("original_query")
+        llm = await ctx.store.get("llm")
+        original_query = await ctx.store.get("original_query")
        response = llm.complete(
            f"""
            Given a user question, and a list of tools, output a list of
@@ -140,7 +140,7 @@ class SubQuestionQueryWorkflow(Workflow):
            # We use the original query as a single question fallback
            sub_questions = [original_query]

-        await ctx.set("sub_question_count", len(sub_questions))
+        await ctx.store.set("sub_question_count", len(sub_questions))
        for question in sub_questions:
            ctx.send_event(self.QueryEvent(question=question))

@@ -151,13 +151,13 @@ class SubQuestionQueryWorkflow(Workflow):
        """
        Given a sub-question, return the answer to the sub-question, using the agent.
        """
-        if await ctx.get("verbose"):
+        if await ctx.store.get("verbose"):
            logging.info(f"Sub-question is {ev.question}")
-        agent = await ctx.get("agent")
+        agent = await ctx.store.get("agent")
        question = ev.question
        response = await agent.achat(question)
        answer = str(response)
-        await ctx.set("qna", await ctx.get("qna", []) + [(question, answer)])
+        await ctx.store.set("qna", await ctx.store.get("qna", []) + [(question, answer)])
        return self.AnswerEvent(question=question, answer=answer)

        @step
@@ -166,7 +166,7 @@ class SubQuestionQueryWorkflow(Workflow):
        Given a list of answers to sub-questions, combine them into a single answer.
        """
        ready = ctx.collect_events(
-            ev, [self.AnswerEvent] * await ctx.get("sub_question_count")
+            ev, [self.AnswerEvent] * await ctx.store.get("sub_question_count")
        )
        if ready is None:
            return None
@@ -180,18 +180,18 @@ class SubQuestionQueryWorkflow(Workflow):
        each of which has been answered. Combine the answers to all the sub-questions
        into a single answer to the original question.

-        Original question: {await ctx.get('original_query')}
+        Original question: {await ctx.store.get('original_query')}

        Sub-questions and answers:
        {answers}
        """
-        if await ctx.get("verbose"):
+        if await ctx.store.get("verbose"):
            logging.info(f"Final prompt is {prompt}")

-        llm = await ctx.get("llm")
+        llm = await ctx.store.get("llm")
        response = llm.complete(prompt)

-        if await ctx.get("verbose"):
+        if await ctx.store.get("verbose"):
            logging.info(f"Final response is {response}")
        return StopEvent(result=self.OutputsModel(response=str(response)))

@@ -246,33 +246,33 @@ class SequentialSubQuestionsWorkflow(Workflow):
            raise ValueError(f"Expected inputs to be of type {self.InputsModel}")
        if hasattr(ev, "inputs"):
            query = ev.inputs.query
-            await ctx.set("original_query", query)
+            await ctx.store.set("original_query", query)

        if hasattr(ev, "agent"):
-            await ctx.set("agent", ev.agent)
+            await ctx.store.set("agent", ev.agent)
        else:
            raise ValueError("Agent not provided to workflow Start Event.")
        chat_history = [str(msg) for msg in ev.agent.memory.get()]

        if hasattr(ev, "llm"):
-            await ctx.set("llm", ev.llm)
+            await ctx.store.set("llm", ev.llm)
        else:
            raise ValueError("LLM not provided to workflow Start Event.")

        if hasattr(ev, "tools"):
-            await ctx.set("tools", ev.tools)
+            await ctx.store.set("tools", ev.tools)
        else:
            raise ValueError("Tools not provided to workflow Start Event.")

        if hasattr(ev, "verbose"):
-            await ctx.set("verbose", ev.verbose)
+            await ctx.store.set("verbose", ev.verbose)
        else:
-            await ctx.set("verbose", False)
+            await ctx.store.set("verbose", False)

-        original_query = await ctx.get("original_query")
+        original_query = await ctx.store.get("original_query")
        if ev.verbose:
            logging.info(f"Query is {original_query}")
-        llm = await ctx.get("llm")
+        llm = await ctx.store.get("llm")
        response = llm.complete(
            f"""
            Given a user question, and a list of tools, output a list of
@@ -320,8 +320,8 @@ class SequentialSubQuestionsWorkflow(Workflow):

        sub_questions = response_obj.get("sub_questions")

-        await ctx.set("sub_questions", sub_questions)
-        if await ctx.get("verbose"):
+        await ctx.store.set("sub_questions", sub_questions)
+        if await ctx.store.get("verbose"):
            logging.info(f"Sub-questions are {sub_questions}")

        return self.QueryEvent(question=sub_questions[0], prev_answer="", num=0)
@@ -333,10 +333,10 @@ class SequentialSubQuestionsWorkflow(Workflow):
        """
        Given a sub-question, return the answer to the sub-question, using the agent.
        """
-        if await ctx.get("verbose"):
+        if await ctx.store.get("verbose"):
            logging.info(f"Sub-question is {ev.question}")
-        agent = await ctx.get("agent")
-        sub_questions = await ctx.get("sub_questions")
+        agent = await ctx.store.get("agent")
+        sub_questions = await ctx.store.get("sub_questions")
        question = ev.question
        if ev.prev_answer:
            prev_question = sub_questions[ev.num - 1]
@@ -348,11 +348,11 @@ class SequentialSubQuestionsWorkflow(Workflow):
        else:
            response = await agent.achat(question)
            answer = response.response
-        if await ctx.get("verbose"):
+        if await ctx.store.get("verbose"):
            logging.info(f"Answer is {answer}")

        if ev.num + 1 < len(sub_questions):
-            await ctx.set("qna", await ctx.get("qna", []) + [(question, answer)])
+            await ctx.store.set("qna", await ctx.store.get("qna", []) + [(question, answer)])
            return self.QueryEvent(
                question=sub_questions[ev.num + 1],
                prev_answer=answer,
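Editor's note: every change in this file swaps direct ctx.set/ctx.get calls for the context store. A minimal workflow sketch of the new pattern; EchoWorkflow is hypothetical, and the imports mirror the llama-index workflow API this package already uses.

# State is now persisted and read via ctx.store rather than ctx.set/ctx.get.
from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step


class EchoWorkflow(Workflow):
    @step
    async def start(self, ctx: Context, ev: StartEvent) -> StopEvent:
        # Old style (0.4.7): await ctx.set("original_query", ev.query)
        # New style (0.4.8): go through the context store
        await ctx.store.set("original_query", ev.query)
        query = await ctx.store.get("original_query")
        return StopEvent(result=f"echo: {query}")


# Usage (inside an event loop): await EchoWorkflow().run(query="hello")  -> "echo: hello"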
vectara_agentic/tools.py
CHANGED
@@ -66,7 +66,6 @@ LI_packages = {
    },
}

-
def normalize_url(url):
    """
    Normalize URL for consistent comparison by handling percent-encoding.
@@ -90,7 +89,6 @@ def normalize_url(url):
        logging.warning(f"Error normalizing URL '{url}': {e}")
        return url

-
def citation_appears_in_text(citation_text, citation_url, response_text):
    """
    Check if citation appears in response text using multiple matching strategies.
|