vectara-agentic 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vectara-agentic might be problematic. Click here for more details.

Files changed (53) hide show
  1. tests/__init__.py +7 -0
  2. tests/conftest.py +312 -0
  3. tests/endpoint.py +54 -17
  4. tests/run_tests.py +111 -0
  5. tests/test_agent.py +10 -5
  6. tests/test_agent_type.py +82 -143
  7. tests/test_api_endpoint.py +4 -0
  8. tests/test_bedrock.py +4 -0
  9. tests/test_fallback.py +4 -0
  10. tests/test_gemini.py +28 -45
  11. tests/test_groq.py +4 -0
  12. tests/test_private_llm.py +11 -2
  13. tests/test_return_direct.py +6 -2
  14. tests/test_serialization.py +4 -0
  15. tests/test_streaming.py +88 -0
  16. tests/test_tools.py +10 -82
  17. tests/test_vectara_llms.py +4 -0
  18. tests/test_vhc.py +66 -0
  19. tests/test_workflow.py +4 -0
  20. vectara_agentic/__init__.py +27 -4
  21. vectara_agentic/_callback.py +65 -67
  22. vectara_agentic/_observability.py +30 -30
  23. vectara_agentic/_version.py +1 -1
  24. vectara_agentic/agent.py +375 -848
  25. vectara_agentic/agent_config.py +15 -14
  26. vectara_agentic/agent_core/__init__.py +22 -0
  27. vectara_agentic/agent_core/factory.py +501 -0
  28. vectara_agentic/{_prompts.py → agent_core/prompts.py} +3 -35
  29. vectara_agentic/agent_core/serialization.py +345 -0
  30. vectara_agentic/agent_core/streaming.py +495 -0
  31. vectara_agentic/agent_core/utils/__init__.py +34 -0
  32. vectara_agentic/agent_core/utils/hallucination.py +202 -0
  33. vectara_agentic/agent_core/utils/logging.py +52 -0
  34. vectara_agentic/agent_core/utils/prompt_formatting.py +56 -0
  35. vectara_agentic/agent_core/utils/schemas.py +87 -0
  36. vectara_agentic/agent_core/utils/tools.py +125 -0
  37. vectara_agentic/agent_endpoint.py +4 -6
  38. vectara_agentic/db_tools.py +37 -12
  39. vectara_agentic/llm_utils.py +41 -42
  40. vectara_agentic/sub_query_workflow.py +9 -14
  41. vectara_agentic/tool_utils.py +138 -83
  42. vectara_agentic/tools.py +36 -21
  43. vectara_agentic/tools_catalog.py +16 -16
  44. vectara_agentic/types.py +98 -6
  45. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.0.dist-info}/METADATA +69 -30
  46. vectara_agentic-0.4.0.dist-info/RECORD +50 -0
  47. tests/test_agent_planning.py +0 -64
  48. tests/test_hhem.py +0 -100
  49. vectara_agentic/hhem.py +0 -82
  50. vectara_agentic-0.3.3.dist-info/RECORD +0 -39
  51. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.0.dist-info}/WHEEL +0 -0
  52. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.0.dist-info}/licenses/LICENSE +0 -0
  53. {vectara_agentic-0.3.3.dist-info → vectara_agentic-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,495 @@
1
+ """
2
+ Streaming utilities for agent responses.
3
+
4
+ This module provides streaming response handling, adapters, and utilities
5
+ for managing asynchronous agent interactions with proper synchronization.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import uuid
11
+ import json
12
+ from typing import Callable, Any, Dict, AsyncIterator
13
+ from collections import OrderedDict
14
+
15
+ from ..types import AgentResponse
16
+ from .utils.hallucination import analyze_hallucinations
17
+
18
class ToolEventTracker:
    """
    Hands out stable event IDs so a tool call and its output can be grouped.

    Each distinct tool identifier is mapped to one UUID string. Asking again
    for an identifier that is already known returns the same UUID.
    """

    def __init__(self):
        # Insertion-ordered so the oldest identifiers can be evicted first.
        self.event_ids = OrderedDict()
        # Suffix for identifiers synthesized when an event has no tool_id.
        self.fallback_counter = 0

    def get_event_id(self, event) -> str:
        """
        Return the event ID associated with a tool event.

        The lookup key is the event's ``tool_id`` when it is truthy. Otherwise
        a key is synthesized from ``tool_name`` (or, failing that, the
        lower-cased class name of the event) plus the running fallback
        counter, which is then incremented.

        Args:
            event: The tool event object

        Returns:
            str: UUID string registered for the resolved key
        """
        key = getattr(event, "tool_id", None)

        if not key:
            # No provider-supplied id: build one from the best label available.
            if hasattr(event, "tool_name") and event.tool_name:
                label = event.tool_name
            else:
                label = type(event).__name__.lower()
            key = f"{label}_{self.fallback_counter}"
            self.fallback_counter += 1

        assigned = self.event_ids.get(key)
        if assigned is None:
            assigned = str(uuid.uuid4())
            self.event_ids[key] = assigned
        return assigned

    def clear_old_entries(self, max_entries: int = 100):
        """Evict the oldest entries until at most ``max_entries // 2`` remain."""
        surplus = len(self.event_ids) - max_entries // 2
        for _ in range(surplus):  # empty range when nothing needs evicting
            self.event_ids.popitem(last=False)
67
+
68
+
69
class StreamingResponseAdapter:
    """
    Streaming-response object exposing the interface AgentStreamingResponse expects.

    It carries the token generator, the (eventually filled) response text and
    metadata, and the post-processing task that produces the final values.
    """

    def __init__(
        self,
        async_response_gen: Callable[[], Any] | None = None,
        response: str = "",
        metadata: Dict[str, Any] | None = None,
        post_process_task: Any = None,
    ) -> None:
        """
        Store the streaming pieces on the adapter.

        Args:
            async_response_gen: Async generator function for streaming tokens
            response: Final response text (filled after streaming completes)
            metadata: Response metadata dictionary; a falsy value becomes ``{}``
            post_process_task: Awaitable task that yields the final response
        """
        self.async_response_gen = async_response_gen
        self.response = response
        self.metadata = metadata if metadata else {}
        self.post_process_task = post_process_task

    async def aget_response(self) -> AgentResponse:
        """
        Await the post-processing task (if any) and return the final response.

        The adapter's ``response`` and ``metadata`` are overwritten with the
        values produced by the task before the AgentResponse is built.
        """
        if self.post_process_task:
            completed = await self.post_process_task
            self.response = completed.response
            self.metadata = completed.metadata or {}
        return AgentResponse(response=self.response, metadata=self.metadata)

    def get_response(self) -> AgentResponse:
        """
        Build an AgentResponse from whatever state the adapter currently holds.

        Required by the _StreamProto protocol for AgentStreamingResponse compatibility.
        """
        return AgentResponse(response=self.response, metadata=self.metadata)

    def wait_for_completion(self) -> None:
        """
        Merge metadata from the post-processing task if it has already finished.

        This method does not block: when there is no task, or the task is
        still running, it returns immediately without changing anything.
        """
        task = self.post_process_task
        if not task or not task.done():
            return

        try:
            completed = task.result()
            if hasattr(completed, "metadata") and completed.metadata:
                # Fold the finished task's metadata into our own dictionary.
                self.metadata.update(completed.metadata)
        except Exception as e:
            logging.error(
                f"Error during post-processing: {e}. "
                "Ensure the post-processing task is correctly implemented."
            )
135
+
136
+
137
def extract_response_text_from_chat_message(response_text: Any) -> str:
    """
    Extract text content from various response formats.

    Resolution order:
      1. A plain string is returned unchanged.
      2. An object with a non-None ``content`` attribute yields that content
         (coerced to ``str`` if it is not one already).
      3. An object with a ``blocks`` attribute yields the concatenation of
         every block's non-None ``text``.
      4. An object whose ``content`` is None and that has no usable blocks
         yields an empty string.
      5. Anything else is converted with ``str()``.

    Args:
        response_text: Response object that may be ChatMessage, string, or other format

    Returns:
        str: Extracted text content (never None)
    """
    if isinstance(response_text, str):
        return response_text

    has_content_attr = hasattr(response_text, "content")
    content = getattr(response_text, "content", None)
    if content is not None:
        return content if isinstance(content, str) else str(content)

    # A None ``content`` must not short-circuit: the text may live in blocks.
    blocks = getattr(response_text, "blocks", None)
    if blocks is not None:
        text_parts = []
        for block in blocks:
            text = getattr(block, "text", None)
            if text is not None:  # skip non-text blocks and empty text slots
                text_parts.append(text if isinstance(text, str) else str(text))
        return "".join(text_parts)

    if has_content_attr or hasattr(response_text, "blocks"):
        # Message-like object that simply carries no text.
        return ""

    return str(response_text)
163
+
164
+
165
async def execute_post_stream_processing(
    result: Any,
    prompt: str,
    agent_instance,
    user_metadata: Dict[str, Any],
) -> AgentResponse:
    """
    Turn a finished streaming result into the final AgentResponse.

    Steps, in order: extract the response text, build the AgentResponse,
    run the agent's ``_aformat_for_lats`` hook, invoke the query logging
    callback, run hallucination analysis when a Vectara API key is set,
    merge ``user_metadata`` into the response metadata, and call
    ``eval_fcs`` when observability is enabled.

    Args:
        result: The completed result object from streaming
        prompt: Original user prompt
        agent_instance: Agent instance for callbacks and processing
        user_metadata: User metadata; receives ``corrected_text`` and
            ``corrections`` when hallucination analysis runs

    Returns:
        AgentResponse: Processed final response
    """
    if result is None:
        logging.warning("Received None result from streaming, returning empty response.")
        # With no result object there is no metadata to carry over.
        return AgentResponse(response="No response generated", metadata={})

    # Prefer the result's own ``response``; otherwise fall back to its string form.
    raw_response = result.response if hasattr(result, "response") else str(result)

    final = AgentResponse(
        response=extract_response_text_from_chat_message(raw_response),
        metadata=getattr(result, "metadata", {}),
    )

    # pylint: disable=protected-access
    await agent_instance._aformat_for_lats(prompt, final)

    log_query = agent_instance.query_logging_callback
    if log_query:
        log_query(prompt, final.response)

    api_key = agent_instance.vectara_api_key
    if api_key:
        corrected_text, corrections = analyze_hallucinations(
            query=prompt,
            chat_history=agent_instance.memory.get(),
            agent_response=final.response,
            tools=agent_instance.tools,
            vectara_api_key=api_key,
        )
        user_metadata["corrected_text"] = corrected_text
        user_metadata["corrections"] = corrections

    # NOTE(review): nesting reconstructed from a flattened listing; the
    # metadata merge is assumed to run regardless of the API key branch.
    if not final.metadata:
        final.metadata = {}
    final.metadata.update(user_metadata)

    if agent_instance.observability_enabled:
        # Imported lazily, as in the original code.
        from .._observability import eval_fcs

        eval_fcs()

    return final
236
+
237
+
238
def create_stream_post_processing_task(
    stream_complete_event: asyncio.Event,
    final_response_container: Dict[str, Any],
    prompt: str,
    agent_instance,
    user_metadata: Dict[str, Any],
) -> asyncio.Task:
    """
    Schedule post-stream processing as an asyncio task.

    The task waits for ``stream_complete_event``, reads the ``"resp"`` entry
    of ``final_response_container`` and passes it to
    ``execute_post_stream_processing``. Any exception is printed with
    ``traceback.print_exc()`` and replaced by an empty AgentResponse.

    Args:
        stream_complete_event: Event to wait for stream completion
        final_response_container: Container with final response data
        prompt: Original user prompt
        agent_instance: Agent instance for callbacks and processing
        user_metadata: User metadata forwarded to post-processing

    Returns:
        asyncio.Task: Task that will process the final response
    """

    async def _finalize_after_stream():
        try:
            # Block until the generator has stored the final response.
            await stream_complete_event.wait()
            streamed_result = final_response_container.get("resp")
            return await execute_post_stream_processing(
                streamed_result, prompt, agent_instance, user_metadata
            )
        except Exception:
            import traceback

            traceback.print_exc()
            # Never let the task raise: hand back an empty response instead.
            return AgentResponse(response="", metadata={})

    return asyncio.create_task(_finalize_after_stream())
278
+
279
+
280
class FunctionCallingStreamHandler:
    """
    Handles streaming for function calling agents with proper event processing.

    The handler consumes workflow events, forwards tool-related ones to the
    agent's progress callback, yields text deltas to the caller, and finally
    stores the awaited handler result for post-processing.
    """

    def __init__(self, agent_instance, handler, prompt: str):
        """
        Args:
            agent_instance: Agent providing ``agent_progress_callback``
            handler: Object exposing ``stream_events()`` and awaitable for the final result
            prompt: Original user prompt
        """
        self.agent_instance = agent_instance
        self.handler = handler
        self.prompt = prompt
        # Filled with the awaited handler result once streaming ends.
        self.final_response_container = {"resp": None}
        # Set when the generator is finished so post-processing can start.
        self.stream_complete_event = asyncio.Event()
        self.event_tracker = ToolEventTracker()

    async def process_stream_events(self) -> AsyncIterator[str]:
        """
        Process streaming events and yield text tokens.

        Events whose class name contains ``AgentStream`` drive token output:
        events with tool calls are remembered but emit nothing; the first
        event without tool calls after a tool-call phase emits a blank-line
        separator; deltas of events without tool calls are yielded.

        Yields:
            str: Text tokens from the streaming response
        """
        had_tool_calls = False
        transitioned_to_prose = False

        async for ev in self.handler.stream_events():
            # Forward only genuine tool events to the progress callback.
            if (
                self.agent_instance.agent_progress_callback
                and self._is_tool_related_event(ev)
            ):
                event_id = self.event_tracker.get_event_id(ev)
                await self._handle_progress_callback(ev, event_id)

            if "AgentStream" not in str(ev.__class__):
                continue
            if not hasattr(ev, "tool_calls"):
                continue

            if ev.tool_calls:
                had_tool_calls = True
                continue

            # From here on the event carries prose, not tool calls.
            if had_tool_calls and not transitioned_to_prose:
                yield "\n\n"
                transitioned_to_prose = True
            if hasattr(ev, "delta"):
                yield ev.delta

        # When stream is done, await the handler to get the final response
        try:
            self.final_response_container["resp"] = await self.handler
        except Exception as e:
            logging.error(f"Error processing stream events: {e}")
            # Minimal stand-in object so post-processing still has a result.
            self.final_response_container["resp"] = type(
                "AgentResponse",
                (),
                {
                    "response": "Response completion Error",
                    "source_nodes": [],
                    "metadata": None,
                },
            )()
        finally:
            # Trim the event tracker, then release the post-processing task.
            self.event_tracker.clear_old_entries()
            self.stream_complete_event.set()

    def _is_tool_related_event(self, event) -> bool:
        """
        Determine if an event is actually tool-related and should be tracked.

        Returns True for ToolCall/ToolCallResult instances, and for any event
        that has a truthy ``tool_id`` or ``tool_name`` but no truthy ``delta``.

        Args:
            event: The stream event to check

        Returns:
            bool: True if this event should be tracked for tool purposes
        """
        from llama_index.core.agent.workflow import (
            ToolCall,
            ToolCallResult,
        )

        if isinstance(event, (ToolCall, ToolCallResult)):
            return True

        has_tool_id = hasattr(event, "tool_id") and event.tool_id
        has_delta = hasattr(event, "delta") and event.delta
        has_tool_name = hasattr(event, "tool_name") and event.tool_name

        # Liberal match on tool markers, but never on streaming text deltas.
        if (has_tool_id or has_tool_name) and not has_delta:
            return True

        return False

    async def _handle_progress_callback(self, event, event_id: str):
        """
        Report an event to the agent's progress callback.

        ToolCall, ToolCallResult, AgentInput and AgentOutput events are mapped
        to a status type and message; other events are ignored. The callback
        may be sync or async: if calling it returns a coroutine, that
        coroutine is awaited for every event type. Exceptions are logged and
        swallowed so a faulty callback cannot break the stream.

        Args:
            event: The stream event to report
            event_id: Event ID shared by related tool call/output events
        """
        # Import here to avoid circular imports
        from ..types import AgentStatusType
        from llama_index.core.agent.workflow import (
            ToolCall,
            ToolCallResult,
            AgentInput,
            AgentOutput,
        )

        try:
            if isinstance(event, ToolCall):
                status_type = AgentStatusType.TOOL_CALL
                msg = {
                    "tool_name": event.tool_name,
                    "arguments": json.dumps(event.tool_kwargs),
                }
            elif isinstance(event, ToolCallResult):
                status_type = AgentStatusType.TOOL_OUTPUT
                msg = {
                    "tool_name": event.tool_name,
                    "content": str(event.tool_output),
                }
            elif isinstance(event, AgentInput):
                status_type = AgentStatusType.AGENT_UPDATE
                msg = {"content": f"Agent input: {event.input}"}
            elif isinstance(event, AgentOutput):
                status_type = AgentStatusType.AGENT_UPDATE
                msg = {"content": f"Agent output: {event.response}"}
            else:
                return

            outcome = self.agent_instance.agent_progress_callback(
                status_type=status_type,
                msg=msg,
                event_id=event_id,
            )
            # Async callbacks hand back a coroutine that must be awaited,
            # otherwise the callback body would never run.
            if asyncio.iscoroutine(outcome):
                await outcome

        except Exception as e:
            import traceback

            logging.error(f"Exception in progress callback: {e}")
            logging.error(f"Traceback: {traceback.format_exc()}")
            # Continue execution despite callback errors

    def create_streaming_response(
        self, user_metadata: Dict[str, Any]
    ) -> "StreamingResponseAdapter":
        """
        Create a StreamingResponseAdapter with proper post-processing.

        Args:
            user_metadata: User metadata dictionary to update

        Returns:
            StreamingResponseAdapter: Configured streaming adapter
        """
        post_process_task = create_stream_post_processing_task(
            self.stream_complete_event,
            self.final_response_container,
            self.prompt,
            self.agent_instance,
            user_metadata,
        )

        return StreamingResponseAdapter(
            async_response_gen=self.process_stream_events,
            response="",  # will be filled post-stream
            metadata={},
            post_process_task=post_process_task,
        )
@@ -0,0 +1,34 @@
1
+ """
2
+ Shared utilities for agent functionality.
3
+
4
+ This sub-module contains smaller, focused utility functions:
5
+ - prompt_formatting: Prompt formatting and templating
6
+ - schemas: Type conversion and schema handling
7
+ - tools: Tool validation and processing
8
+ - logging: Logging configuration and filters
9
+ """
10
+
11
+ # Import utilities for easy access
12
+ from .prompt_formatting import format_prompt, format_llm_compiler_prompt
13
+ from .schemas import get_field_type, JSON_TYPE_TO_PYTHON, PY_TYPES
14
+ from .tools import (
15
+ sanitize_tools_for_gemini,
16
+ validate_tool_consistency,
17
+ )
18
+ from .logging import IgnoreUnpickleableAttributeFilter, setup_agent_logging
19
+
20
+ __all__ = [
21
+ # Prompts
22
+ "format_prompt",
23
+ "format_llm_compiler_prompt",
24
+ # Schemas
25
+ "get_field_type",
26
+ "JSON_TYPE_TO_PYTHON",
27
+ "PY_TYPES",
28
+ # Tools
29
+ "sanitize_tools_for_gemini",
30
+ "validate_tool_consistency",
31
+ # Logging
32
+ "IgnoreUnpickleableAttributeFilter",
33
+ "setup_agent_logging",
34
+ ]