vectara-agentic 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. tests/__init__.py +1 -0
  2. tests/benchmark_models.py +547 -372
  3. tests/conftest.py +14 -12
  4. tests/endpoint.py +9 -5
  5. tests/run_tests.py +1 -0
  6. tests/test_agent.py +22 -9
  7. tests/test_agent_fallback_memory.py +4 -4
  8. tests/test_agent_memory_consistency.py +4 -4
  9. tests/test_agent_type.py +2 -0
  10. tests/test_api_endpoint.py +13 -13
  11. tests/test_bedrock.py +9 -1
  12. tests/test_fallback.py +18 -7
  13. tests/test_gemini.py +14 -40
  14. tests/test_groq.py +9 -1
  15. tests/test_private_llm.py +19 -6
  16. tests/test_react_error_handling.py +293 -0
  17. tests/test_react_memory.py +257 -0
  18. tests/test_react_streaming.py +135 -0
  19. tests/test_react_workflow_events.py +395 -0
  20. tests/test_return_direct.py +1 -0
  21. tests/test_serialization.py +58 -20
  22. tests/test_session_memory.py +11 -11
  23. tests/test_together.py +9 -1
  24. tests/test_tools.py +3 -1
  25. tests/test_vectara_llms.py +2 -2
  26. tests/test_vhc.py +7 -2
  27. tests/test_workflow.py +17 -11
  28. vectara_agentic/_callback.py +79 -21
  29. vectara_agentic/_version.py +1 -1
  30. vectara_agentic/agent.py +65 -27
  31. vectara_agentic/agent_core/serialization.py +5 -9
  32. vectara_agentic/agent_core/streaming.py +245 -64
  33. vectara_agentic/agent_core/utils/schemas.py +2 -2
  34. vectara_agentic/llm_utils.py +4 -2
  35. {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.3.dist-info}/METADATA +127 -31
  36. vectara_agentic-0.4.3.dist-info/RECORD +58 -0
  37. vectara_agentic-0.4.2.dist-info/RECORD +0 -54
  38. {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.3.dist-info}/WHEEL +0 -0
  39. {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.3.dist-info}/licenses/LICENSE +0 -0
  40. {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.3.dist-info}/top_level.txt +0 -0
@@ -7,12 +7,11 @@ for managing asynchronous agent interactions with proper synchronization.
 
 import asyncio
 import logging
-import uuid
 import json
 import traceback
+import uuid
 
 from typing import Callable, Any, Dict, AsyncIterator
-from collections import OrderedDict
 
 from llama_index.core.agent.workflow import (
     ToolCall,
@@ -20,58 +19,28 @@ from llama_index.core.agent.workflow import (
     AgentInput,
     AgentOutput,
 )
-from ..types import AgentResponse
+from ..types import AgentResponse, AgentStatusType
 
-class ToolEventTracker:
-    """
-    Tracks event IDs for tool calls to ensure consistent pairing of tool calls and outputs.
 
-    This class maintains a mapping between tool identifiers and event IDs to ensure
-    that related tool call and tool output events share the same event_id for proper
-    frontend grouping.
+def get_event_id(event) -> str:
     """
+    Get event ID from LlamaIndex event.
 
-    def __init__(self):
-        self.event_ids = OrderedDict()  # tool_call_id -> event_id mapping
-        self.fallback_counter = 0  # For events without identifiable tool_ids
-
-    def get_event_id(self, event) -> str:
-        """
-        Get a consistent event ID for a tool event.
+    Args:
+        event: The event object from LlamaIndex
 
-        Args:
-            event: The tool event object
+    Returns:
+        str: Event ID from the event, or creates a new one if it does not exist
+    """
+    # Check for direct event_id first
+    if hasattr(event, "event_id") and event.event_id:
+        return event.event_id
 
-        Returns:
-            str: Consistent event ID for this tool execution
-        """
-        # Try to get tool_id from the event first
-        tool_id = getattr(event, "tool_id", None)
-
-        # If we have a tool_id, use it directly (any format from any LLM provider)
-        if tool_id:
-            pass  # We already have tool_id, just use it
-        # If no tool_id, try to derive one from tool_name (for LlamaIndex events)
-        elif hasattr(event, "tool_name") and event.tool_name:
-            tool_id = f"{event.tool_name}_{self.fallback_counter}"
-            self.fallback_counter += 1
-        # If still no tool_id, create a generic one based on event type
-        else:
-            event_type = type(event).__name__
-            tool_id = f"{event_type.lower()}_{self.fallback_counter}"
-            self.fallback_counter += 1
-
-        # Get or create event_id for this tool_id
-        if tool_id not in self.event_ids:
-            self.event_ids[tool_id] = str(uuid.uuid4())
-
-        return self.event_ids[tool_id]
-
-    def clear_old_entries(self, max_entries: int = 100):
-        """Clear old entries to prevent unbounded memory growth."""
-        while len(self.event_ids) > max_entries // 2:
-            self.event_ids.popitem(last=False)  # Remove oldest entry
+    # Check for tool_id for tool-related events
+    if hasattr(event, "tool_id") and event.tool_id:
+        return event.tool_id
 
+    return str(uuid.uuid4())
 
 class StreamingResponseAdapter:
     """
@@ -284,7 +253,6 @@ class FunctionCallingStreamHandler:
         self.prompt = prompt
         self.final_response_container = {"resp": None}
         self.stream_complete_event = asyncio.Event()
-        self.event_tracker = ToolEventTracker()
 
     async def process_stream_events(self) -> AsyncIterator[str]:
         """
@@ -299,16 +267,22 @@ class FunctionCallingStreamHandler:
         async for ev in self.handler.stream_events():
             # Store tool outputs for VHC regardless of progress callback
             if isinstance(ev, ToolCallResult):
-                if hasattr(self.agent_instance, '_add_tool_output'):
+                if hasattr(self.agent_instance, "_add_tool_output"):
                     # pylint: disable=W0212
-                    self.agent_instance._add_tool_output(ev.tool_name, str(ev.tool_output))
+                    self.agent_instance._add_tool_output(
+                        ev.tool_name, str(ev.tool_output)
+                    )
 
             # Handle progress callbacks if available
             if self.agent_instance.agent_progress_callback:
                 # Only track events that are actual tool-related events
                 if self._is_tool_related_event(ev):
-                    event_id = self.event_tracker.get_event_id(ev)
-                    await self._handle_progress_callback(ev, event_id)
+                    try:
+                        event_id = get_event_id(ev)
+                        await self._handle_progress_callback(ev, event_id)
+                    except ValueError as e:
+                        logging.warning(f"Skipping event due to missing ID: {e}")
+                        continue
 
             # Process streaming text events
             if hasattr(ev, "__class__") and "AgentStream" in str(ev.__class__):
@@ -335,16 +309,25 @@ class FunctionCallingStreamHandler:
         try:
            self.final_response_container["resp"] = await self.handler
         except Exception as e:
-            logging.error(f"🔍 [STREAM_ERROR] Error processing stream events: {e}")
-            logging.error(f"🔍 [STREAM_ERROR] Full traceback: {traceback.format_exc()}")
-            self.final_response_container["resp"] = AgentResponse(
-                response="Response completion Error",
-                source_nodes=[],
-                metadata={}
-            )
+            error_str = str(e).lower()
+            if "rate limit" in error_str or "429" in error_str:
+                logging.error(f"🔍 [RATE_LIMIT_ERROR] Rate limit exceeded: {e}")
+                self.final_response_container["resp"] = AgentResponse(
+                    response="Rate limit exceeded. Please try again later.",
+                    source_nodes=[],
+                    metadata={"error_type": "rate_limit", "original_error": str(e)},
+                )
+            else:
+                logging.error(f"🔍 [STREAM_ERROR] Error processing stream events: {e}")
+                logging.error(
+                    f"🔍 [STREAM_ERROR] Full traceback: {traceback.format_exc()}"
+                )
+                self.final_response_container["resp"] = AgentResponse(
+                    response="Response completion Error",
+                    source_nodes=[],
+                    metadata={"error_type": "general", "original_error": str(e)},
+                )
         finally:
-            # Clean up event tracker to prevent memory leaks
-            self.event_tracker.clear_old_entries()
             # Signal that stream processing is complete
             self.stream_complete_event.set()
 
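The widened exception handler above distinguishes rate-limit failures from other streaming errors purely by inspecting the exception message. A standalone sketch of that classification, assuming only the message text is available:

def classify_stream_error(exc: Exception) -> str:
    # Same substring check the handler now applies: lowercase the message
    # and look for "rate limit" or an HTTP 429 marker.
    error_str = str(exc).lower()
    if "rate limit" in error_str or "429" in error_str:
        return "rate_limit"
    return "general"

assert classify_stream_error(RuntimeError("Error code: 429 Too Many Requests")) == "rate_limit"
assert classify_stream_error(RuntimeError("connection reset by peer")) == "general"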
@@ -380,9 +363,6 @@ class FunctionCallingStreamHandler:
 
     async def _handle_progress_callback(self, event, event_id: str):
         """Handle progress callback events for different event types with proper context propagation."""
-        # Import here to avoid circular imports
-        from ..types import AgentStatusType
-
         try:
             if isinstance(event, ToolCall):
                 # Check if callback is async or sync
@@ -477,3 +457,204 @@ class FunctionCallingStreamHandler:
             metadata={},
             post_process_task=post_process_task,
         )
+
+
+class ReActStreamHandler:
+    """
+    Handles streaming for ReAct agents with proper event processing.
+
+    ReAct agents use a workflow-based approach and emit ToolCall/ToolCallResult events
+    that need to be captured and converted to progress callbacks.
+    """
+
+    def __init__(self, agent_instance, handler, prompt: str):
+        self.agent_instance = agent_instance
+        self.handler = handler
+        self.prompt = prompt
+        self.final_response_container = {"resp": None}
+        self.stream_complete_event = asyncio.Event()
+
+    async def process_stream_events(self) -> AsyncIterator[str]:
+        """
+        Process streaming events from ReAct workflow and yield text tokens.
+
+        Yields:
+            str: Text tokens from the streaming response
+        """
+        async for event in self.handler.stream_events():
+            # Store tool outputs for VHC regardless of progress callback
+            if isinstance(event, ToolCallResult):
+                if hasattr(self.agent_instance, "_add_tool_output"):
+                    # pylint: disable=W0212
+                    self.agent_instance._add_tool_output(
+                        event.tool_name, str(event.tool_output)
+                    )
+            # Handle progress callbacks if available - this is the key missing piece!
+            if self.agent_instance.agent_progress_callback:
+                # Only track events that are actual tool-related events
+                if self._is_tool_related_event(event):
+                    try:
+                        # Get event ID from LlamaIndex event
+                        event_id = get_event_id(event)
+
+                        # Handle different types of workflow events using same logic as achat method
+                        if isinstance(event, ToolCall):
+                            # Check if callback is async or sync
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_CALL,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "arguments": json.dumps(event.tool_kwargs),
+                                    },
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_CALL,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "arguments": json.dumps(event.tool_kwargs),
+                                    },
+                                    event_id=event_id,
+                                )
+                        elif isinstance(event, ToolCallResult):
+                            # Check if callback is async or sync
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_OUTPUT,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "content": str(event.tool_output),
+                                    },
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_OUTPUT,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "content": str(event.tool_output),
+                                    },
+                                    event_id=event_id,
+                                )
+                        elif isinstance(event, AgentInput):
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent input: {event.input}"},
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent input: {event.input}"},
+                                    event_id=event_id,
+                                )
+                        elif isinstance(event, AgentOutput):
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent output: {event.response}"},
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent output: {event.response}"},
+                                    event_id=event_id,
+                                )
+                    except ValueError as e:
+                        logging.warning(f"Skipping event due to missing ID: {e}")
+                        continue
+                    except Exception as e:
+                        logging.error(f"Exception in ReAct progress callback: {e}")
+                        logging.error(f"Traceback: {traceback.format_exc()}")
+                        # Continue execution despite callback errors
+
+            # For ReAct agents, we typically don't have streaming text like function calling
+            # ReAct usually processes in steps and then provides complete responses
+            # So we just yield empty strings to maintain streaming interface
+            yield ""
+
+        # When stream is done, await the handler to get the final response
+        try:
+            self.final_response_container["resp"] = await self.handler
+        except Exception as e:
+            logging.error(
+                f"🔍 [REACT_STREAM_ERROR] Error processing ReAct stream events: {e}"
+            )
+            logging.error(
+                f"🔍 [REACT_STREAM_ERROR] Full traceback: {traceback.format_exc()}"
+            )
+            self.final_response_container["resp"] = AgentResponse(
+                response="ReAct Response completion Error", source_nodes=[], metadata={}
+            )
+        finally:
+            # Signal that stream processing is complete
+            self.stream_complete_event.set()
+
+    def _is_tool_related_event(self, event) -> bool:
+        """
+        Determine if an event is actually tool-related and should be tracked.
+
+        This should only return True for events that represent actual tool calls or tool outputs,
+        not for streaming text deltas or other LLM response events.
+
+        Args:
+            event: The stream event to check
+
+        Returns:
+            bool: True if this event should be tracked for tool purposes
+        """
+        # Track explicit tool events from LlamaIndex workflow
+        if isinstance(event, (ToolCall, ToolCallResult)):
+            return True
+
+        has_tool_id = hasattr(event, "tool_id") and event.tool_id
+        has_delta = hasattr(event, "delta") and event.delta
+        has_tool_name = hasattr(event, "tool_name") and event.tool_name
+
+        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
+        # but still avoid streaming deltas
+        if (has_tool_id or has_tool_name) and not has_delta:
+            return True
+
+        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
+        # should NOT be tracked as tool events
+        return False
+
+    def create_streaming_response(
+        self, user_metadata: Dict[str, Any]
+    ) -> "StreamingResponseAdapter":
+        """
+        Create a StreamingResponseAdapter for ReAct agents with proper post-processing.
+
+        Args:
+            user_metadata: User metadata dictionary to update
+
+        Returns:
+            StreamingResponseAdapter: Configured streaming adapter
+        """
+        post_process_task = create_stream_post_processing_task(
+            self.stream_complete_event,
+            self.final_response_container,
+            self.prompt,
+            self.agent_instance,
+            user_metadata,
+        )
+
+        return StreamingResponseAdapter(
+            async_response_gen=self.process_stream_events,
+            response="",  # will be filled post-stream
+            metadata={},
+            post_process_task=post_process_task,
+        )
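ReActStreamHandler repeats the same branch for every event type: check whether agent_progress_callback is a coroutine function, await it if so, otherwise call it synchronously. A condensed sketch of that dispatch pattern (the helper name notify is illustrative, not part of the package):

import asyncio

async def notify(callback, status_type, msg, event_id):
    # Dispatch to an async or sync progress callback, mirroring the
    # iscoroutinefunction branching used throughout ReActStreamHandler.
    if asyncio.iscoroutinefunction(callback):
        await callback(status_type=status_type, msg=msg, event_id=event_id)
    else:
        callback(status_type=status_type, msg=msg, event_id=event_id)

Factoring the branch into a helper like this would collapse the four near-identical blocks; the released code keeps them inline.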
@@ -78,8 +78,8 @@ def get_field_type(field_schema: dict) -> Any:
     # If only "items" is present (implies array by some conventions, but less standard)
     # Or if it's a schema with other keywords like 'properties' (implying object)
     # For simplicity, if no "type" or "anyOf" at this point, default to Any or add more specific handling.
-    # If 'properties' in field_schema or 'additionalProperties' in field_schema, it's likely an object.
-    if "properties" in field_schema or "additionalProperties" in field_schema:
+    # If 'properties' in field_schema, it's likely an object.
+    if "properties" in field_schema:
         # This path might need to reconstruct a nested Pydantic model if you encounter such schemas.
         # For now, treating as 'dict' or 'Any' might be a simpler placeholder.
         return dict  # Or Any, or more sophisticated object reconstruction.
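The narrowed check above means only JSON schemas that declare properties are mapped to dict in this branch; a schema carrying only additionalProperties no longer matches it. Illustrative (hypothetical) schema inputs:

object_schema = {"properties": {"name": {"type": "string"}}}
open_dict_schema = {"additionalProperties": {"type": "string"}}

assert "properties" in object_schema          # still matched -> treated as dict
assert "properties" not in open_dict_schema   # no longer matched by this branch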
@@ -23,7 +23,7 @@ provider_to_default_model_name = {
     ModelProvider.GROQ: "openai/gpt-oss-20b",
     ModelProvider.BEDROCK: "us.anthropic.claude-sonnet-4-20250514-v1:0",
     ModelProvider.COHERE: "command-a-03-2025",
-    ModelProvider.GEMINI: "models/gemini-2.5-flash",
+    ModelProvider.GEMINI: "models/gemini-2.5-flash-lite",
 }
 
 DEFAULT_MODEL_PROVIDER = ModelProvider.OPENAI
@@ -87,6 +87,8 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
 
     Uses a cache based on configuration parameters to avoid repeated LLM instantiation.
     """
+    if config is None:
+        config = AgentConfig()
     # Check cache first
     cache_key = _create_llm_cache_key(role, config)
     if cache_key in _llm_cache:
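With the new guard, calling get_llm without a config falls back to a default AgentConfig() before the cache key is computed. A hedged usage sketch; the import paths and the LLMRole.MAIN member are assumptions, only the get_llm signature is taken from the hunk header:

from vectara_agentic.agent_config import AgentConfig  # assumed import path
from vectara_agentic.llm_utils import get_llm
from vectara_agentic.types import LLMRole             # assumed import path

llm_default = get_llm(LLMRole.MAIN)                  # MAIN member assumed to exist
llm_explicit = get_llm(LLMRole.MAIN, AgentConfig())
# Both calls should resolve to the same cached LLM, since a missing config
# is now replaced by AgentConfig() inside get_llm.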
@@ -112,7 +114,7 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
             strict=False,
             max_tokens=max_tokens,
             pydantic_program_mode="openai",
-            additional_kwargs=additional_kwargs
+            additional_kwargs=additional_kwargs,
         )
     elif model_provider == ModelProvider.ANTHROPIC:
         llm = Anthropic(