vectara-agentic 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic.
- tests/__init__.py +1 -0
- tests/benchmark_models.py +547 -372
- tests/conftest.py +14 -12
- tests/endpoint.py +9 -5
- tests/run_tests.py +1 -0
- tests/test_agent.py +22 -9
- tests/test_agent_fallback_memory.py +4 -4
- tests/test_agent_memory_consistency.py +4 -4
- tests/test_agent_type.py +2 -0
- tests/test_api_endpoint.py +13 -13
- tests/test_bedrock.py +9 -1
- tests/test_fallback.py +18 -7
- tests/test_gemini.py +14 -40
- tests/test_groq.py +43 -1
- tests/test_openai.py +160 -0
- tests/test_private_llm.py +19 -6
- tests/test_react_error_handling.py +293 -0
- tests/test_react_memory.py +257 -0
- tests/test_react_streaming.py +135 -0
- tests/test_react_workflow_events.py +395 -0
- tests/test_return_direct.py +1 -0
- tests/test_serialization.py +58 -20
- tests/test_session_memory.py +11 -11
- tests/test_streaming.py +0 -44
- tests/test_together.py +75 -1
- tests/test_tools.py +3 -1
- tests/test_vectara_llms.py +2 -2
- tests/test_vhc.py +7 -2
- tests/test_workflow.py +17 -11
- vectara_agentic/_callback.py +79 -21
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +65 -27
- vectara_agentic/agent_core/serialization.py +5 -9
- vectara_agentic/agent_core/streaming.py +245 -64
- vectara_agentic/agent_core/utils/schemas.py +2 -2
- vectara_agentic/llm_utils.py +64 -15
- vectara_agentic/tools.py +88 -31
- {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.4.dist-info}/METADATA +133 -36
- vectara_agentic-0.4.4.dist-info/RECORD +59 -0
- vectara_agentic-0.4.2.dist-info/RECORD +0 -54
- {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.4.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.4.2.dist-info → vectara_agentic-0.4.4.dist-info}/top_level.txt +0 -0
vectara_agentic/agent_core/streaming.py
CHANGED

@@ -7,12 +7,11 @@ for managing asynchronous agent interactions with proper synchronization.
 
 import asyncio
 import logging
-import uuid
 import json
 import traceback
+import uuid
 
 from typing import Callable, Any, Dict, AsyncIterator
-from collections import OrderedDict
 
 from llama_index.core.agent.workflow import (
     ToolCall,
@@ -20,58 +19,28 @@ from llama_index.core.agent.workflow import (
     AgentInput,
     AgentOutput,
 )
-from ..types import AgentResponse
+from ..types import AgentResponse, AgentStatusType
 
-class ToolEventTracker:
-    """
-    Tracks event IDs for tool calls to ensure consistent pairing of tool calls and outputs.
 
-
-    that related tool call and tool output events share the same event_id for proper
-    frontend grouping.
+def get_event_id(event) -> str:
     """
+    Get event ID from LlamaIndex event.
 
-
-
-        self.fallback_counter = 0  # For events without identifiable tool_ids
-
-    def get_event_id(self, event) -> str:
-        """
-        Get a consistent event ID for a tool event.
+    Args:
+        event: The event object from LlamaIndex
 
-
-
+    Returns:
+        str: Event ID from the event, or creates a new one if it does not exist
+    """
+    # Check for direct event_id first
+    if hasattr(event, "event_id") and event.event_id:
+        return event.event_id
 
-
-
-
-        # Try to get tool_id from the event first
-        tool_id = getattr(event, "tool_id", None)
-
-        # If we have a tool_id, use it directly (any format from any LLM provider)
-        if tool_id:
-            pass  # We already have tool_id, just use it
-        # If no tool_id, try to derive one from tool_name (for LlamaIndex events)
-        elif hasattr(event, "tool_name") and event.tool_name:
-            tool_id = f"{event.tool_name}_{self.fallback_counter}"
-            self.fallback_counter += 1
-        # If still no tool_id, create a generic one based on event type
-        else:
-            event_type = type(event).__name__
-            tool_id = f"{event_type.lower()}_{self.fallback_counter}"
-            self.fallback_counter += 1
-
-        # Get or create event_id for this tool_id
-        if tool_id not in self.event_ids:
-            self.event_ids[tool_id] = str(uuid.uuid4())
-
-        return self.event_ids[tool_id]
-
-    def clear_old_entries(self, max_entries: int = 100):
-        """Clear old entries to prevent unbounded memory growth."""
-        while len(self.event_ids) > max_entries // 2:
-            self.event_ids.popitem(last=False)  # Remove oldest entry
+    # Check for tool_id for tool-related events
+    if hasattr(event, "tool_id") and event.tool_id:
+        return event.tool_id
 
+    return str(uuid.uuid4())
 
 class StreamingResponseAdapter:
     """
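The net effect of this hunk is that the stateful `ToolEventTracker` class (an ordered map of tool IDs plus a fallback counter) is replaced by the stateless, module-level `get_event_id` helper added above. A minimal, self-contained sketch of the new lookup order, using a hypothetical `DummyEvent` stand-in for a LlamaIndex workflow event:

```python
import uuid
from dataclasses import dataclass
from typing import Optional


@dataclass
class DummyEvent:
    """Hypothetical stand-in for a LlamaIndex workflow event."""
    event_id: Optional[str] = None
    tool_id: Optional[str] = None


def get_event_id(event) -> str:
    """Same lookup order as the new helper: event_id, then tool_id, then a fresh UUID."""
    if hasattr(event, "event_id") and event.event_id:
        return event.event_id
    if hasattr(event, "tool_id") and event.tool_id:
        return event.tool_id
    return str(uuid.uuid4())


print(get_event_id(DummyEvent(event_id="evt-1")))   # -> evt-1
print(get_event_id(DummyEvent(tool_id="call_42")))  # -> call_42
print(get_event_id(DummyEvent()))                   # -> a new UUID4 string
```

The trade-off is visible in the sketch: events that expose neither `event_id` nor `tool_id` no longer share a tracked ID; each such event now gets its own UUID.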
@@ -284,7 +253,6 @@ class FunctionCallingStreamHandler:
         self.prompt = prompt
         self.final_response_container = {"resp": None}
         self.stream_complete_event = asyncio.Event()
-        self.event_tracker = ToolEventTracker()
 
     async def process_stream_events(self) -> AsyncIterator[str]:
         """
@@ -299,16 +267,22 @@ class FunctionCallingStreamHandler:
         async for ev in self.handler.stream_events():
             # Store tool outputs for VHC regardless of progress callback
             if isinstance(ev, ToolCallResult):
-                if hasattr(self.agent_instance,
+                if hasattr(self.agent_instance, "_add_tool_output"):
                     # pylint: disable=W0212
-                    self.agent_instance._add_tool_output(
+                    self.agent_instance._add_tool_output(
+                        ev.tool_name, str(ev.tool_output)
+                    )
 
             # Handle progress callbacks if available
             if self.agent_instance.agent_progress_callback:
                 # Only track events that are actual tool-related events
                 if self._is_tool_related_event(ev):
-
-
+                    try:
+                        event_id = get_event_id(ev)
+                        await self._handle_progress_callback(ev, event_id)
+                    except ValueError as e:
+                        logging.warning(f"Skipping event due to missing ID: {e}")
+                        continue
 
             # Process streaming text events
             if hasattr(ev, "__class__") and "AgentStream" in str(ev.__class__):
@@ -335,16 +309,25 @@ class FunctionCallingStreamHandler:
         try:
             self.final_response_container["resp"] = await self.handler
         except Exception as e:
-
-
-
-
-
-
-
+            error_str = str(e).lower()
+            if "rate limit" in error_str or "429" in error_str:
+                logging.error(f"🔍 [RATE_LIMIT_ERROR] Rate limit exceeded: {e}")
+                self.final_response_container["resp"] = AgentResponse(
+                    response="Rate limit exceeded. Please try again later.",
+                    source_nodes=[],
+                    metadata={"error_type": "rate_limit", "original_error": str(e)},
+                )
+            else:
+                logging.error(f"🔍 [STREAM_ERROR] Error processing stream events: {e}")
+                logging.error(
+                    f"🔍 [STREAM_ERROR] Full traceback: {traceback.format_exc()}"
+                )
+                self.final_response_container["resp"] = AgentResponse(
+                    response="Response completion Error",
+                    source_nodes=[],
+                    metadata={"error_type": "general", "original_error": str(e)},
+                )
         finally:
-            # Clean up event tracker to prevent memory leaks
-            self.event_tracker.clear_old_entries()
             # Signal that stream processing is complete
             self.stream_complete_event.set()
 
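The rewritten `except` block classifies failures by inspecting the exception text, so rate-limit errors surface with a dedicated message and `metadata["error_type"]` instead of the generic completion error. A small sketch of that classification logic in isolation (the function name is mine, not the package's):

```python
def classify_stream_error(exc: Exception) -> str:
    """Mirror the new branching: '429'/'rate limit' text -> rate_limit, else general."""
    error_str = str(exc).lower()
    if "rate limit" in error_str or "429" in error_str:
        return "rate_limit"
    return "general"


assert classify_stream_error(RuntimeError("Error code: 429 Too Many Requests")) == "rate_limit"
assert classify_stream_error(RuntimeError("Rate limit exceeded, retry later")) == "rate_limit"
assert classify_stream_error(RuntimeError("connection reset by peer")) == "general"
print("classification matches the new branches")
```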
@@ -380,9 +363,6 @@ class FunctionCallingStreamHandler:
 
     async def _handle_progress_callback(self, event, event_id: str):
         """Handle progress callback events for different event types with proper context propagation."""
-        # Import here to avoid circular imports
-        from ..types import AgentStatusType
-
         try:
             if isinstance(event, ToolCall):
                 # Check if callback is async or sync
@@ -477,3 +457,204 @@ class FunctionCallingStreamHandler:
             metadata={},
             post_process_task=post_process_task,
         )
+
+
+class ReActStreamHandler:
+    """
+    Handles streaming for ReAct agents with proper event processing.
+
+    ReAct agents use a workflow-based approach and emit ToolCall/ToolCallResult events
+    that need to be captured and converted to progress callbacks.
+    """
+
+    def __init__(self, agent_instance, handler, prompt: str):
+        self.agent_instance = agent_instance
+        self.handler = handler
+        self.prompt = prompt
+        self.final_response_container = {"resp": None}
+        self.stream_complete_event = asyncio.Event()
+
+    async def process_stream_events(self) -> AsyncIterator[str]:
+        """
+        Process streaming events from ReAct workflow and yield text tokens.
+
+        Yields:
+            str: Text tokens from the streaming response
+        """
+        async for event in self.handler.stream_events():
+            # Store tool outputs for VHC regardless of progress callback
+            if isinstance(event, ToolCallResult):
+                if hasattr(self.agent_instance, "_add_tool_output"):
+                    # pylint: disable=W0212
+                    self.agent_instance._add_tool_output(
+                        event.tool_name, str(event.tool_output)
+                    )
+            # Handle progress callbacks if available - this is the key missing piece!
+            if self.agent_instance.agent_progress_callback:
+                # Only track events that are actual tool-related events
+                if self._is_tool_related_event(event):
+                    try:
+                        # Get event ID from LlamaIndex event
+                        event_id = get_event_id(event)
+
+                        # Handle different types of workflow events using same logic as achat method
+                        if isinstance(event, ToolCall):
+                            # Check if callback is async or sync
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_CALL,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "arguments": json.dumps(event.tool_kwargs),
+                                    },
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_CALL,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "arguments": json.dumps(event.tool_kwargs),
+                                    },
+                                    event_id=event_id,
+                                )
+                        elif isinstance(event, ToolCallResult):
+                            # Check if callback is async or sync
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_OUTPUT,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "content": str(event.tool_output),
+                                    },
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.TOOL_OUTPUT,
+                                    msg={
+                                        "tool_name": event.tool_name,
+                                        "content": str(event.tool_output),
+                                    },
+                                    event_id=event_id,
+                                )
+                        elif isinstance(event, AgentInput):
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent input: {event.input}"},
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent input: {event.input}"},
+                                    event_id=event_id,
+                                )
+                        elif isinstance(event, AgentOutput):
+                            if asyncio.iscoroutinefunction(
+                                self.agent_instance.agent_progress_callback
+                            ):
+                                await self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent output: {event.response}"},
+                                    event_id=event_id,
+                                )
+                            else:
+                                self.agent_instance.agent_progress_callback(
+                                    status_type=AgentStatusType.AGENT_UPDATE,
+                                    msg={"content": f"Agent output: {event.response}"},
+                                    event_id=event_id,
+                                )
+                    except ValueError as e:
+                        logging.warning(f"Skipping event due to missing ID: {e}")
+                        continue
+                    except Exception as e:
+                        logging.error(f"Exception in ReAct progress callback: {e}")
+                        logging.error(f"Traceback: {traceback.format_exc()}")
+                        # Continue execution despite callback errors
+
+            # For ReAct agents, we typically don't have streaming text like function calling
+            # ReAct usually processes in steps and then provides complete responses
+            # So we just yield empty strings to maintain streaming interface
+            yield ""
+
+        # When stream is done, await the handler to get the final response
+        try:
+            self.final_response_container["resp"] = await self.handler
+        except Exception as e:
+            logging.error(
+                f"🔍 [REACT_STREAM_ERROR] Error processing ReAct stream events: {e}"
+            )
+            logging.error(
+                f"🔍 [REACT_STREAM_ERROR] Full traceback: {traceback.format_exc()}"
+            )
+            self.final_response_container["resp"] = AgentResponse(
+                response="ReAct Response completion Error", source_nodes=[], metadata={}
+            )
+        finally:
+            # Signal that stream processing is complete
+            self.stream_complete_event.set()
+
+    def _is_tool_related_event(self, event) -> bool:
+        """
+        Determine if an event is actually tool-related and should be tracked.
+
+        This should only return True for events that represent actual tool calls or tool outputs,
+        not for streaming text deltas or other LLM response events.
+
+        Args:
+            event: The stream event to check
+
+        Returns:
+            bool: True if this event should be tracked for tool purposes
+        """
+        # Track explicit tool events from LlamaIndex workflow
+        if isinstance(event, (ToolCall, ToolCallResult)):
+            return True
+
+        has_tool_id = hasattr(event, "tool_id") and event.tool_id
+        has_delta = hasattr(event, "delta") and event.delta
+        has_tool_name = hasattr(event, "tool_name") and event.tool_name
+
+        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
+        # but still avoid streaming deltas
+        if (has_tool_id or has_tool_name) and not has_delta:
+            return True
+
+        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
+        # should NOT be tracked as tool events
+        return False
+
+    def create_streaming_response(
+        self, user_metadata: Dict[str, Any]
+    ) -> "StreamingResponseAdapter":
+        """
+        Create a StreamingResponseAdapter for ReAct agents with proper post-processing.
+
+        Args:
+            user_metadata: User metadata dictionary to update
+
+        Returns:
+            StreamingResponseAdapter: Configured streaming adapter
+        """
+        post_process_task = create_stream_post_processing_task(
+            self.stream_complete_event,
+            self.final_response_container,
+            self.prompt,
+            self.agent_instance,
+            user_metadata,
+        )
+
+        return StreamingResponseAdapter(
+            async_response_gen=self.process_stream_events,
+            response="",  # will be filled post-stream
+            metadata={},
+            post_process_task=post_process_task,
+        )
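For context, the new `ReActStreamHandler` forwards `ToolCall`, `ToolCallResult`, `AgentInput`, and `AgentOutput` events to the agent's `agent_progress_callback` using the keyword arguments visible above (`status_type`, `msg`, `event_id`). A sketch of a callback with a compatible shape; only the `AgentStatusType` import path and the argument names are taken from this diff, and the logging body is purely illustrative:

```python
from vectara_agentic.types import AgentStatusType


def my_progress_callback(status_type: AgentStatusType, msg: dict, event_id: str) -> None:
    """Example consumer for the progress events emitted by ReActStreamHandler."""
    if status_type == AgentStatusType.TOOL_CALL:
        print(f"[{event_id}] calling {msg['tool_name']}({msg['arguments']})")
    elif status_type == AgentStatusType.TOOL_OUTPUT:
        print(f"[{event_id}] {msg['tool_name']} -> {msg['content'][:80]}")
    else:  # AgentStatusType.AGENT_UPDATE
        print(f"[{event_id}] {msg.get('content', '')}")
```

Because the handler checks `asyncio.iscoroutinefunction`, an `async def` callback with the same signature would be awaited instead of called synchronously.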
vectara_agentic/agent_core/utils/schemas.py
CHANGED

@@ -78,8 +78,8 @@ def get_field_type(field_schema: dict) -> Any:
     # If only "items" is present (implies array by some conventions, but less standard)
     # Or if it's a schema with other keywords like 'properties' (implying object)
     # For simplicity, if no "type" or "anyOf" at this point, default to Any or add more specific handling.
-    # If 'properties' in field_schema
-    if "properties" in field_schema
+    # If 'properties' in field_schema, it's likely an object.
+    if "properties" in field_schema:
         # This path might need to reconstruct a nested Pydantic model if you encounter such schemas.
         # For now, treating as 'dict' or 'Any' might be a simpler placeholder.
         return dict  # Or Any, or more sophisticated object reconstruction.
vectara_agentic/llm_utils.py
CHANGED
@@ -18,7 +18,7 @@ from .agent_config import AgentConfig
 
 provider_to_default_model_name = {
     ModelProvider.OPENAI: "gpt-4.1-mini",
-    ModelProvider.ANTHROPIC: "claude-sonnet-4-
+    ModelProvider.ANTHROPIC: "claude-sonnet-4-0",
     ModelProvider.TOGETHER: "deepseek-ai/DeepSeek-V3",
     ModelProvider.GROQ: "openai/gpt-oss-20b",
     ModelProvider.BEDROCK: "us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -26,6 +26,41 @@ provider_to_default_model_name = {
     ModelProvider.GEMINI: "models/gemini-2.5-flash",
 }
 
+models_to_max_tokens = {
+    "gpt-5": 128000,
+    "gpt-4.1": 32768,
+    "gpt-4o": 16384,
+    "gpt-4.1-mini": 32768,
+    "claude-sonnet-4": 65536,
+    "deepseek-ai/deepseek-v3": 8192,
+    "models/gemini-2.5-flash": 65536,
+    "models/gemini-2.5-flash-lite": 65536,
+    "models/gemini-2.5-pro": 65536,
+    "openai/gpt-oss-20b": 65536,
+    "openai/gpt-oss-120b": 65536,
+    "us.anthropic.claude-sonnet-4-20250514-v1:0": 65536,
+    "command-a-03-2025": 8192,
+}
+
+
+def get_max_tokens(model_name: str, model_provider: str) -> int:
+    """Get the maximum token limit for a given model name and provider."""
+    if model_provider in [
+        ModelProvider.GEMINI,
+        ModelProvider.TOGETHER,
+        ModelProvider.OPENAI,
+        ModelProvider.ANTHROPIC,
+        ModelProvider.GROQ,
+        ModelProvider.BEDROCK,
+        ModelProvider.COHERE,
+    ]:
+        # Try exact match first (case-insensitive)
+        max_tokens = models_to_max_tokens.get(model_name, 16384)
+    else:
+        max_tokens = 8192
+    return max_tokens
+
+
 DEFAULT_MODEL_PROVIDER = ModelProvider.OPENAI
 
 # Manual cache for LLM instances to handle mutable AgentConfig objects
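Assuming the import paths below (the `get_max_tokens` helper and the `ModelProvider` values themselves appear in this hunk), the new lookup behaves roughly as sketched. Note that despite the in-line comment, `models_to_max_tokens.get(model_name, 16384)` is an exact, case-sensitive match with a 16384-token default for the listed providers, while every other provider falls back to 8192:

```python
# Import paths are assumptions; the names come from the hunk above.
from vectara_agentic.llm_utils import get_max_tokens
from vectara_agentic.types import ModelProvider

print(get_max_tokens("gpt-4.1-mini", ModelProvider.OPENAI))       # 32768, exact table hit
print(get_max_tokens("gpt-4.1-nano", ModelProvider.OPENAI))       # 16384, table miss for a listed provider
print(get_max_tokens("command-a-03-2025", ModelProvider.COHERE))  # 8192, exact table hit
```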
@@ -87,24 +122,18 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
 
     Uses a cache based on configuration parameters to avoid repeated LLM instantiation.
     """
+    if config is None:
+        config = AgentConfig()
     # Check cache first
     cache_key = _create_llm_cache_key(role, config)
     if cache_key in _llm_cache:
         return _llm_cache[cache_key]
     model_provider, model_name = _get_llm_params_for_role(role, config)
-    max_tokens = (
-        16384
-        if model_provider
-        in [
-            ModelProvider.GEMINI,
-            ModelProvider.TOGETHER,
-            ModelProvider.OPENAI,
-            ModelProvider.ANTHROPIC,
-        ]
-        else 8192
-    )
+    max_tokens = get_max_tokens(model_name, model_provider)
     if model_provider == ModelProvider.OPENAI:
-        additional_kwargs =
+        additional_kwargs = (
+            {"reasoning_effort": "minimal"} if model_name.startswith("gpt-5") else {}
+        )
         llm = OpenAI(
             model=model_name,
             temperature=0,
@@ -112,7 +141,7 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
             strict=False,
             max_tokens=max_tokens,
             pydantic_program_mode="openai",
-            additional_kwargs=additional_kwargs
+            additional_kwargs=additional_kwargs,
         )
     elif model_provider == ModelProvider.ANTHROPIC:
         llm = Anthropic(
@@ -127,11 +156,20 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
             raise ImportError(
                 "google_genai not available. Install with: pip install llama-index-llms-google-genai"
             ) from e
+        import google.genai.types as google_types
+        generation_config = google_types.GenerateContentConfig(
+            temperature=0.0,
+            seed=123,
+            max_output_tokens=max_tokens,
+            thinking_config=google_types.ThinkingConfig(thinking_budget=0, include_thoughts=False),
+        )
         llm = GoogleGenAI(
             model=model_name,
             temperature=0,
             is_function_calling_model=True,
             max_tokens=max_tokens,
+            generation_config=generation_config,
+            context_window=1_000_000,
         )
     elif model_provider == ModelProvider.TOGETHER:
         try:
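The Gemini path now builds an explicit `google.genai` generation config (deterministic temperature, fixed seed, thinking disabled) and declares a 1M-token context window. A standalone sketch of the same config object, with a placeholder output limit in place of the computed `max_tokens`; requires the google-genai package:

```python
import google.genai.types as google_types

generation_config = google_types.GenerateContentConfig(
    temperature=0.0,
    seed=123,
    max_output_tokens=65536,  # placeholder for the per-model max_tokens value
    thinking_config=google_types.ThinkingConfig(
        thinking_budget=0,       # disable internal "thinking" tokens
        include_thoughts=False,  # do not return thought summaries
    ),
)
print(generation_config)
```

Setting `thinking_budget=0` trades away Gemini 2.5's reasoning tokens for lower latency and cost, which fits the deterministic, tool-calling role these LLMs play here.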
@@ -140,11 +178,18 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
             raise ImportError(
                 "together not available. Install with: pip install llama-index-llms-together"
             ) from e
+        additional_kwargs = {"seed": 42}
+        if model_name in [
+            "deepseek-ai/DeepSeek-V3.1", "openai/gpt-oss-120b",
+            "deepseek-ai/DeepSeek-R1", "Qwen/Qwen3-235B-A22B-Thinking-2507"
+        ]:
+            additional_kwargs['reasoning_effort'] = "low"
         llm = TogetherLLM(
             model=model_name,
             temperature=0,
             is_function_calling_model=True,
             max_tokens=max_tokens,
+            additional_kwargs=additional_kwargs,
        )
     elif model_provider == ModelProvider.GROQ:
         try:
@@ -191,7 +236,11 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
             raise ImportError(
                 "openai_like not available. Install with: pip install llama-index-llms-openai-like"
             ) from e
-        if
+        if (
+            not config
+            or not config.private_llm_api_base
+            or not config.private_llm_api_key
+        ):
             raise ValueError(
                 "Private LLM requires both private_llm_api_base and private_llm_api_key to be set in AgentConfig."
             )
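Finally, the private-LLM branch now validates its configuration up front instead of failing later with a less obvious error. A minimal `AgentConfig` that would satisfy the new check might look like the sketch below; the two `private_llm_*` field names come from the hunk above, while the provider value, the extra field name, and the endpoint are placeholder assumptions:

```python
from vectara_agentic.agent_config import AgentConfig
from vectara_agentic.types import ModelProvider

config = AgentConfig(
    main_llm_provider=ModelProvider.PRIVATE,          # assumed provider value
    private_llm_api_base="http://localhost:8000/v1",  # placeholder OpenAI-compatible endpoint
    private_llm_api_key="sk-local-placeholder",       # placeholder key
)
```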