vectara-agentic 0.4.3__tar.gz → 0.4.4__tar.gz
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {vectara_agentic-0.4.3/vectara_agentic.egg-info → vectara_agentic-0.4.4}/PKG-INFO +9 -8
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/README.md +1 -1
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/requirements.txt +7 -6
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_groq.py +34 -0
- vectara_agentic-0.4.4/tests/test_openai.py +160 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_streaming.py +0 -44
- vectara_agentic-0.4.4/tests/test_together.py +136 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/_version.py +1 -1
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/llm_utils.py +62 -15
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/tools.py +88 -31
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4/vectara_agentic.egg-info}/PKG-INFO +9 -8
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/SOURCES.txt +1 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/requires.txt +7 -6
- vectara_agentic-0.4.3/tests/test_together.py +0 -70
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/LICENSE +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/MANIFEST.in +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/setup.cfg +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/setup.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/benchmark_models.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/conftest.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/endpoint.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/run_tests.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_agent.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_agent_fallback_memory.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_agent_memory_consistency.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_agent_type.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_api_endpoint.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_bedrock.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_fallback.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_gemini.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_private_llm.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_react_error_handling.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_react_memory.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_react_streaming.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_react_workflow_events.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_return_direct.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_serialization.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_session_memory.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_tools.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_vectara_llms.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_vhc.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_workflow.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/_callback.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/_observability.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_config.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/factory.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/prompts.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/serialization.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/streaming.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/hallucination.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/logging.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/schemas.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/tools.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/agent_endpoint.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/db_tools.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/sub_query_workflow.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/tool_utils.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/tools_catalog.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/types.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/utils.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/dependency_links.txt +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/top_level.txt +0 -0
{vectara_agentic-0.4.3/vectara_agentic.egg-info → vectara_agentic-0.4.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectara_agentic
-Version: 0.4.3
+Version: 0.4.4
 Summary: A Python package for creating AI Assistants and AI Agents with Vectara
 Home-page: https://github.com/vectara/py-vectara-agentic
 Author: Ofer Mendelevitch
@@ -16,19 +16,20 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: llama-index==0.13.
-Requires-Dist: llama-index-core==0.13.
+Requires-Dist: llama-index==0.13.3
+Requires-Dist: llama-index-core==0.13.3
 Requires-Dist: llama-index-workflows==1.3.0
 Requires-Dist: llama-index-cli==0.5.0
 Requires-Dist: llama-index-indices-managed-vectara==0.5.0
-Requires-Dist: llama-index-llms-openai==0.5.
+Requires-Dist: llama-index-llms-openai==0.5.4
 Requires-Dist: llama-index-llms-openai-like==0.5.0
-Requires-Dist: llama-index-llms-anthropic==0.8.
+Requires-Dist: llama-index-llms-anthropic==0.8.5
 Requires-Dist: llama-index-llms-together==0.4.0
 Requires-Dist: llama-index-llms-groq==0.4.0
 Requires-Dist: llama-index-llms-cohere==0.6.0
 Requires-Dist: llama-index-llms-google-genai==0.3.0
-Requires-Dist:
+Requires-Dist: google_genai>=1.31.0
+Requires-Dist: llama-index-llms-bedrock-converse==0.8.2
 Requires-Dist: llama-index-tools-yahoo-finance==0.4.0
 Requires-Dist: llama-index-tools-arxiv==0.4.0
 Requires-Dist: llama-index-tools-database==0.4.0
@@ -54,7 +55,7 @@ Requires-Dist: protobuf==5.29.5
 Requires-Dist: tokenizers>=0.20
 Requires-Dist: pydantic>=2.11.5
 Requires-Dist: pandas==2.2.3
-Requires-Dist: retrying==1.
+Requires-Dist: retrying==1.4.2
 Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: httpx==0.28.1
@@ -869,7 +870,7 @@ agent_config = AgentConfig(
     main_llm_provider = ModelProvider.ANTHROPIC,
     main_llm_model_name = 'claude-3-5-sonnet-20241022',
     tool_llm_provider = ModelProvider.TOGETHER,
-    tool_llm_model_name = '
+    tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
 )

 agent = Agent(
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/README.md
@@ -795,7 +795,7 @@ agent_config = AgentConfig(
    main_llm_provider = ModelProvider.ANTHROPIC,
    main_llm_model_name = 'claude-3-5-sonnet-20241022',
    tool_llm_provider = ModelProvider.TOGETHER,
-    tool_llm_model_name = '
+    tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
 )

 agent = Agent(
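
For context, after this change the surrounding README example configures an Anthropic main LLM and a Together tool LLM. A sketch assembled from the hunk's context lines (the imports are added here for self-containment, and the trailing Agent(...) call is abbreviated in the hunk itself):

from vectara_agentic.agent_config import AgentConfig
from vectara_agentic.types import ModelProvider

agent_config = AgentConfig(
    main_llm_provider = ModelProvider.ANTHROPIC,
    main_llm_model_name = 'claude-3-5-sonnet-20241022',
    tool_llm_provider = ModelProvider.TOGETHER,
    tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
)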
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/requirements.txt
@@ -1,16 +1,17 @@
-llama-index==0.13.
-llama-index-core==0.13.
+llama-index==0.13.3
+llama-index-core==0.13.3
 llama-index-workflows==1.3.0
 llama-index-cli==0.5.0
 llama-index-indices-managed-vectara==0.5.0
-llama-index-llms-openai==0.5.
+llama-index-llms-openai==0.5.4
 llama-index-llms-openai-like==0.5.0
-llama-index-llms-anthropic==0.8.
+llama-index-llms-anthropic==0.8.5
 llama-index-llms-together==0.4.0
 llama-index-llms-groq==0.4.0
 llama-index-llms-cohere==0.6.0
 llama-index-llms-google-genai==0.3.0
-
+google_genai>=1.31.0
+llama-index-llms-bedrock-converse==0.8.2
 llama-index-tools-yahoo-finance==0.4.0
 llama-index-tools-arxiv==0.4.0
 llama-index-tools-database==0.4.0
@@ -36,7 +37,7 @@ protobuf==5.29.5
 tokenizers>=0.20
 pydantic>=2.11.5
 pandas==2.2.3
-retrying==1.
+retrying==1.4.2
 python-dotenv==1.0.1
 cloudpickle>=3.1.1
 httpx==0.28.1
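
Several pins move in this release (llama-index 0.13.3, retrying 1.4.2, plus the new google_genai and bedrock-converse entries). A quick, hedged way to confirm what actually got installed, using only the standard library (the distribution names below assume pip's usual underscore-to-hyphen normalization):

from importlib.metadata import version, PackageNotFoundError

for dist in ("llama-index", "llama-index-core", "retrying", "google-genai"):
    try:
        print(dist, version(dist))  # prints the installed version of each pin
    except PackageNotFoundError:
        print(dist, "not installed")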
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_groq.py
@@ -8,6 +8,8 @@ import threading

 from vectara_agentic.agent import Agent
 from vectara_agentic.tools import ToolsFactory
+from vectara_agentic.agent_config import AgentConfig
+from vectara_agentic.types import AgentType, ModelProvider

 import nest_asyncio

@@ -64,6 +66,38 @@ class TestGROQ(unittest.IsolatedAsyncioTestCase):

         self.assertEqual(response3.response, "1050")

+    async def test_gpt_oss_120b(self):
+        """Test GPT-OSS-120B model with GROQ provider."""
+        with ARIZE_LOCK:
+            # Create config specifically for GPT-OSS-120B via GROQ
+            gpt_oss_config = AgentConfig(
+                agent_type=AgentType.FUNCTION_CALLING,
+                main_llm_provider=ModelProvider.GROQ,
+                main_llm_model_name="openai/gpt-oss-120b",
+                tool_llm_provider=ModelProvider.GROQ,
+                tool_llm_model_name="openai/gpt-oss-120b",
+            )
+
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=gpt_oss_config,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # Test simple multiplication: 8 * 6 = 48
+            stream = await agent.astream_chat(
+                "What is 8 times 6? Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream.async_response_gen():
+                pass
+            response = await stream.aget_response()
+
+            # Verify the response contains the correct answer
+            self.assertIn("48", response.response)
+

 if __name__ == "__main__":
     unittest.main()
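
The new test repeats the streaming idiom used throughout this suite: drain async_response_gen() first, then read the final answer from aget_response(). As a standalone sketch (assuming a configured Agent and valid provider API keys):

async def ask(agent, prompt: str) -> str:
    stream = await agent.astream_chat(prompt)
    async for _chunk in stream.async_response_gen():
        pass  # drain the token stream; chunks could also be printed as they arrive
    response = await stream.aget_response()  # final response once the stream ends
    return response.response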
vectara_agentic-0.4.4/tests/test_openai.py (new file)
@@ -0,0 +1,160 @@
+# Suppress external dependency warnings before any other imports
+import warnings
+
+warnings.simplefilter("ignore", DeprecationWarning)
+
+import unittest
+import threading
+
+from vectara_agentic.agent import Agent
+from vectara_agentic.tools import ToolsFactory
+from vectara_agentic.agent_config import AgentConfig
+from vectara_agentic.types import AgentType, ModelProvider
+
+import nest_asyncio
+
+nest_asyncio.apply()
+
+from conftest import (
+    fc_config_openai,
+    mult,
+    STANDARD_TEST_TOPIC,
+    STANDARD_TEST_INSTRUCTIONS,
+)
+
+
+ARIZE_LOCK = threading.Lock()
+
+
+class TestOpenAI(unittest.IsolatedAsyncioTestCase):
+
+    async def test_multiturn(self):
+        """Test multi-turn conversation with default OpenAI model."""
+        with ARIZE_LOCK:
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=fc_config_openai,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # First calculation: 5 * 10 = 50
+            stream1 = await agent.astream_chat(
+                "What is 5 times 10. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream1.async_response_gen():
+                pass
+            _ = await stream1.aget_response()
+
+            # Second calculation: 3 * 7 = 21
+            stream2 = await agent.astream_chat(
+                "what is 3 times 7. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream2.async_response_gen():
+                pass
+            _ = await stream2.aget_response()
+
+            # Final calculation: 50 * 21 = 1050
+            stream3 = await agent.astream_chat(
+                "multiply the results of the last two questions. Output only the answer."
+            )
+            # Consume the stream
+            async for chunk in stream3.async_response_gen():
+                pass
+            response3 = await stream3.aget_response()
+
+            self.assertEqual(response3.response, "1050")
+
+    async def test_gpt_4o(self):
+        """Test GPT-4o model with OpenAI provider."""
+        with ARIZE_LOCK:
+            config = AgentConfig(
+                agent_type=AgentType.FUNCTION_CALLING,
+                main_llm_provider=ModelProvider.OPENAI,
+                main_llm_model_name="gpt-4o",
+                tool_llm_provider=ModelProvider.OPENAI,
+                tool_llm_model_name="gpt-4o",
+            )
+
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=config,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # Test simple multiplication: 4 * 3 = 12
+            stream = await agent.astream_chat(
+                "What is 4 times 3? Only give the answer, nothing else"
+            )
+            async for chunk in stream.async_response_gen():
+                pass
+            response = await stream.aget_response()
+
+            self.assertIn("12", response.response)
+
+    async def test_gpt_4_1(self):
+        """Test GPT-4.1 model with OpenAI provider."""
+        with ARIZE_LOCK:
+            config = AgentConfig(
+                agent_type=AgentType.FUNCTION_CALLING,
+                main_llm_provider=ModelProvider.OPENAI,
+                main_llm_model_name="gpt-4.1",
+                tool_llm_provider=ModelProvider.OPENAI,
+                tool_llm_model_name="gpt-4.1",
+            )
+
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=config,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # Test simple multiplication: 6 * 2 = 12
+            stream = await agent.astream_chat(
+                "What is 6 times 2? Only give the answer, nothing else"
+            )
+            async for chunk in stream.async_response_gen():
+                pass
+            response = await stream.aget_response()
+
+            self.assertIn("12", response.response)
+
+    async def test_gpt_5_minimal_reasoning(self):
+        """Test GPT-5 model with minimal reasoning effort."""
+        with ARIZE_LOCK:
+            config = AgentConfig(
+                agent_type=AgentType.FUNCTION_CALLING,
+                main_llm_provider=ModelProvider.OPENAI,
+                main_llm_model_name="gpt-5",
+                tool_llm_provider=ModelProvider.OPENAI,
+                tool_llm_model_name="gpt-5",
+            )
+
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=config,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # Test simple multiplication: 5 * 5 = 25
+            stream = await agent.astream_chat(
+                "What is 5 times 5? Only give the answer, nothing else"
+            )
+            async for chunk in stream.async_response_gen():
+                pass
+            response = await stream.aget_response()
+
+            self.assertIn("25", response.response)
+
+
+if __name__ == "__main__":
+    unittest.main()
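
test_gpt_5_minimal_reasoning lines up with the llm_utils.py change later in this diff, where OpenAI models whose names start with "gpt-5" get their reasoning effort dialed down. That branch reduces to this small check:

model_name = "gpt-5"
additional_kwargs = (
    {"reasoning_effort": "minimal"} if model_name.startswith("gpt-5") else {}
)
assert additional_kwargs == {"reasoning_effort": "minimal"}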
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/tests/test_streaming.py
@@ -4,7 +4,6 @@ import warnings
 warnings.simplefilter("ignore", DeprecationWarning)

 import unittest
-import asyncio

 from vectara_agentic.agent import Agent
 from vectara_agentic.tools import ToolsFactory
@@ -14,7 +13,6 @@ import nest_asyncio
 nest_asyncio.apply()

 from conftest import (
-    fc_config_openai,
     fc_config_anthropic,
     mult,
     STANDARD_TEST_TOPIC,
@@ -62,48 +60,6 @@ class TestAgentStreaming(unittest.IsolatedAsyncioTestCase):

         self.assertIn("1050", response3.response)

-    async def test_openai(self):
-        tools = [ToolsFactory().create_tool(mult)]
-        agent = Agent(
-            agent_config=fc_config_openai,
-            tools=tools,
-            topic=STANDARD_TEST_TOPIC,
-            custom_instructions=STANDARD_TEST_INSTRUCTIONS,
-        )
-
-        # First calculation: 5 * 10 = 50
-        stream1 = await agent.astream_chat(
-            "What is 5 times 10. Only give the answer, nothing else"
-        )
-        # Consume the stream
-        async for chunk in stream1.async_response_gen():
-            pass
-        _ = await stream1.aget_response()
-
-        # Second calculation: 3 * 7 = 21
-        stream2 = await agent.astream_chat(
-            "what is 3 times 7. Only give the answer, nothing else"
-        )
-        # Consume the stream
-        async for chunk in stream2.async_response_gen():
-            pass
-        _ = await stream2.aget_response()
-
-        # Final calculation: 50 * 21 = 1050
-        stream3 = await agent.astream_chat(
-            "multiply the results of the last two multiplications. Only give the answer, nothing else."
-        )
-        # Consume the stream
-        async for chunk in stream3.async_response_gen():
-            pass
-        response3 = await stream3.aget_response()
-
-        self.assertIn("1050", response3.response)
-
-    def test_openai_sync(self):
-        """Synchronous wrapper for the async test"""
-        asyncio.run(self.test_openai())
-

 if __name__ == "__main__":
     unittest.main()
vectara_agentic-0.4.4/tests/test_together.py (new file)
@@ -0,0 +1,136 @@
+# Suppress external dependency warnings before any other imports
+import warnings
+
+warnings.simplefilter("ignore", DeprecationWarning)
+
+import unittest
+import threading
+
+from vectara_agentic.agent import Agent
+from vectara_agentic.tools import ToolsFactory
+
+import nest_asyncio
+
+nest_asyncio.apply()
+
+from conftest import (
+    fc_config_together,
+    mult,
+    STANDARD_TEST_TOPIC,
+    STANDARD_TEST_INSTRUCTIONS,
+)
+from vectara_agentic.agent_config import AgentConfig
+from vectara_agentic.types import AgentType, ModelProvider
+
+
+ARIZE_LOCK = threading.Lock()
+
+
+class TestTogether(unittest.IsolatedAsyncioTestCase):
+
+    async def test_multiturn(self):
+        with ARIZE_LOCK:
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=fc_config_together,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # First calculation: 5 * 10 = 50
+            stream1 = await agent.astream_chat(
+                "What is 5 times 10. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream1.async_response_gen():
+                pass
+            _ = await stream1.aget_response()
+
+            # Second calculation: 3 * 7 = 21
+            stream2 = await agent.astream_chat(
+                "what is 3 times 7. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream2.async_response_gen():
+                pass
+            _ = await stream2.aget_response()
+
+            # Final calculation: 50 * 21 = 1050
+            stream3 = await agent.astream_chat(
+                "multiply the results of the last two questions. Output only the answer."
+            )
+            # Consume the stream
+            async for chunk in stream3.async_response_gen():
+                pass
+            response3 = await stream3.aget_response()
+
+            self.assertEqual(response3.response, "1050")
+
+    async def test_qwen3_coder(self):
+        """Test Qwen3-Coder-480B-A35B-Instruct-FP8 model with Together AI provider."""
+        with ARIZE_LOCK:
+            # Create config specifically for Qwen3-Coder
+            qwen_config = AgentConfig(
+                agent_type=AgentType.FUNCTION_CALLING,
+                main_llm_provider=ModelProvider.TOGETHER,
+                main_llm_model_name="Qwen/Qwen3-235B-A22B-fp8-tput",
+                tool_llm_provider=ModelProvider.TOGETHER,
+                tool_llm_model_name="Qwen/Qwen3-235B-A22B-fp8-tput",
+            )
+
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=qwen_config,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # Test simple multiplication: 7 * 9 = 63
+            stream = await agent.astream_chat(
+                "What is 7 times 9? Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream.async_response_gen():
+                pass
+            response = await stream.aget_response()
+
+            # Verify the response contains the correct answer
+            self.assertIn("63", response.response)
+
+    async def test_llama4_scout(self):
+        """Test Llama-4-Scout-17B-16E-Instruct model with Together AI provider."""
+        with ARIZE_LOCK:
+            # Create config specifically for Llama 4 Scout
+            llama4_config = AgentConfig(
+                agent_type=AgentType.FUNCTION_CALLING,
+                main_llm_provider=ModelProvider.TOGETHER,
+                main_llm_model_name="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+                tool_llm_provider=ModelProvider.TOGETHER,
+                tool_llm_model_name="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            )
+
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=llama4_config,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # Test simple multiplication: 8 * 6 = 48
+            stream = await agent.astream_chat(
+                "What is 8 times 6? Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream.async_response_gen():
+                pass
+            response = await stream.aget_response()
+
+            # Verify the response contains the correct answer
+            self.assertIn("48", response.response)
+
+
+if __name__ == "__main__":
+    unittest.main()
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/llm_utils.py
@@ -18,14 +18,49 @@ from .agent_config import AgentConfig

 provider_to_default_model_name = {
     ModelProvider.OPENAI: "gpt-4.1-mini",
-    ModelProvider.ANTHROPIC: "claude-sonnet-4-
+    ModelProvider.ANTHROPIC: "claude-sonnet-4-0",
     ModelProvider.TOGETHER: "deepseek-ai/DeepSeek-V3",
     ModelProvider.GROQ: "openai/gpt-oss-20b",
     ModelProvider.BEDROCK: "us.anthropic.claude-sonnet-4-20250514-v1:0",
     ModelProvider.COHERE: "command-a-03-2025",
-    ModelProvider.GEMINI: "models/gemini-2.5-flash
+    ModelProvider.GEMINI: "models/gemini-2.5-flash",
 }

+models_to_max_tokens = {
+    "gpt-5": 128000,
+    "gpt-4.1": 32768,
+    "gpt-4o": 16384,
+    "gpt-4.1-mini": 32768,
+    "claude-sonnet-4": 65536,
+    "deepseek-ai/deepseek-v3": 8192,
+    "models/gemini-2.5-flash": 65536,
+    "models/gemini-2.5-flash-lite": 65536,
+    "models/gemini-2.5-pro": 65536,
+    "openai/gpt-oss-20b": 65536,
+    "openai/gpt-oss-120b": 65536,
+    "us.anthropic.claude-sonnet-4-20250514-v1:0": 65536,
+    "command-a-03-2025": 8192,
+}
+
+
+def get_max_tokens(model_name: str, model_provider: str) -> int:
+    """Get the maximum token limit for a given model name and provider."""
+    if model_provider in [
+        ModelProvider.GEMINI,
+        ModelProvider.TOGETHER,
+        ModelProvider.OPENAI,
+        ModelProvider.ANTHROPIC,
+        ModelProvider.GROQ,
+        ModelProvider.BEDROCK,
+        ModelProvider.COHERE,
+    ]:
+        # Try exact match first (case-insensitive)
+        max_tokens = models_to_max_tokens.get(model_name, 16384)
+    else:
+        max_tokens = 8192
+    return max_tokens
+
+
 DEFAULT_MODEL_PROVIDER = ModelProvider.OPENAI

 # Manual cache for LLM instances to handle mutable AgentConfig objects
@@ -94,19 +129,11 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
     if cache_key in _llm_cache:
         return _llm_cache[cache_key]
     model_provider, model_name = _get_llm_params_for_role(role, config)
-    max_tokens = (
-        16384
-        if model_provider
-        in [
-            ModelProvider.GEMINI,
-            ModelProvider.TOGETHER,
-            ModelProvider.OPENAI,
-            ModelProvider.ANTHROPIC,
-        ]
-        else 8192
-    )
+    max_tokens = get_max_tokens(model_name, model_provider)
     if model_provider == ModelProvider.OPENAI:
-        additional_kwargs =
+        additional_kwargs = (
+            {"reasoning_effort": "minimal"} if model_name.startswith("gpt-5") else {}
+        )
         llm = OpenAI(
             model=model_name,
             temperature=0,
@@ -129,11 +156,20 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
            raise ImportError(
                "google_genai not available. Install with: pip install llama-index-llms-google-genai"
            ) from e
+        import google.genai.types as google_types
+        generation_config = google_types.GenerateContentConfig(
+            temperature=0.0,
+            seed=123,
+            max_output_tokens=max_tokens,
+            thinking_config=google_types.ThinkingConfig(thinking_budget=0, include_thoughts=False),
+        )
         llm = GoogleGenAI(
             model=model_name,
             temperature=0,
             is_function_calling_model=True,
             max_tokens=max_tokens,
+            generation_config=generation_config,
+            context_window=1_000_000,
         )
     elif model_provider == ModelProvider.TOGETHER:
         try:
@@ -142,11 +178,18 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
            raise ImportError(
                "together not available. Install with: pip install llama-index-llms-together"
            ) from e
+        additional_kwargs = {"seed": 42}
+        if model_name in [
+            "deepseek-ai/DeepSeek-V3.1", "openai/gpt-oss-120b",
+            "deepseek-ai/DeepSeek-R1", "Qwen/Qwen3-235B-A22B-Thinking-2507"
+        ]:
+            additional_kwargs['reasoning_effort'] = "low"
         llm = TogetherLLM(
             model=model_name,
             temperature=0,
             is_function_calling_model=True,
             max_tokens=max_tokens,
+            additional_kwargs=additional_kwargs,
         )
     elif model_provider == ModelProvider.GROQ:
         try:
@@ -193,7 +236,11 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
            raise ImportError(
                "openai_like not available. Install with: pip install llama-index-llms-openai-like"
            ) from e
-        if
+        if (
+            not config
+            or not config.private_llm_api_base
+            or not config.private_llm_api_key
+        ):
            raise ValueError(
                "Private LLM requires both private_llm_api_base and private_llm_api_key to be set in AgentConfig."
            )
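
Taken together, these hunks replace the old provider-only max_tokens heuristic with a per-model table plus provider fallbacks (16384 for the listed providers, 8192 otherwise). A hedged usage sketch, assuming get_max_tokens and get_llm are importable from vectara_agentic.llm_utils and that LLMRole lives in vectara_agentic.types as the signature above suggests:

from vectara_agentic.agent_config import AgentConfig
from vectara_agentic.llm_utils import get_llm, get_max_tokens
from vectara_agentic.types import LLMRole, ModelProvider

# Exact model names hit the table; unknown names fall back per provider.
assert get_max_tokens("gpt-5", ModelProvider.OPENAI) == 128000
assert get_max_tokens("some-future-model", ModelProvider.OPENAI) == 16384

# get_llm() applies the table and, per the hunks above, also sets
# reasoning_effort="minimal" for gpt-5* on OpenAI, a seeded,
# thinking-disabled GenerateContentConfig for Gemini, and seed=42
# (plus reasoning_effort="low" for a few models) on Together.
config = AgentConfig(
    main_llm_provider=ModelProvider.TOGETHER,
    main_llm_model_name="deepseek-ai/DeepSeek-V3",
)
llm = get_llm(LLMRole.MAIN, config)  # instances are cached per config (_llm_cache)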
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic/tools.py
@@ -3,12 +3,12 @@ This module contains the ToolsFactory class for creating agent tools.
 """

 import inspect
-import re
 import importlib
 import os
 import asyncio
-
 from typing import Callable, List, Dict, Any, Optional, Union
+
+from retrying import retry
 from pydantic import BaseModel, Field

 from llama_index.core.tools import FunctionTool
@@ -65,6 +65,18 @@ LI_packages = {
 }


+@retry(stop_max_attempt_number=3, wait_exponential_multiplier=1000, wait_exponential_max=10000)
+def _query_with_retry(vectara_query_engine, query):
+    """Execute Vectara query with automatic retry on timeout/failure."""
+    return vectara_query_engine.query(query)
+
+
+@retry(stop_max_attempt_number=3, wait_exponential_multiplier=1000, wait_exponential_max=10000)
+def _retrieve_with_retry(vectara_retriever, query):
+    """Execute Vectara retrieve with automatic retry on timeout/failure."""
+    return vectara_retriever.retrieve(query)
+
+
 class VectaraToolFactory:
     """
     A factory class for creating Vectara RAG tools.
@@ -165,6 +177,7 @@ class VectaraToolFactory:
            vectara_base_url=vectara_base_url,
            vectara_verify_ssl=vectara_verify_ssl,
        )
+        vectara.vectara_api_timeout = 10

        # Dynamically generate the search function
        def search_function(*args: Any, **kwargs: Any) -> list[dict]:
@@ -220,7 +233,7 @@ class VectaraToolFactory:
                x_source_str="vectara-agentic",
                verbose=verbose,
            )
-            response = vectara_retriever
+            response = _retrieve_with_retry(vectara_retriever, query)

            if len(response) == 0:
                msg = "Vectara Tool failed to retrieve any results for the query."
@@ -370,6 +383,7 @@ class VectaraToolFactory:
        save_history: bool = False,
        fcs_threshold: float = 0.0,
        return_direct: bool = False,
+        return_human_readable_output: bool = False,
        verbose: bool = False,
        vectara_base_url: str = "https://api.vectara.io",
        vectara_verify_ssl: bool = True,
@@ -432,6 +446,7 @@ class VectaraToolFactory:
            fcs_threshold (float, optional): A threshold for factual consistency.
                If set above 0, the tool notifies the calling agent that it "cannot respond" if FCS is too low.
            return_direct (bool, optional): Whether the agent should return the tool's response directly.
+            return_human_readable_output (bool, optional): Whether to return the output in a human-readable format.
            verbose (bool, optional): Whether to print verbose output.
            vectara_base_url (str, optional): The base URL for the Vectara API.
            vectara_verify_ssl (bool, optional): Whether to verify SSL certificates for the Vectara API.
@@ -447,6 +462,7 @@ class VectaraToolFactory:
            vectara_base_url=vectara_base_url,
            vectara_verify_ssl=vectara_verify_ssl,
        )
+        vectara.vectara_api_timeout = 60
        keys_to_ignore = ["lang", "offset", "len"]

        # Dynamically generate the RAG function
@@ -473,7 +489,7 @@ class VectaraToolFactory:
                )
                return {"text": msg, "metadata": {"args": args, "kwargs": kwargs}}

-
+            computed_citations_url_pattern = (
                (
                    citation_url_pattern
                    if citation_url_pattern is not None
@@ -482,6 +498,8 @@ class VectaraToolFactory:
                    if include_citations
                    else None
            )
+            computed_citations_text_pattern = citation_text_pattern if include_citations else None
+
            vectara_query_engine = vectara.as_query_engine(
                summary_enabled=True,
                similarity_top_k=summary_num_results,
@@ -514,15 +532,13 @@ class VectaraToolFactory:
                frequency_penalty=frequency_penalty,
                presence_penalty=presence_penalty,
                citations_style="markdown" if include_citations else None,
-                citations_url_pattern=
-                citations_text_pattern=
-                citation_text_pattern if include_citations else None
-                ),
+                citations_url_pattern=computed_citations_url_pattern,
+                citations_text_pattern=computed_citations_text_pattern,
                save_history=save_history,
                x_source_str="vectara-agentic",
                verbose=verbose,
            )
-            response = vectara_query_engine
+            response = _query_with_retry(vectara_query_engine, query)

            if len(response.source_nodes) == 0:
                msg = (
@@ -536,20 +552,6 @@ class VectaraToolFactory:
                kwargs["query"] = query
                return {"text": msg, "metadata": {"args": args, "kwargs": kwargs}}

-            # Extract citation metadata
-            pattern = r"\[(\d+)\]"
-            matches = re.findall(pattern, response.response)
-            citation_numbers = sorted(set(int(match) for match in matches))
-            citation_metadata = {}
-            for citation_number in citation_numbers:
-                metadata = {
-                    k: v
-                    for k, v in response.source_nodes[
-                        citation_number - 1
-                    ].metadata.items()
-                    if k not in keys_to_ignore
-                }
-                citation_metadata[str(citation_number)] = metadata
            fcs = 0.0
            fcs_str = response.metadata["fcs"] if "fcs" in response.metadata else "0.0"
            if fcs_str and is_float(fcs_str):
@@ -560,16 +562,71 @@ class VectaraToolFactory:
                    "text": msg,
                    "metadata": {"args": args, "kwargs": kwargs, "fcs": fcs},
                }
-            if fcs:
-                citation_metadata["fcs"] = fcs
-            res = {"text": response.response, "metadata": citation_metadata}

-            #
-
-
-
+            # Add source nodes to tool output
+            if ((not return_human_readable_output) and
+                (computed_citations_url_pattern is not None) and
+                (computed_citations_text_pattern is not None)):
+                response_text = str(response.response)
+                citation_metadata = []
+
+                # Converts a dictionary to an object with .<field> access
+                def to_obj(data):
+                    return type('obj', (object,), data)()
+
+                for source_node in response.source_nodes:
+                    node = source_node.node
+                    node_id = node.id_
+                    node_text = (
+                        node.text_resource.text if hasattr(node, 'text_resource')
+                        else getattr(node, 'text', '')
+                    )
+                    node_metadata = getattr(node, 'metadata', {})
+                    for key in keys_to_ignore:
+                        if key in node_metadata:
+                            del node_metadata[key]
+
+                    try:
+                        template_data = {}
+
+                        doc_data = node_metadata.get('document', {})
+                        template_data['doc'] = to_obj(doc_data)
+
+                        part_data = {k: v for k, v in node_metadata.items() if k != 'document'}
+                        template_data['part'] = to_obj(part_data)
+
+                        formatted_citation_text = computed_citations_text_pattern.format(**template_data)
+                        formatted_citation_url = computed_citations_url_pattern.format(**template_data)
+                        expected_citation = f"[{formatted_citation_text}]({formatted_citation_url})"
+
+                        if expected_citation in response_text:
+                            citation_metadata.append({
+                                'doc_id': node_id,
+                                'text': node_text,
+                                'metadata': node_metadata,
+                                'score': getattr(node, 'score', None)
+                            })
+
+                    except Exception as e:
+                        if verbose:
+                            print(f"Could not format citation for search result {node_id}: {e}")
+                        continue
+
+                res = {"text": response.response, "citations": citation_metadata}
+                if fcs:
+                    res["fcs"] = fcs
+            else:
+                res = {"text": response.response}
+
+            # Create human-readable output
+            if return_human_readable_output:
+                def format_rag_response(result):
+                    text = result["text"]
+                    return text
+
+                return create_human_readable_output(res, format_rag_response)

-            return
+            return res

 class RagToolBaseParams(BaseModel):
     """Model for the base parameters of the RAG tool."""
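
Two things in these hunks are easy to miss. First, the retry helpers give Vectara calls up to three attempts with exponential backoff capped at 10 seconds (retrying's wait_exponential_* parameters are in milliseconds). Second, citation matching works by converting metadata dicts into attribute-style objects so that str.format() patterns like "{doc.id}" can resolve. A minimal standalone sketch of that trick (the pattern strings and field names here are hypothetical, not taken from the package):

def to_obj(data):
    # dict keys become attributes on a throwaway object
    return type('obj', (object,), data)()

doc = to_obj({"id": "doc-42", "title": "Annual Report"})
part = to_obj({"page": 7})

citation_text_pattern = "{doc.title}, p. {part.page}"   # hypothetical pattern
citation_url_pattern = "https://example.com/{doc.id}"   # hypothetical pattern

text = citation_text_pattern.format(doc=doc, part=part)
url = citation_url_pattern.format(doc=doc, part=part)
print(f"[{text}]({url})")  # -> [Annual Report, p. 7](https://example.com/doc-42)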
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4/vectara_agentic.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectara_agentic
-Version: 0.4.3
+Version: 0.4.4
 Summary: A Python package for creating AI Assistants and AI Agents with Vectara
 Home-page: https://github.com/vectara/py-vectara-agentic
 Author: Ofer Mendelevitch
@@ -16,19 +16,20 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: llama-index==0.13.
-Requires-Dist: llama-index-core==0.13.
+Requires-Dist: llama-index==0.13.3
+Requires-Dist: llama-index-core==0.13.3
 Requires-Dist: llama-index-workflows==1.3.0
 Requires-Dist: llama-index-cli==0.5.0
 Requires-Dist: llama-index-indices-managed-vectara==0.5.0
-Requires-Dist: llama-index-llms-openai==0.5.
+Requires-Dist: llama-index-llms-openai==0.5.4
 Requires-Dist: llama-index-llms-openai-like==0.5.0
-Requires-Dist: llama-index-llms-anthropic==0.8.
+Requires-Dist: llama-index-llms-anthropic==0.8.5
 Requires-Dist: llama-index-llms-together==0.4.0
 Requires-Dist: llama-index-llms-groq==0.4.0
 Requires-Dist: llama-index-llms-cohere==0.6.0
 Requires-Dist: llama-index-llms-google-genai==0.3.0
-Requires-Dist:
+Requires-Dist: google_genai>=1.31.0
+Requires-Dist: llama-index-llms-bedrock-converse==0.8.2
 Requires-Dist: llama-index-tools-yahoo-finance==0.4.0
 Requires-Dist: llama-index-tools-arxiv==0.4.0
 Requires-Dist: llama-index-tools-database==0.4.0
@@ -54,7 +55,7 @@ Requires-Dist: protobuf==5.29.5
 Requires-Dist: tokenizers>=0.20
 Requires-Dist: pydantic>=2.11.5
 Requires-Dist: pandas==2.2.3
-Requires-Dist: retrying==1.
+Requires-Dist: retrying==1.4.2
 Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: httpx==0.28.1
@@ -869,7 +870,7 @@ agent_config = AgentConfig(
     main_llm_provider = ModelProvider.ANTHROPIC,
     main_llm_model_name = 'claude-3-5-sonnet-20241022',
     tool_llm_provider = ModelProvider.TOGETHER,
-    tool_llm_model_name = '
+    tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
 )

 agent = Agent(
{vectara_agentic-0.4.3 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/requires.txt
@@ -1,16 +1,17 @@
-llama-index==0.13.
-llama-index-core==0.13.
+llama-index==0.13.3
+llama-index-core==0.13.3
 llama-index-workflows==1.3.0
 llama-index-cli==0.5.0
 llama-index-indices-managed-vectara==0.5.0
-llama-index-llms-openai==0.5.
+llama-index-llms-openai==0.5.4
 llama-index-llms-openai-like==0.5.0
-llama-index-llms-anthropic==0.8.
+llama-index-llms-anthropic==0.8.5
 llama-index-llms-together==0.4.0
 llama-index-llms-groq==0.4.0
 llama-index-llms-cohere==0.6.0
 llama-index-llms-google-genai==0.3.0
-
+google_genai>=1.31.0
+llama-index-llms-bedrock-converse==0.8.2
 llama-index-tools-yahoo-finance==0.4.0
 llama-index-tools-arxiv==0.4.0
 llama-index-tools-database==0.4.0
@@ -36,7 +37,7 @@ protobuf==5.29.5
 tokenizers>=0.20
 pydantic>=2.11.5
 pandas==2.2.3
-retrying==1.
+retrying==1.4.2
 python-dotenv==1.0.1
 cloudpickle>=3.1.1
 httpx==0.28.1
vectara_agentic-0.4.3/tests/test_together.py (deleted; superseded by the new tests/test_together.py above)
@@ -1,70 +0,0 @@
-# Suppress external dependency warnings before any other imports
-import warnings
-
-warnings.simplefilter("ignore", DeprecationWarning)
-
-import unittest
-import threading
-
-from vectara_agentic.agent import Agent
-from vectara_agentic.tools import ToolsFactory
-
-import nest_asyncio
-
-nest_asyncio.apply()
-
-from conftest import (
-    fc_config_together,
-    mult,
-    STANDARD_TEST_TOPIC,
-    STANDARD_TEST_INSTRUCTIONS,
-)
-
-
-ARIZE_LOCK = threading.Lock()
-
-
-class TestTogether(unittest.IsolatedAsyncioTestCase):
-
-    async def test_multiturn(self):
-        with ARIZE_LOCK:
-            tools = [ToolsFactory().create_tool(mult)]
-            agent = Agent(
-                agent_config=fc_config_together,
-                tools=tools,
-                topic=STANDARD_TEST_TOPIC,
-                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
-            )
-
-            # First calculation: 5 * 10 = 50
-            stream1 = await agent.astream_chat(
-                "What is 5 times 10. Only give the answer, nothing else"
-            )
-            # Consume the stream
-            async for chunk in stream1.async_response_gen():
-                pass
-            _ = await stream1.aget_response()
-
-            # Second calculation: 3 * 7 = 21
-            stream2 = await agent.astream_chat(
-                "what is 3 times 7. Only give the answer, nothing else"
-            )
-            # Consume the stream
-            async for chunk in stream2.async_response_gen():
-                pass
-            _ = await stream2.aget_response()
-
-            # Final calculation: 50 * 21 = 1050
-            stream3 = await agent.astream_chat(
-                "multiply the results of the last two questions. Output only the answer."
-            )
-            # Consume the stream
-            async for chunk in stream3.async_response_gen():
-                pass
-            response3 = await stream3.aget_response()
-
-            self.assertEqual(response3.response, "1050")
-
-
-if __name__ == "__main__":
-    unittest.main()
All remaining files listed above with +0 -0 are unchanged between 0.4.3 and 0.4.4.