vectara-agentic 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vectara-agentic might be problematic. Click here for more details.

tests/test_groq.py CHANGED
@@ -8,6 +8,8 @@ import threading
8
8
 
9
9
  from vectara_agentic.agent import Agent
10
10
  from vectara_agentic.tools import ToolsFactory
11
+ from vectara_agentic.agent_config import AgentConfig
12
+ from vectara_agentic.types import AgentType, ModelProvider
11
13
 
12
14
  import nest_asyncio
13
15
 
@@ -64,6 +66,38 @@ class TestGROQ(unittest.IsolatedAsyncioTestCase):
64
66
 
65
67
  self.assertEqual(response3.response, "1050")
66
68
 
69
+ async def test_gpt_oss_120b(self):
70
+ """Test GPT-OSS-120B model with GROQ provider."""
71
+ with ARIZE_LOCK:
72
+ # Create config specifically for GPT-OSS-120B via GROQ
73
+ gpt_oss_config = AgentConfig(
74
+ agent_type=AgentType.FUNCTION_CALLING,
75
+ main_llm_provider=ModelProvider.GROQ,
76
+ main_llm_model_name="openai/gpt-oss-120b",
77
+ tool_llm_provider=ModelProvider.GROQ,
78
+ tool_llm_model_name="openai/gpt-oss-120b",
79
+ )
80
+
81
+ tools = [ToolsFactory().create_tool(mult)]
82
+ agent = Agent(
83
+ agent_config=gpt_oss_config,
84
+ tools=tools,
85
+ topic=STANDARD_TEST_TOPIC,
86
+ custom_instructions=STANDARD_TEST_INSTRUCTIONS,
87
+ )
88
+
89
+ # Test simple multiplication: 8 * 6 = 48
90
+ stream = await agent.astream_chat(
91
+ "What is 8 times 6? Only give the answer, nothing else"
92
+ )
93
+ # Consume the stream
94
+ async for chunk in stream.async_response_gen():
95
+ pass
96
+ response = await stream.aget_response()
97
+
98
+ # Verify the response contains the correct answer
99
+ self.assertIn("48", response.response)
100
+
67
101
 
68
102
  if __name__ == "__main__":
69
103
  unittest.main()
tests/test_openai.py ADDED
@@ -0,0 +1,160 @@
1
+ # Suppress external dependency warnings before any other imports
2
+ import warnings
3
+
4
+ warnings.simplefilter("ignore", DeprecationWarning)
5
+
6
+ import unittest
7
+ import threading
8
+
9
+ from vectara_agentic.agent import Agent
10
+ from vectara_agentic.tools import ToolsFactory
11
+ from vectara_agentic.agent_config import AgentConfig
12
+ from vectara_agentic.types import AgentType, ModelProvider
13
+
14
+ import nest_asyncio
15
+
16
+ nest_asyncio.apply()
17
+
18
+ from conftest import (
19
+ fc_config_openai,
20
+ mult,
21
+ STANDARD_TEST_TOPIC,
22
+ STANDARD_TEST_INSTRUCTIONS,
23
+ )
24
+
25
+
26
+ ARIZE_LOCK = threading.Lock()
27
+
28
+
29
+ class TestOpenAI(unittest.IsolatedAsyncioTestCase):
30
+
31
+ async def test_multiturn(self):
32
+ """Test multi-turn conversation with default OpenAI model."""
33
+ with ARIZE_LOCK:
34
+ tools = [ToolsFactory().create_tool(mult)]
35
+ agent = Agent(
36
+ agent_config=fc_config_openai,
37
+ tools=tools,
38
+ topic=STANDARD_TEST_TOPIC,
39
+ custom_instructions=STANDARD_TEST_INSTRUCTIONS,
40
+ )
41
+
42
+ # First calculation: 5 * 10 = 50
43
+ stream1 = await agent.astream_chat(
44
+ "What is 5 times 10. Only give the answer, nothing else"
45
+ )
46
+ # Consume the stream
47
+ async for chunk in stream1.async_response_gen():
48
+ pass
49
+ _ = await stream1.aget_response()
50
+
51
+ # Second calculation: 3 * 7 = 21
52
+ stream2 = await agent.astream_chat(
53
+ "what is 3 times 7. Only give the answer, nothing else"
54
+ )
55
+ # Consume the stream
56
+ async for chunk in stream2.async_response_gen():
57
+ pass
58
+ _ = await stream2.aget_response()
59
+
60
+ # Final calculation: 50 * 21 = 1050
61
+ stream3 = await agent.astream_chat(
62
+ "multiply the results of the last two questions. Output only the answer."
63
+ )
64
+ # Consume the stream
65
+ async for chunk in stream3.async_response_gen():
66
+ pass
67
+ response3 = await stream3.aget_response()
68
+
69
+ self.assertEqual(response3.response, "1050")
70
+
71
+ async def test_gpt_4o(self):
72
+ """Test GPT-4o model with OpenAI provider."""
73
+ with ARIZE_LOCK:
74
+ config = AgentConfig(
75
+ agent_type=AgentType.FUNCTION_CALLING,
76
+ main_llm_provider=ModelProvider.OPENAI,
77
+ main_llm_model_name="gpt-4o",
78
+ tool_llm_provider=ModelProvider.OPENAI,
79
+ tool_llm_model_name="gpt-4o",
80
+ )
81
+
82
+ tools = [ToolsFactory().create_tool(mult)]
83
+ agent = Agent(
84
+ agent_config=config,
85
+ tools=tools,
86
+ topic=STANDARD_TEST_TOPIC,
87
+ custom_instructions=STANDARD_TEST_INSTRUCTIONS,
88
+ )
89
+
90
+ # Test simple multiplication: 4 * 3 = 12
91
+ stream = await agent.astream_chat(
92
+ "What is 4 times 3? Only give the answer, nothing else"
93
+ )
94
+ async for chunk in stream.async_response_gen():
95
+ pass
96
+ response = await stream.aget_response()
97
+
98
+ self.assertIn("12", response.response)
99
+
100
+ async def test_gpt_4_1(self):
101
+ """Test GPT-4.1 model with OpenAI provider."""
102
+ with ARIZE_LOCK:
103
+ config = AgentConfig(
104
+ agent_type=AgentType.FUNCTION_CALLING,
105
+ main_llm_provider=ModelProvider.OPENAI,
106
+ main_llm_model_name="gpt-4.1",
107
+ tool_llm_provider=ModelProvider.OPENAI,
108
+ tool_llm_model_name="gpt-4.1",
109
+ )
110
+
111
+ tools = [ToolsFactory().create_tool(mult)]
112
+ agent = Agent(
113
+ agent_config=config,
114
+ tools=tools,
115
+ topic=STANDARD_TEST_TOPIC,
116
+ custom_instructions=STANDARD_TEST_INSTRUCTIONS,
117
+ )
118
+
119
+ # Test simple multiplication: 6 * 2 = 12
120
+ stream = await agent.astream_chat(
121
+ "What is 6 times 2? Only give the answer, nothing else"
122
+ )
123
+ async for chunk in stream.async_response_gen():
124
+ pass
125
+ response = await stream.aget_response()
126
+
127
+ self.assertIn("12", response.response)
128
+
129
+ async def test_gpt_5_minimal_reasoning(self):
130
+ """Test GPT-5 model with minimal reasoning effort."""
131
+ with ARIZE_LOCK:
132
+ config = AgentConfig(
133
+ agent_type=AgentType.FUNCTION_CALLING,
134
+ main_llm_provider=ModelProvider.OPENAI,
135
+ main_llm_model_name="gpt-5",
136
+ tool_llm_provider=ModelProvider.OPENAI,
137
+ tool_llm_model_name="gpt-5",
138
+ )
139
+
140
+ tools = [ToolsFactory().create_tool(mult)]
141
+ agent = Agent(
142
+ agent_config=config,
143
+ tools=tools,
144
+ topic=STANDARD_TEST_TOPIC,
145
+ custom_instructions=STANDARD_TEST_INSTRUCTIONS,
146
+ )
147
+
148
+ # Test simple multiplication: 5 * 5 = 25
149
+ stream = await agent.astream_chat(
150
+ "What is 5 times 5? Only give the answer, nothing else"
151
+ )
152
+ async for chunk in stream.async_response_gen():
153
+ pass
154
+ response = await stream.aget_response()
155
+
156
+ self.assertIn("25", response.response)
157
+
158
+
159
+ if __name__ == "__main__":
160
+ unittest.main()
tests/test_streaming.py CHANGED
@@ -4,7 +4,6 @@ import warnings
4
4
  warnings.simplefilter("ignore", DeprecationWarning)
5
5
 
6
6
  import unittest
7
- import asyncio
8
7
 
9
8
  from vectara_agentic.agent import Agent
10
9
  from vectara_agentic.tools import ToolsFactory
@@ -14,7 +13,6 @@ import nest_asyncio
14
13
  nest_asyncio.apply()
15
14
 
16
15
  from conftest import (
17
- fc_config_openai,
18
16
  fc_config_anthropic,
19
17
  mult,
20
18
  STANDARD_TEST_TOPIC,
@@ -62,48 +60,6 @@ class TestAgentStreaming(unittest.IsolatedAsyncioTestCase):
62
60
 
63
61
  self.assertIn("1050", response3.response)
64
62
 
65
- async def test_openai(self):
66
- tools = [ToolsFactory().create_tool(mult)]
67
- agent = Agent(
68
- agent_config=fc_config_openai,
69
- tools=tools,
70
- topic=STANDARD_TEST_TOPIC,
71
- custom_instructions=STANDARD_TEST_INSTRUCTIONS,
72
- )
73
-
74
- # First calculation: 5 * 10 = 50
75
- stream1 = await agent.astream_chat(
76
- "What is 5 times 10. Only give the answer, nothing else"
77
- )
78
- # Consume the stream
79
- async for chunk in stream1.async_response_gen():
80
- pass
81
- _ = await stream1.aget_response()
82
-
83
- # Second calculation: 3 * 7 = 21
84
- stream2 = await agent.astream_chat(
85
- "what is 3 times 7. Only give the answer, nothing else"
86
- )
87
- # Consume the stream
88
- async for chunk in stream2.async_response_gen():
89
- pass
90
- _ = await stream2.aget_response()
91
-
92
- # Final calculation: 50 * 21 = 1050
93
- stream3 = await agent.astream_chat(
94
- "multiply the results of the last two multiplications. Only give the answer, nothing else."
95
- )
96
- # Consume the stream
97
- async for chunk in stream3.async_response_gen():
98
- pass
99
- response3 = await stream3.aget_response()
100
-
101
- self.assertIn("1050", response3.response)
102
-
103
- def test_openai_sync(self):
104
- """Synchronous wrapper for the async test"""
105
- asyncio.run(self.test_openai())
106
-
107
63
 
108
64
  if __name__ == "__main__":
109
65
  unittest.main()
tests/test_together.py CHANGED
@@ -19,6 +19,8 @@ from conftest import (
19
19
  STANDARD_TEST_TOPIC,
20
20
  STANDARD_TEST_INSTRUCTIONS,
21
21
  )
22
+ from vectara_agentic.agent_config import AgentConfig
23
+ from vectara_agentic.types import AgentType, ModelProvider
22
24
 
23
25
 
24
26
  ARIZE_LOCK = threading.Lock()
@@ -65,6 +67,70 @@ class TestTogether(unittest.IsolatedAsyncioTestCase):
65
67
 
66
68
  self.assertEqual(response3.response, "1050")
67
69
 
70
+ async def test_qwen3_coder(self):
71
+ """Test Qwen3-235B-A22B-fp8-tput model with Together AI provider."""
72
+ with ARIZE_LOCK:
73
+ # Create config for the Qwen3-235B-A22B-fp8-tput model
74
+ qwen_config = AgentConfig(
75
+ agent_type=AgentType.FUNCTION_CALLING,
76
+ main_llm_provider=ModelProvider.TOGETHER,
77
+ main_llm_model_name="Qwen/Qwen3-235B-A22B-fp8-tput",
78
+ tool_llm_provider=ModelProvider.TOGETHER,
79
+ tool_llm_model_name="Qwen/Qwen3-235B-A22B-fp8-tput",
80
+ )
81
+
82
+ tools = [ToolsFactory().create_tool(mult)]
83
+ agent = Agent(
84
+ agent_config=qwen_config,
85
+ tools=tools,
86
+ topic=STANDARD_TEST_TOPIC,
87
+ custom_instructions=STANDARD_TEST_INSTRUCTIONS,
88
+ )
89
+
90
+ # Test simple multiplication: 7 * 9 = 63
91
+ stream = await agent.astream_chat(
92
+ "What is 7 times 9? Only give the answer, nothing else"
93
+ )
94
+ # Consume the stream
95
+ async for chunk in stream.async_response_gen():
96
+ pass
97
+ response = await stream.aget_response()
98
+
99
+ # Verify the response contains the correct answer
100
+ self.assertIn("63", response.response)
101
+
102
+ async def test_llama4_scout(self):
103
+ """Test Llama-4-Scout-17B-16E-Instruct model with Together AI provider."""
104
+ with ARIZE_LOCK:
105
+ # Create config specifically for Llama 4 Scout
106
+ llama4_config = AgentConfig(
107
+ agent_type=AgentType.FUNCTION_CALLING,
108
+ main_llm_provider=ModelProvider.TOGETHER,
109
+ main_llm_model_name="meta-llama/Llama-4-Scout-17B-16E-Instruct",
110
+ tool_llm_provider=ModelProvider.TOGETHER,
111
+ tool_llm_model_name="meta-llama/Llama-4-Scout-17B-16E-Instruct",
112
+ )
113
+
114
+ tools = [ToolsFactory().create_tool(mult)]
115
+ agent = Agent(
116
+ agent_config=llama4_config,
117
+ tools=tools,
118
+ topic=STANDARD_TEST_TOPIC,
119
+ custom_instructions=STANDARD_TEST_INSTRUCTIONS,
120
+ )
121
+
122
+ # Test simple multiplication: 8 * 6 = 48
123
+ stream = await agent.astream_chat(
124
+ "What is 8 times 6? Only give the answer, nothing else"
125
+ )
126
+ # Consume the stream
127
+ async for chunk in stream.async_response_gen():
128
+ pass
129
+ response = await stream.aget_response()
130
+
131
+ # Verify the response contains the correct answer
132
+ self.assertIn("48", response.response)
133
+
68
134
 
69
135
  if __name__ == "__main__":
70
136
  unittest.main()
@@ -1,4 +1,4 @@
1
1
  """
2
2
  Define the version of the package.
3
3
  """
4
- __version__ = "0.4.3"
4
+ __version__ = "0.4.4"
@@ -18,14 +18,49 @@ from .agent_config import AgentConfig
18
18
 
19
19
  provider_to_default_model_name = {
20
20
  ModelProvider.OPENAI: "gpt-4.1-mini",
21
- ModelProvider.ANTHROPIC: "claude-sonnet-4-20250514",
21
+ ModelProvider.ANTHROPIC: "claude-sonnet-4-0",
22
22
  ModelProvider.TOGETHER: "deepseek-ai/DeepSeek-V3",
23
23
  ModelProvider.GROQ: "openai/gpt-oss-20b",
24
24
  ModelProvider.BEDROCK: "us.anthropic.claude-sonnet-4-20250514-v1:0",
25
25
  ModelProvider.COHERE: "command-a-03-2025",
26
- ModelProvider.GEMINI: "models/gemini-2.5-flash-lite",
26
+ ModelProvider.GEMINI: "models/gemini-2.5-flash",
27
27
  }
28
28
 
29
+ models_to_max_tokens = {
30
+ "gpt-5": 128000,
31
+ "gpt-4.1": 32768,
32
+ "gpt-4o": 16384,
33
+ "gpt-4.1-mini": 32768,
34
+ "claude-sonnet-4": 65536,
35
+ "deepseek-ai/deepseek-v3": 8192,
36
+ "models/gemini-2.5-flash": 65536,
37
+ "models/gemini-2.5-flash-lite": 65536,
38
+ "models/gemini-2.5-pro": 65536,
39
+ "openai/gpt-oss-20b": 65536,
40
+ "openai/gpt-oss-120b": 65536,
41
+ "us.anthropic.claude-sonnet-4-20250514-v1:0": 65536,
42
+ "command-a-03-2025": 8192,
43
+ }
44
+
45
+
46
+ def get_max_tokens(model_name: str, model_provider: str) -> int:
47
+ """Get the maximum token limit for a given model name and provider."""
48
+ if model_provider in [
49
+ ModelProvider.GEMINI,
50
+ ModelProvider.TOGETHER,
51
+ ModelProvider.OPENAI,
52
+ ModelProvider.ANTHROPIC,
53
+ ModelProvider.GROQ,
54
+ ModelProvider.BEDROCK,
55
+ ModelProvider.COHERE,
56
+ ]:
57
+ # Exact (case-sensitive) lookup; models not in the table fall back to 16384
58
+ max_tokens = models_to_max_tokens.get(model_name, 16384)
59
+ else:
60
+ max_tokens = 8192
61
+ return max_tokens
62
+
63
+
29
64
  DEFAULT_MODEL_PROVIDER = ModelProvider.OPENAI
30
65
 
31
66
  # Manual cache for LLM instances to handle mutable AgentConfig objects
@@ -94,19 +129,11 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
94
129
  if cache_key in _llm_cache:
95
130
  return _llm_cache[cache_key]
96
131
  model_provider, model_name = _get_llm_params_for_role(role, config)
97
- max_tokens = (
98
- 16384
99
- if model_provider
100
- in [
101
- ModelProvider.GEMINI,
102
- ModelProvider.TOGETHER,
103
- ModelProvider.OPENAI,
104
- ModelProvider.ANTHROPIC,
105
- ]
106
- else 8192
107
- )
132
+ max_tokens = get_max_tokens(model_name, model_provider)
108
133
  if model_provider == ModelProvider.OPENAI:
109
- additional_kwargs = {"reasoning_effort": "minimal"} if model_name.startswith("gpt-5") else {}
134
+ additional_kwargs = (
135
+ {"reasoning_effort": "minimal"} if model_name.startswith("gpt-5") else {}
136
+ )
110
137
  llm = OpenAI(
111
138
  model=model_name,
112
139
  temperature=0,
@@ -129,11 +156,20 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
129
156
  raise ImportError(
130
157
  "google_genai not available. Install with: pip install llama-index-llms-google-genai"
131
158
  ) from e
159
+ import google.genai.types as google_types
160
+ generation_config = google_types.GenerateContentConfig(
161
+ temperature=0.0,
162
+ seed=123,
163
+ max_output_tokens=max_tokens,
164
+ thinking_config=google_types.ThinkingConfig(thinking_budget=0, include_thoughts=False),
165
+ )
132
166
  llm = GoogleGenAI(
133
167
  model=model_name,
134
168
  temperature=0,
135
169
  is_function_calling_model=True,
136
170
  max_tokens=max_tokens,
171
+ generation_config=generation_config,
172
+ context_window=1_000_000,
137
173
  )
138
174
  elif model_provider == ModelProvider.TOGETHER:
139
175
  try:
@@ -142,11 +178,18 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
142
178
  raise ImportError(
143
179
  "together not available. Install with: pip install llama-index-llms-together"
144
180
  ) from e
181
+ additional_kwargs = {"seed": 42}
182
+ if model_name in [
183
+ "deepseek-ai/DeepSeek-V3.1", "openai/gpt-oss-120b",
184
+ "deepseek-ai/DeepSeek-R1", "Qwen/Qwen3-235B-A22B-Thinking-2507"
185
+ ]:
186
+ additional_kwargs['reasoning_effort'] = "low"
145
187
  llm = TogetherLLM(
146
188
  model=model_name,
147
189
  temperature=0,
148
190
  is_function_calling_model=True,
149
191
  max_tokens=max_tokens,
192
+ additional_kwargs=additional_kwargs,
150
193
  )
151
194
  elif model_provider == ModelProvider.GROQ:
152
195
  try:
@@ -193,7 +236,11 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
193
236
  raise ImportError(
194
237
  "openai_like not available. Install with: pip install llama-index-llms-openai-like"
195
238
  ) from e
196
- if not config or not config.private_llm_api_base or not config.private_llm_api_key:
239
+ if (
240
+ not config
241
+ or not config.private_llm_api_base
242
+ or not config.private_llm_api_key
243
+ ):
197
244
  raise ValueError(
198
245
  "Private LLM requires both private_llm_api_base and private_llm_api_key to be set in AgentConfig."
199
246
  )
vectara_agentic/tools.py CHANGED
@@ -3,12 +3,12 @@ This module contains the ToolsFactory class for creating agent tools.
3
3
  """
4
4
 
5
5
  import inspect
6
- import re
7
6
  import importlib
8
7
  import os
9
8
  import asyncio
10
-
11
9
  from typing import Callable, List, Dict, Any, Optional, Union
10
+
11
+ from retrying import retry
12
12
  from pydantic import BaseModel, Field
13
13
 
14
14
  from llama_index.core.tools import FunctionTool
@@ -65,6 +65,18 @@ LI_packages = {
65
65
  }
66
66
 
67
67
 
68
+ @retry(stop_max_attempt_number=3, wait_exponential_multiplier=1000, wait_exponential_max=10000)
69
+ def _query_with_retry(vectara_query_engine, query):
70
+ """Execute Vectara query with automatic retry on timeout/failure."""
71
+ return vectara_query_engine.query(query)
72
+
73
+
74
+ @retry(stop_max_attempt_number=3, wait_exponential_multiplier=1000, wait_exponential_max=10000)
75
+ def _retrieve_with_retry(vectara_retriever, query):
76
+ """Execute Vectara retrieve with automatic retry on timeout/failure."""
77
+ return vectara_retriever.retrieve(query)
78
+
79
+
68
80
  class VectaraToolFactory:
69
81
  """
70
82
  A factory class for creating Vectara RAG tools.
@@ -165,6 +177,7 @@ class VectaraToolFactory:
165
177
  vectara_base_url=vectara_base_url,
166
178
  vectara_verify_ssl=vectara_verify_ssl,
167
179
  )
180
+ vectara.vectara_api_timeout = 10
168
181
 
169
182
  # Dynamically generate the search function
170
183
  def search_function(*args: Any, **kwargs: Any) -> list[dict]:
@@ -220,7 +233,7 @@ class VectaraToolFactory:
220
233
  x_source_str="vectara-agentic",
221
234
  verbose=verbose,
222
235
  )
223
- response = vectara_retriever.retrieve(query)
236
+ response = _retrieve_with_retry(vectara_retriever, query)
224
237
 
225
238
  if len(response) == 0:
226
239
  msg = "Vectara Tool failed to retrieve any results for the query."
@@ -370,6 +383,7 @@ class VectaraToolFactory:
370
383
  save_history: bool = False,
371
384
  fcs_threshold: float = 0.0,
372
385
  return_direct: bool = False,
386
+ return_human_readable_output: bool = False,
373
387
  verbose: bool = False,
374
388
  vectara_base_url: str = "https://api.vectara.io",
375
389
  vectara_verify_ssl: bool = True,
@@ -432,6 +446,7 @@ class VectaraToolFactory:
432
446
  fcs_threshold (float, optional): A threshold for factual consistency.
433
447
  If set above 0, the tool notifies the calling agent that it "cannot respond" if FCS is too low.
434
448
  return_direct (bool, optional): Whether the agent should return the tool's response directly.
449
+ return_human_readable_output (bool, optional): Whether to return the output in a human-readable format.
435
450
  verbose (bool, optional): Whether to print verbose output.
436
451
  vectara_base_url (str, optional): The base URL for the Vectara API.
437
452
  vectara_verify_ssl (bool, optional): Whether to verify SSL certificates for the Vectara API.
@@ -447,6 +462,7 @@ class VectaraToolFactory:
447
462
  vectara_base_url=vectara_base_url,
448
463
  vectara_verify_ssl=vectara_verify_ssl,
449
464
  )
465
+ vectara.vectara_api_timeout = 60
450
466
  keys_to_ignore = ["lang", "offset", "len"]
451
467
 
452
468
  # Dynamically generate the RAG function
@@ -473,7 +489,7 @@ class VectaraToolFactory:
473
489
  )
474
490
  return {"text": msg, "metadata": {"args": args, "kwargs": kwargs}}
475
491
 
476
- citations_url_pattern = (
492
+ computed_citations_url_pattern = (
477
493
  (
478
494
  citation_url_pattern
479
495
  if citation_url_pattern is not None
@@ -482,6 +498,8 @@ class VectaraToolFactory:
482
498
  if include_citations
483
499
  else None
484
500
  )
501
+ computed_citations_text_pattern = citation_text_pattern if include_citations else None
502
+
485
503
  vectara_query_engine = vectara.as_query_engine(
486
504
  summary_enabled=True,
487
505
  similarity_top_k=summary_num_results,
@@ -514,15 +532,13 @@ class VectaraToolFactory:
514
532
  frequency_penalty=frequency_penalty,
515
533
  presence_penalty=presence_penalty,
516
534
  citations_style="markdown" if include_citations else None,
517
- citations_url_pattern=citations_url_pattern,
518
- citations_text_pattern=(
519
- citation_text_pattern if include_citations else None
520
- ),
535
+ citations_url_pattern=computed_citations_url_pattern,
536
+ citations_text_pattern=computed_citations_text_pattern,
521
537
  save_history=save_history,
522
538
  x_source_str="vectara-agentic",
523
539
  verbose=verbose,
524
540
  )
525
- response = vectara_query_engine.query(query)
541
+ response = _query_with_retry(vectara_query_engine, query)
526
542
 
527
543
  if len(response.source_nodes) == 0:
528
544
  msg = (
@@ -536,20 +552,6 @@ class VectaraToolFactory:
536
552
  kwargs["query"] = query
537
553
  return {"text": msg, "metadata": {"args": args, "kwargs": kwargs}}
538
554
 
539
- # Extract citation metadata
540
- pattern = r"\[(\d+)\]"
541
- matches = re.findall(pattern, response.response)
542
- citation_numbers = sorted(set(int(match) for match in matches))
543
- citation_metadata = {}
544
- for citation_number in citation_numbers:
545
- metadata = {
546
- k: v
547
- for k, v in response.source_nodes[
548
- citation_number - 1
549
- ].metadata.items()
550
- if k not in keys_to_ignore
551
- }
552
- citation_metadata[str(citation_number)] = metadata
553
555
  fcs = 0.0
554
556
  fcs_str = response.metadata["fcs"] if "fcs" in response.metadata else "0.0"
555
557
  if fcs_str and is_float(fcs_str):
@@ -560,16 +562,71 @@ class VectaraToolFactory:
560
562
  "text": msg,
561
563
  "metadata": {"args": args, "kwargs": kwargs, "fcs": fcs},
562
564
  }
563
- if fcs:
564
- citation_metadata["fcs"] = fcs
565
- res = {"text": response.response, "metadata": citation_metadata}
566
565
 
567
- # Create human-readable output with citation formatting
568
- def format_rag_response(result):
569
- text = result["text"]
570
- return text
566
+ # Add source nodes to tool output
567
+ if ((not return_human_readable_output) and
568
+ (computed_citations_url_pattern is not None) and
569
+ (computed_citations_text_pattern is not None)):
570
+ response_text = str(response.response)
571
+ citation_metadata = []
572
+
573
+ # Converts a dictionary to an object with .<field> access
574
+ def to_obj(data):
575
+ return type('obj', (object,), data)()
576
+
577
+ for source_node in response.source_nodes:
578
+ node = source_node.node
579
+ node_id = node.id_
580
+ node_text = (
581
+ node.text_resource.text if hasattr(node, 'text_resource')
582
+ else getattr(node, 'text', '')
583
+ )
584
+ node_metadata = getattr(node, 'metadata', {})
585
+ for key in keys_to_ignore:
586
+ if key in node_metadata:
587
+ del node_metadata[key]
588
+
589
+ try:
590
+ template_data = {}
591
+
592
+ doc_data = node_metadata.get('document', {})
593
+ template_data['doc'] = to_obj(doc_data)
594
+
595
+ part_data = {k: v for k, v in node_metadata.items() if k != 'document'}
596
+ template_data['part'] = to_obj(part_data)
597
+
598
+ formatted_citation_text = computed_citations_text_pattern.format(**template_data)
599
+ formatted_citation_url = computed_citations_url_pattern.format(**template_data)
600
+ expected_citation = f"[{formatted_citation_text}]({formatted_citation_url})"
601
+
602
+ if expected_citation in response_text:
603
+ citation_metadata.append({
604
+ 'doc_id': node_id,
605
+ 'text': node_text,
606
+ 'metadata': node_metadata,
607
+ 'score': getattr(node, 'score', None)
608
+ })
609
+
610
+ except Exception as e:
611
+ if verbose:
612
+ print(f"Could not format citation for search result {node_id}: {e}")
613
+ continue
614
+
615
+ res = {"text": response.response, "citations": citation_metadata}
616
+ if fcs:
617
+ res["fcs"] = fcs
618
+ else:
619
+ res = {"text": response.response}
620
+
621
+ # Create human-readable output
622
+ if return_human_readable_output:
623
+ def format_rag_response(result):
624
+ text = result["text"]
625
+ return text
626
+
627
+ return create_human_readable_output(res, format_rag_response)
571
628
 
572
- return create_human_readable_output(res, format_rag_response)
629
+ return res
573
630
 
574
631
  class RagToolBaseParams(BaseModel):
575
632
  """Model for the base parameters of the RAG tool."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectara_agentic
3
- Version: 0.4.3
3
+ Version: 0.4.4
4
4
  Summary: A Python package for creating AI Assistants and AI Agents with Vectara
5
5
  Home-page: https://github.com/vectara/py-vectara-agentic
6
6
  Author: Ofer Mendelevitch
@@ -16,19 +16,20 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: llama-index==0.13.2
20
- Requires-Dist: llama-index-core==0.13.2
19
+ Requires-Dist: llama-index==0.13.3
20
+ Requires-Dist: llama-index-core==0.13.3
21
21
  Requires-Dist: llama-index-workflows==1.3.0
22
22
  Requires-Dist: llama-index-cli==0.5.0
23
23
  Requires-Dist: llama-index-indices-managed-vectara==0.5.0
24
- Requires-Dist: llama-index-llms-openai==0.5.2
24
+ Requires-Dist: llama-index-llms-openai==0.5.4
25
25
  Requires-Dist: llama-index-llms-openai-like==0.5.0
26
- Requires-Dist: llama-index-llms-anthropic==0.8.2
26
+ Requires-Dist: llama-index-llms-anthropic==0.8.5
27
27
  Requires-Dist: llama-index-llms-together==0.4.0
28
28
  Requires-Dist: llama-index-llms-groq==0.4.0
29
29
  Requires-Dist: llama-index-llms-cohere==0.6.0
30
30
  Requires-Dist: llama-index-llms-google-genai==0.3.0
31
- Requires-Dist: llama-index-llms-bedrock-converse==0.8.0
31
+ Requires-Dist: google_genai>=1.31.0
32
+ Requires-Dist: llama-index-llms-bedrock-converse==0.8.2
32
33
  Requires-Dist: llama-index-tools-yahoo-finance==0.4.0
33
34
  Requires-Dist: llama-index-tools-arxiv==0.4.0
34
35
  Requires-Dist: llama-index-tools-database==0.4.0
@@ -54,7 +55,7 @@ Requires-Dist: protobuf==5.29.5
54
55
  Requires-Dist: tokenizers>=0.20
55
56
  Requires-Dist: pydantic>=2.11.5
56
57
  Requires-Dist: pandas==2.2.3
57
- Requires-Dist: retrying==1.3.4
58
+ Requires-Dist: retrying==1.4.2
58
59
  Requires-Dist: python-dotenv==1.0.1
59
60
  Requires-Dist: cloudpickle>=3.1.1
60
61
  Requires-Dist: httpx==0.28.1
@@ -869,7 +870,7 @@ agent_config = AgentConfig(
869
870
  main_llm_provider = ModelProvider.ANTHROPIC,
870
871
  main_llm_model_name = 'claude-3-5-sonnet-20241022',
871
872
  tool_llm_provider = ModelProvider.TOGETHER,
872
- tool_llm_model_name = 'meta-llama/Llama-3.3-70B-Instruct-Turbo'
873
+ tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
873
874
  )
874
875
 
875
876
  agent = Agent(
@@ -11,7 +11,8 @@ tests/test_api_endpoint.py,sha256=PrfV6kWvq5icm3zLgrse9isBsR6EkwfUtSdz1ADSUUs,51
11
11
  tests/test_bedrock.py,sha256=4qBip3plouQkHTRU01_sYebop6fiVe3Fnx5vjkMl3H4,2003
12
12
  tests/test_fallback.py,sha256=LQtnYoK-NohJL3D3pQnlY0yrIGs2B25j6B3gX3wGM1c,3073
13
13
  tests/test_gemini.py,sha256=HVTWmwPFxJ-hjketCkbXa_mOyWXpE-1dG9fu47z00bU,1632
14
- tests/test_groq.py,sha256=Ch9rnziGp4FbrVK8r1dqVW5lKa-JqaqRLwYqM0R7avg,1994
14
+ tests/test_groq.py,sha256=BikJ0AV5-k3kvTUbila9bmIKv2iJy3TQm-Kb_Y23kYw,3378
15
+ tests/test_openai.py,sha256=Uc8wPovmeLgmMItV4OOya6rWlSv7Omre1_B11ajpozU,5396
15
16
  tests/test_private_llm.py,sha256=O5sQfZ_NgE2S1-YJ6eMRn1Gz17XkRjEk9O0iHGACRu0,2752
16
17
  tests/test_react_error_handling.py,sha256=xAozh77qNSvaEzMDHjw2blbDNVUY-5qfvBldD_YHCQQ,11198
17
18
  tests/test_react_memory.py,sha256=3YAPhrWAjmDcT2jm2IfxBx2LSWJGkpYUhWQiVt-qXFs,10177
@@ -20,8 +21,8 @@ tests/test_react_workflow_events.py,sha256=sd7CZbgaQIEhb7d0E8VMXC-ivKTQzZvZaRt5Q
20
21
  tests/test_return_direct.py,sha256=ZhcgkRNGqPQFAYm8moY3HLLIpwdFuAyjShE3F6L16lQ,1522
21
22
  tests/test_serialization.py,sha256=DJZ2E_K54t8INwZR0Q8gS1wi-MGbLIheOBcbRmZNcro,5383
22
23
  tests/test_session_memory.py,sha256=hnADl59agjpXySY-CBjw6sDPn3s6JketIK6XbLZsLzU,9691
23
- tests/test_streaming.py,sha256=EBihBb_ZQiGCCvv7Us7YqHN4CxDIQy-XsUSDVO1n5wU,3302
24
- tests/test_together.py,sha256=G4_gHVXKSFyqyDf189MX6_Mqoc71f9k-gmPmxKDQbrY,2007
24
+ tests/test_streaming.py,sha256=r-kj6DOB7sn2mkEv_8msGgIYeKXEtWgrDF2qTtCqnZY,1828
25
+ tests/test_together.py,sha256=zR06GoFU0549VYKZRZ5z8bbpvQ6l_vLA0bYLp5SokuU,4770
25
26
  tests/test_tools.py,sha256=vvi3FC4SDOwpyKJUFOWCWJ5i3Y474FrKFHnZpo4aFQg,13643
26
27
  tests/test_vectara_llms.py,sha256=WoswpfPGhQlBXyOijn5EBX0F2NL1Oq3FDB4wxu7mwXs,2485
27
28
  tests/test_vhc.py,sha256=jVojp8ZUDF60yJaYp5pBRdAdNYK1hhhPz_RTmlTEm4g,1980
@@ -29,15 +30,15 @@ tests/test_workflow.py,sha256=43YUF-0YDbiiJrTSYjnyqrC4gvHYuHQp7uuzV2jMdTE,3553
29
30
  vectara_agentic/__init__.py,sha256=CfS3QR4drKygcTcyH5zUUDuXXQ3WZtTCytz8W4-loeE,1077
30
31
  vectara_agentic/_callback.py,sha256=hYbHU_3sMF4-h0YMierZ9EEWspakNixk7wXAAWztlmU,15364
31
32
  vectara_agentic/_observability.py,sha256=rApfdndB2R021iM0xG4MumTSDX1Ba6qbNM0N_AOTbR0,4884
32
- vectara_agentic/_version.py,sha256=qs-0KHxeB9E6hJ4i743gXRLriP2E5HKFA9AKqTxuWXE,65
33
+ vectara_agentic/_version.py,sha256=DJAlh_N8BRE6skgMsnQl5zW8lCk-U-7BxMIZrymzu58,65
33
34
  vectara_agentic/agent.py,sha256=5eC4BkMPWep8c_LIHSB2N1CvsFLdX6qPAhIpgLR08Gc,49125
34
35
  vectara_agentic/agent_config.py,sha256=njqEX2qHJjAp2KpNuJglgZhyWXPK74wjIjBPACD6w7w,4074
35
36
  vectara_agentic/agent_endpoint.py,sha256=E_AF-YwxaKqd1-p43X62e1e4ugwOWKIyNq4RWOfsO7A,7402
36
37
  vectara_agentic/db_tools.py,sha256=nVZkpGdG63ooGngjX9g7YWyBZRtYMDpvzNasbO696nM,11498
37
- vectara_agentic/llm_utils.py,sha256=MosOvvNmWkg-8igr01DiXlAb7HOUIauOqbt92anrdZg,7664
38
+ vectara_agentic/llm_utils.py,sha256=-yBCIDV9euEbgUS9DuIxJly5hzxDbV5vl63uVJsqqKM,9290
38
39
  vectara_agentic/sub_query_workflow.py,sha256=1y0fBoUem4i-R34QYlSzcMwM8YhmYgj6S_bWynUtL6w,13001
39
40
  vectara_agentic/tool_utils.py,sha256=whnQlk9coeIt01sqUnKnzUorefgn96yWqhtRfHxNL84,25921
40
- vectara_agentic/tools.py,sha256=pb828u-tDps98N_R3U3_bCcnD9L3w5jdmhScduai74I,34852
41
+ vectara_agentic/tools.py,sha256=yF1y7jPR5jLQXvn-orjCYb0vy_o8WStU36WA8A3guEM,37554
41
42
  vectara_agentic/tools_catalog.py,sha256=p6eRram-diJyMz5dZI703auSAm97FfW5wLAMyz_2sB0,4634
42
43
  vectara_agentic/types.py,sha256=qKkK8vRNiLvEcMInMyOClK2bD7iFlrWGTkl3fGC6Xic,6117
43
44
  vectara_agentic/utils.py,sha256=R9HitEG5K3Q_p2M_teosT181OUxkhs1-hnj98qDYGbE,2545
@@ -51,8 +52,8 @@ vectara_agentic/agent_core/utils/hallucination.py,sha256=XmV7tW-MBN9BrzM79zu0T7z
51
52
  vectara_agentic/agent_core/utils/logging.py,sha256=-Ll8iUelml92WuhNWScuY6H-RheyZOTBHNxXQ1UGy0M,1701
52
53
  vectara_agentic/agent_core/utils/schemas.py,sha256=4sEyQ-_z-eZJzgxCJf62AuBgV7RN1Azc9mLPPlj6IWg,2769
53
54
  vectara_agentic/agent_core/utils/tools.py,sha256=k9Gm-UUQ3ZeGxrkjyrjmjcGxOkvnpylcm_Krnr-0fsY,4748
54
- vectara_agentic-0.4.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
- vectara_agentic-0.4.3.dist-info/METADATA,sha256=D-HoLTva17z9CzsUuc4gm1GnKQ-s7EqmQZLf3fYe6Z8,38886
56
- vectara_agentic-0.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
57
- vectara_agentic-0.4.3.dist-info/top_level.txt,sha256=Y7TQTFdOYGYodQRltUGRieZKIYuzeZj2kHqAUpfCUfg,22
58
- vectara_agentic-0.4.3.dist-info/RECORD,,
55
+ vectara_agentic-0.4.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
56
+ vectara_agentic-0.4.4.dist-info/METADATA,sha256=6hF6sFKTcDT_8Yap4E5ouDXjN3kRtREtXfo3A0EU2Fs,38906
57
+ vectara_agentic-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
58
+ vectara_agentic-0.4.4.dist-info/top_level.txt,sha256=Y7TQTFdOYGYodQRltUGRieZKIYuzeZj2kHqAUpfCUfg,22
59
+ vectara_agentic-0.4.4.dist-info/RECORD,,