vectara-agentic 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +1 -0
- tests/benchmark_models.py +1120 -0
- tests/conftest.py +18 -16
- tests/endpoint.py +9 -5
- tests/run_tests.py +3 -0
- tests/test_agent.py +52 -8
- tests/test_agent_type.py +2 -0
- tests/test_api_endpoint.py +13 -13
- tests/test_bedrock.py +9 -1
- tests/test_fallback.py +19 -8
- tests/test_gemini.py +14 -40
- tests/test_groq.py +9 -1
- tests/test_private_llm.py +20 -7
- tests/test_react_error_handling.py +293 -0
- tests/test_react_memory.py +257 -0
- tests/test_react_streaming.py +135 -0
- tests/test_react_workflow_events.py +395 -0
- tests/test_return_direct.py +1 -0
- tests/test_serialization.py +58 -20
- tests/test_together.py +9 -1
- tests/test_tools.py +3 -1
- tests/test_vectara_llms.py +2 -2
- tests/test_vhc.py +7 -2
- tests/test_workflow.py +17 -11
- vectara_agentic/_callback.py +79 -21
- vectara_agentic/_observability.py +19 -0
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +89 -21
- vectara_agentic/agent_core/factory.py +5 -6
- vectara_agentic/agent_core/prompts.py +3 -4
- vectara_agentic/agent_core/serialization.py +12 -10
- vectara_agentic/agent_core/streaming.py +245 -68
- vectara_agentic/agent_core/utils/schemas.py +2 -2
- vectara_agentic/llm_utils.py +6 -2
- vectara_agentic/sub_query_workflow.py +3 -2
- vectara_agentic/tools.py +0 -19
- {vectara_agentic-0.4.1.dist-info → vectara_agentic-0.4.3.dist-info}/METADATA +156 -61
- vectara_agentic-0.4.3.dist-info/RECORD +58 -0
- vectara_agentic-0.4.1.dist-info/RECORD +0 -53
- {vectara_agentic-0.4.1.dist-info → vectara_agentic-0.4.3.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.4.1.dist-info → vectara_agentic-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.4.1.dist-info → vectara_agentic-0.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
# Suppress external dependency warnings before any other imports
|
|
2
|
+
import warnings
|
|
3
|
+
|
|
4
|
+
warnings.simplefilter("ignore", DeprecationWarning)
|
|
5
|
+
|
|
6
|
+
import unittest
|
|
7
|
+
import asyncio
|
|
8
|
+
|
|
9
|
+
from vectara_agentic.agent import Agent
|
|
10
|
+
from vectara_agentic.tools import ToolsFactory
|
|
11
|
+
from vectara_agentic.agent_config import AgentConfig
|
|
12
|
+
from vectara_agentic.types import AgentType, ModelProvider
|
|
13
|
+
|
|
14
|
+
import nest_asyncio
|
|
15
|
+
|
|
16
|
+
nest_asyncio.apply()
|
|
17
|
+
|
|
18
|
+
from conftest import (
|
|
19
|
+
AgentTestMixin,
|
|
20
|
+
react_config_anthropic,
|
|
21
|
+
react_config_gemini,
|
|
22
|
+
react_config_together,
|
|
23
|
+
mult,
|
|
24
|
+
STANDARD_TEST_TOPIC,
|
|
25
|
+
STANDARD_TEST_INSTRUCTIONS,
|
|
26
|
+
is_rate_limited,
|
|
27
|
+
is_api_key_error,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TestReActErrorHandling(unittest.TestCase, AgentTestMixin):
    """Test error handling and recovery for ReAct agents.

    Each test builds a small Agent with a single multiplication tool and
    exercises one failure mode (rate limits, provider errors, tool errors,
    streaming errors). Provider outages and missing API keys are converted
    into test skips via the conftest helpers rather than failures.
    """

    def setUp(self):
        # A single multiplication tool is enough for every scenario here.
        self.tools = [ToolsFactory().create_tool(mult)]
        self.topic = STANDARD_TEST_TOPIC
        self.instructions = STANDARD_TEST_INSTRUCTIONS

    def test_react_anthropic_rate_limit_handling(self):
        """Test ReAct agent handling of Anthropic rate limits."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("Anthropic"):
            response = agent.chat("What is 5 times 10?")
            self.check_response_and_skip(response, "Anthropic")

            # If we get a response, check it's valid
            if response.response and not is_rate_limited(response.response):
                self.assertIn("50", response.response)

    def test_react_openai_error_handling(self):
        """Test ReAct agent handling of OpenAI-specific errors."""
        # Built inline (unlike the other configs, which come from conftest)
        # so this test exercises a freshly-constructed OpenAI ReAct config.
        openai_react_config = AgentConfig(
            agent_type=AgentType.REACT,
            main_llm_provider=ModelProvider.OPENAI,
            tool_llm_provider=ModelProvider.OPENAI,
        )

        agent = Agent(
            agent_config=openai_react_config,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("OpenAI"):
            response = agent.chat("Calculate 7 times 8.")
            self.check_response_and_skip(response, "OpenAI")

            # If we get a response, check it's valid
            if response.response and not is_rate_limited(response.response):
                self.assertIn("56", response.response)

    def test_react_together_error_handling(self):
        """Test ReAct agent handling of Together.AI errors."""
        agent = Agent(
            agent_config=react_config_together,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("Together AI"):
            response = agent.chat("Calculate 4 times 15.")
            self.check_response_and_skip(response, "Together AI")

            # If we get a response, check it's valid
            if response.response and not is_rate_limited(response.response):
                self.assertIn("60", response.response)

    def test_react_gemini_error_handling(self):
        """Test ReAct agent handling of Gemini-specific errors."""
        agent = Agent(
            agent_config=react_config_gemini,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("Gemini"):
            response = agent.chat("What is 12 times 4?")
            self.check_response_and_skip(response, "Gemini")

            # If we get a response, check it's valid
            if response.response and not is_rate_limited(response.response):
                self.assertIn("48", response.response)

    def test_react_async_error_handling(self):
        """Test ReAct agent error handling during async operations."""
        # This class is a plain unittest.TestCase, so async work is wrapped
        # in a coroutine and driven with asyncio.run (nest_asyncio is applied
        # at module import to allow nested loops).
        async def _async_test():
            agent = Agent(
                agent_config=react_config_anthropic,
                tools=self.tools,
                topic=self.topic,
                custom_instructions=self.instructions,
            )

            with self.with_provider_fallback("Anthropic"):
                # Test async chat error handling
                response = await agent.achat("Calculate 11 times 3.")
                self.check_response_and_skip(response, "Anthropic")

                if response.response and not is_rate_limited(response.response):
                    self.assertIn("33", response.response)

        asyncio.run(_async_test())

    def test_react_streaming_error_handling(self):
        """Test ReAct agent error handling during streaming operations."""
        async def _async_test():
            agent = Agent(
                agent_config=react_config_anthropic,
                tools=self.tools,
                topic=self.topic,
                custom_instructions=self.instructions,
            )

            with self.with_provider_fallback("Anthropic"):
                try:
                    stream = await agent.astream_chat("Calculate 13 times 2.")

                    # Consume the stream
                    chunks = []
                    async for chunk in stream.async_response_gen():
                        chunks.append(str(chunk))

                    response = await stream.aget_response()
                    self.check_response_and_skip(response, "Anthropic")

                    if response.response and not is_rate_limited(response.response):
                        self.assertIn("26", response.response)

                except Exception as e:
                    # Rate-limit / API-key problems are environmental: skip.
                    # Anything else is a genuine failure and is re-raised.
                    error_msg = str(e)
                    if is_rate_limited(error_msg) or is_api_key_error(error_msg):
                        self.skipTest(f"Anthropic streaming error: {error_msg}")
                    raise

        asyncio.run(_async_test())

    def test_react_tool_execution_error_handling(self):
        """Test ReAct agent handling of tool execution errors."""

        def error_tool(x: float) -> float:
            """A tool that intentionally raises an error."""
            if x < 0:
                raise ValueError("Cannot process negative numbers")
            return x * 2

        error_tools = [ToolsFactory().create_tool(error_tool)]

        agent = Agent(
            agent_config=react_config_anthropic,
            tools=error_tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("Anthropic"):
            # Test with valid input first
            response1 = agent.chat("Use the error_tool with input 5.")
            self.check_response_and_skip(response1, "Anthropic")

            if not is_rate_limited(response1.response):
                self.assertIn("10", response1.response)  # 5 * 2 = 10

            # Test with invalid input that should cause tool error
            response2 = agent.chat("Use the error_tool with input -1.")
            self.check_response_and_skip(response2, "Anthropic")

            if not is_rate_limited(response2.response):
                # ReAct agent should handle the tool error gracefully
                # and provide some kind of error message or explanation
                self.assertTrue(len(response2.response) > 0)
                # Should mention error or inability to process
                error_indicators = ["error", "cannot", "unable", "negative", "problem"]
                has_error_indication = any(
                    indicator in response2.response.lower()
                    for indicator in error_indicators
                )
                self.assertTrue(
                    has_error_indication,
                    f"Response should indicate error handling: {response2.response}",
                )

    def test_react_workflow_interruption_handling(self):
        """Test ReAct agent handling of workflow interruptions."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("Anthropic"):
            # Test a complex multi-step task that might be interrupted
            response = agent.chat(
                "Calculate 3 times 7, then multiply that by 4, then add 10 to the result."
            )
            self.check_response_and_skip(response, "Anthropic")

            if response.response and not is_rate_limited(response.response):
                # Final result should be: (3*7)*4+10 = 21*4+10 = 84+10 = 94
                self.assertIn("94", response.response)

    def test_react_memory_corruption_recovery(self):
        """Test ReAct agent recovery from memory-related errors."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("Anthropic"):
            # Normal operation
            response1 = agent.chat("Calculate 6 times 7.")
            self.check_response_and_skip(response1, "Anthropic")

            if not is_rate_limited(response1.response):
                self.assertIn("42", response1.response)

            # Continue conversation to test memory consistency
            response2 = agent.chat(
                "What was the result of the previous calculation?"
            )
            self.check_response_and_skip(response2, "Anthropic")

            if not is_rate_limited(response2.response):
                # Should remember the previous result
                self.assertIn("42", response2.response)

    def test_react_fallback_behavior_on_provider_failure(self):
        """Test ReAct agent behavior when provider fails completely."""

        # Create a config with an invalid API configuration to simulate failure
        # NOTE(review): this config looks identical to a valid Anthropic config;
        # confirm how the "invalid" condition is actually produced (environment?).
        invalid_config = AgentConfig(
            agent_type=AgentType.REACT,
            main_llm_provider=ModelProvider.ANTHROPIC,
            tool_llm_provider=ModelProvider.ANTHROPIC,
        )

        agent = Agent(
            agent_config=invalid_config,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback("Anthropic"):
            try:
                response = agent.chat("What is 2 times 3?")
                self.check_response_and_skip(response, "Anthropic")

                # If we get here without error, the test passed
                if response.response and not is_rate_limited(response.response):
                    self.assertIn("6", response.response)

            except Exception as e:
                error_msg = str(e)
                if is_rate_limited(error_msg) or is_api_key_error(error_msg):
                    self.skipTest(f"Anthropic provider failure: {error_msg}")
                # For other exceptions, let them bubble up as actual test failures
                raise
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# Allow running this test module directly (python tests/test_react_error_handling.py).
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
# Suppress external dependency warnings before any other imports
import warnings

warnings.simplefilter("ignore", DeprecationWarning)

import unittest
import threading

from vectara_agentic.agent import Agent
from vectara_agentic.tools import ToolsFactory
from llama_index.core.llms import MessageRole

import nest_asyncio

# Patch the running event loop so async agent calls can nest inside test runners.
nest_asyncio.apply()

from conftest import (
    AgentTestMixin,
    react_config_anthropic,
    react_config_gemini,
    mult,
    add,
    STANDARD_TEST_TOPIC,
    STANDARD_TEST_INSTRUCTIONS,
)

# NOTE(review): ARIZE_LOCK is never referenced elsewhere in this module —
# confirm whether it (and the `threading` import) can be removed, or whether
# a shared lock from another test module was intended.
ARIZE_LOCK = threading.Lock()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TestReActMemory(unittest.IsolatedAsyncioTestCase, AgentTestMixin):
    """Test memory persistence and conversation history for ReAct agents.

    Fix: this class previously subclassed plain ``unittest.TestCase``, under
    which the ``async def`` test below was never awaited — unittest called it,
    got a coroutine back, and silently passed without executing any assertion
    (with only a RuntimeWarning). ``IsolatedAsyncioTestCase`` runs async test
    methods properly and still supports the synchronous ones unchanged.
    """

    def setUp(self):
        # Two tools so tests can mix multiplication and addition steps.
        self.tools = [ToolsFactory().create_tool(mult), ToolsFactory().create_tool(add)]
        self.topic = STANDARD_TEST_TOPIC
        self.instructions = STANDARD_TEST_INSTRUCTIONS
        # Base session id; individual tests append a suffix to isolate memory.
        self.session_id = "test-react-memory-123"

    def test_react_memory_persistence_across_chats(self):
        """Test that ReAct agents maintain conversation context across multiple chat calls."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
            session_id=self.session_id,
        )

        with self.with_provider_fallback("Anthropic"):
            # First interaction - establish context
            response1 = agent.chat("Calculate 5 times 10 and remember this result.")
            self.check_response_and_skip(response1, "Anthropic")
            self.assertIn("50", response1.response)

            # Second interaction - reference previous result
            response2 = agent.chat(
                "What was the result I asked you to calculate and remember in the previous message?"
            )
            self.check_response_and_skip(response2, "Anthropic")
            self.assertIn("50", response2.response)

            # Third interaction - use previous result in new calculation
            response3 = agent.chat("Add 25 to the number you calculated earlier.")
            self.check_response_and_skip(response3, "Anthropic")
            # Should be 50 + 25 = 75
            self.assertIn("75", response3.response)

    def test_react_memory_with_tool_history(self):
        """Test that ReAct agents remember tool usage history across conversations."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
            session_id=self.session_id + "_tools",
        )

        with self.with_provider_fallback("Anthropic"):
            # Use multiplication tool
            response1 = agent.chat("Multiply 7 by 8.")
            self.check_response_and_skip(response1, "Anthropic")
            self.assertIn("56", response1.response)

            # Use addition tool
            response2 = agent.chat("Add 20 to 30.")
            self.check_response_and_skip(response2, "Anthropic")
            self.assertIn("50", response2.response)

            # Reference both previous tool uses
            response3 = agent.chat(
                "What were the two calculations I asked you to perform? "
                "Add those two results together."
            )
            self.check_response_and_skip(response3, "Anthropic")
            # Should remember 7*8=56 and 20+30=50, then 56+50=106
            self.assertIn("106", response3.response)

    def test_react_memory_state_consistency(self):
        """Test that ReAct agent memory state remains consistent during workflow execution."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
            session_id=self.session_id + "_consistency",
        )

        with self.with_provider_fallback("Anthropic"):
            # Check initial memory state
            initial_memory_size = len(agent.memory.get_all())

            # Perform a conversation with tool use
            response1 = agent.chat("Calculate 6 times 9 and tell me the result.")
            self.check_response_and_skip(response1, "Anthropic")

            # Memory should now contain user message, tool calls, and assistant response
            after_first_memory = agent.memory.get_all()
            self.assertGreater(
                len(after_first_memory),
                initial_memory_size,
                "Memory should contain new messages after interaction",
            )

            # Continue conversation
            response2 = agent.chat("Double that result.")
            self.check_response_and_skip(response2, "Anthropic")

            # Memory should contain all previous messages plus new ones
            after_second_memory = agent.memory.get_all()
            self.assertGreater(
                len(after_second_memory),
                len(after_first_memory),
                "Memory should accumulate messages across interactions",
            )

            # Final result should be (6*9)*2 = 54*2 = 108
            self.assertIn("108", response2.response)

    def test_react_memory_across_different_providers(self):
        """Test memory consistency when using different ReAct providers."""
        # Test with Anthropic
        agent_anthropic = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
            session_id=self.session_id + "_anthropic_provider",
        )

        with self.with_provider_fallback("Anthropic"):
            response1 = agent_anthropic.chat("Multiply 4 by 12.")
            self.check_response_and_skip(response1, "Anthropic")
            self.assertIn("48", response1.response)

            # Verify memory structure is consistent
            anthropic_memory = agent_anthropic.memory.get_all()
            self.assertGreater(len(anthropic_memory), 0)

        # Test with Gemini (if available)
        agent_gemini = Agent(
            agent_config=react_config_gemini,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
            session_id=self.session_id + "_gemini_provider",
        )

        with self.with_provider_fallback("Gemini"):
            response2 = agent_gemini.chat("Multiply 4 by 12.")
            self.check_response_and_skip(response2, "Gemini")
            self.assertIn("48", response2.response)

            # Verify memory structure is consistent across providers
            gemini_memory = agent_gemini.memory.get_all()
            self.assertGreater(len(gemini_memory), 0)

    def test_react_memory_serialization_compatibility(self):
        """Test that ReAct agent memory can be properly serialized and deserialized."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
            session_id=self.session_id + "_serialization",
        )

        with self.with_provider_fallback("Anthropic"):
            # Perform some interactions to populate memory
            response1 = agent.chat("Calculate 15 times 3.")
            self.check_response_and_skip(response1, "Anthropic")

            response2 = agent.chat("Add 10 to the previous result.")
            self.check_response_and_skip(response2, "Anthropic")

            # Get memory state before serialization
            original_memory = agent.memory.get_all()
            original_memory_size = len(original_memory)

            # Test that memory state can be accessed and contains expected content
            self.assertGreater(original_memory_size, 0)

            # Verify memory contains both user and assistant messages
            user_messages = [
                msg for msg in original_memory if msg.role == MessageRole.USER
            ]
            assistant_messages = [
                msg for msg in original_memory if msg.role == MessageRole.ASSISTANT
            ]

            self.assertGreater(
                len(user_messages), 0, "Should have user messages in memory"
            )
            self.assertGreater(
                len(assistant_messages), 0, "Should have assistant messages in memory"
            )

    async def test_react_memory_async_streaming_consistency(self):
        """Test memory consistency during async streaming operations with ReAct agents.

        Runs only because the class now derives from IsolatedAsyncioTestCase;
        under plain TestCase this coroutine was returned un-awaited.
        """
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
            session_id=self.session_id + "_async_streaming",
        )

        with self.with_provider_fallback("Anthropic"):
            # First streaming interaction
            stream1 = await agent.astream_chat("Calculate 8 times 7.")
            async for chunk in stream1.async_response_gen():
                pass
            response1 = await stream1.aget_response()
            self.check_response_and_skip(response1, "Anthropic")

            # Check memory after first streaming interaction
            memory_after_first = agent.memory.get_all()
            self.assertGreater(len(memory_after_first), 0)

            # Second streaming interaction that references the first
            stream2 = await agent.astream_chat(
                "Subtract 6 from the result you just calculated."
            )
            async for chunk in stream2.async_response_gen():
                pass
            response2 = await stream2.aget_response()
            self.check_response_and_skip(response2, "Anthropic")

            # Memory should contain both interactions
            memory_after_second = agent.memory.get_all()
            self.assertGreater(len(memory_after_second), len(memory_after_first))

            # Final result should be (8*7)-6 = 56-6 = 50
            self.assertIn("50", response2.response)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# Allow running this test module directly (python tests/test_react_memory.py).
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# Suppress external dependency warnings before any other imports
|
|
2
|
+
import warnings
|
|
3
|
+
|
|
4
|
+
warnings.simplefilter("ignore", DeprecationWarning)
|
|
5
|
+
|
|
6
|
+
import unittest
|
|
7
|
+
|
|
8
|
+
from vectara_agentic.agent import Agent
|
|
9
|
+
from vectara_agentic.tools import ToolsFactory
|
|
10
|
+
|
|
11
|
+
import nest_asyncio
|
|
12
|
+
|
|
13
|
+
nest_asyncio.apply()
|
|
14
|
+
|
|
15
|
+
from conftest import (
|
|
16
|
+
AgentTestMixin,
|
|
17
|
+
react_config_openai,
|
|
18
|
+
react_config_anthropic,
|
|
19
|
+
react_config_gemini,
|
|
20
|
+
react_config_together,
|
|
21
|
+
mult,
|
|
22
|
+
STANDARD_TEST_TOPIC,
|
|
23
|
+
STANDARD_TEST_INSTRUCTIONS,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TestReActStreaming(unittest.IsolatedAsyncioTestCase, AgentTestMixin):
    """Test streaming functionality for ReAct agents across all providers.

    A shared async workflow drives three chained calculations through
    ``astream_chat`` and verifies both the final arithmetic and that the
    stream actually produced chunks; one test per provider reuses it.
    """

    def setUp(self):
        # IsolatedAsyncioTestCase still invokes the synchronous setUp.
        self.tools = [ToolsFactory().create_tool(mult)]
        self.topic = STANDARD_TEST_TOPIC
        self.instructions = STANDARD_TEST_INSTRUCTIONS

    async def _test_react_streaming_workflow(self, config, provider_name):
        """Common workflow for testing ReAct streaming with any provider."""
        agent = Agent(
            agent_config=config,
            tools=self.tools,
            topic=self.topic,
            custom_instructions=self.instructions,
        )

        with self.with_provider_fallback(provider_name):
            # First calculation: 5 * 10 = 50
            stream1 = await agent.astream_chat(
                "What is 5 times 10. Only give the answer, nothing else"
            )
            # Consume the stream
            async for chunk in stream1.async_response_gen():
                pass
            response1 = await stream1.aget_response()
            self.check_response_and_skip(response1, provider_name)

            # Second calculation: 3 * 7 = 21
            stream2 = await agent.astream_chat(
                "what is 3 times 7. Only give the answer, nothing else"
            )
            # Consume the stream
            async for chunk in stream2.async_response_gen():
                pass
            response2 = await stream2.aget_response()
            self.check_response_and_skip(response2, provider_name)

            # Final calculation: 50 * 21 = 1050
            stream3 = await agent.astream_chat(
                "multiply the results of the last two multiplications. Only give the answer, nothing else."
            )
            # Consume the stream and collect chunks for verification
            chunks = []
            async for chunk in stream3.async_response_gen():
                chunks.append(chunk)

            response3 = await stream3.aget_response()
            self.check_response_and_skip(response3, provider_name)

            # Verify the final result
            self.assertIn("1050", response3.response)

            # Verify we actually got streaming chunks
            self.assertGreater(
                len(chunks), 0, f"{provider_name} should produce streaming chunks"
            )

    async def test_anthropic_react_streaming(self):
        """Test ReAct agent streaming with Anthropic."""
        await self._test_react_streaming_workflow(react_config_anthropic, "Anthropic")

    async def test_openai_react_streaming(self):
        """Test ReAct agent streaming with OpenAI."""
        await self._test_react_streaming_workflow(react_config_openai, "OpenAI")

    async def test_gemini_react_streaming(self):
        """Test ReAct agent streaming with Gemini."""
        await self._test_react_streaming_workflow(react_config_gemini, "Gemini")

    async def test_together_react_streaming(self):
        """Test ReAct agent streaming with Together.AI."""
        await self._test_react_streaming_workflow(react_config_together, "Together AI")

    async def test_react_streaming_reasoning_pattern(self):
        """Test that ReAct agents demonstrate reasoning patterns in streaming responses."""
        agent = Agent(
            agent_config=react_config_anthropic,
            tools=self.tools,
            topic=self.topic,
            custom_instructions="Think step by step and show your reasoning before using tools.",
        )

        with self.with_provider_fallback("Anthropic"):
            # Ask a question that requires multi-step reasoning
            stream = await agent.astream_chat(
                "I need to calculate 7 times 8, then add 12 to that result, then multiply by 2. "
                "Show me your reasoning process."
            )

            chunks = []
            async for chunk in stream.async_response_gen():
                chunks.append(str(chunk))

            response = await stream.aget_response()
            self.check_response_and_skip(response, "Anthropic")

            # Verify we got streaming content
            self.assertGreater(len(chunks), 0)

            # For ReAct agents, we should see reasoning patterns in the response
            full_content = "".join(chunks).lower()

            # The final answer should be correct: (7*8 + 12) * 2 = (56 + 12) * 2 = 68 * 2 = 136
            self.assertTrue("136" in response.response or "136" in full_content)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Allow running this test module directly (python tests/test_react_streaming.py).
if __name__ == "__main__":
    unittest.main()
|