vectara-agentic 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic.
- tests/conftest.py +5 -1
- tests/run_tests.py +1 -0
- tests/test_agent.py +26 -29
- tests/test_agent_fallback_memory.py +270 -0
- tests/test_agent_memory_consistency.py +229 -0
- tests/test_agent_type.py +4 -0
- tests/test_bedrock.py +46 -31
- tests/test_gemini.py +7 -22
- tests/test_groq.py +46 -31
- tests/test_serialization.py +3 -6
- tests/test_session_memory.py +252 -0
- tests/test_streaming.py +58 -37
- tests/test_together.py +62 -0
- tests/test_vhc.py +3 -2
- tests/test_workflow.py +9 -28
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +212 -33
- vectara_agentic/agent_core/factory.py +30 -148
- vectara_agentic/agent_core/prompts.py +20 -13
- vectara_agentic/agent_core/serialization.py +3 -0
- vectara_agentic/agent_core/streaming.py +22 -34
- vectara_agentic/agent_core/utils/__init__.py +0 -5
- vectara_agentic/agent_core/utils/hallucination.py +54 -99
- vectara_agentic/llm_utils.py +1 -1
- vectara_agentic/types.py +9 -3
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.1.dist-info}/METADATA +49 -8
- vectara_agentic-0.4.1.dist-info/RECORD +53 -0
- vectara_agentic/agent_core/utils/prompt_formatting.py +0 -56
- vectara_agentic-0.4.0.dist-info/RECORD +0 -50
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.1.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.1.dist-info}/top_level.txt +0 -0
tests/test_together.py
ADDED
@@ -0,0 +1,62 @@
+# Suppress external dependency warnings before any other imports
+import warnings
+warnings.simplefilter("ignore", DeprecationWarning)
+
+import unittest
+import threading
+
+from vectara_agentic.agent import Agent
+from vectara_agentic.tools import ToolsFactory
+
+import nest_asyncio
+nest_asyncio.apply()
+
+from conftest import fc_config_together, mult, STANDARD_TEST_TOPIC, STANDARD_TEST_INSTRUCTIONS
+
+
+ARIZE_LOCK = threading.Lock()
+
+class TestTogether(unittest.IsolatedAsyncioTestCase):
+
+    async def test_multiturn(self):
+        with ARIZE_LOCK:
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                agent_config=fc_config_together,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+            )
+
+            # First calculation: 5 * 10 = 50
+            stream1 = await agent.astream_chat(
+                "What is 5 times 10. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream1.async_response_gen():
+                pass
+            _ = await stream1.aget_response()
+
+            # Second calculation: 3 * 7 = 21
+            stream2 = await agent.astream_chat(
+                "what is 3 times 7. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream2.async_response_gen():
+                pass
+            _ = await stream2.aget_response()
+
+            # Final calculation: 50 * 21 = 1050
+            stream3 = await agent.astream_chat(
+                "multiply the results of the last two questions. Output only the answer."
+            )
+            # Consume the stream
+            async for chunk in stream3.async_response_gen():
+                pass
+            response3 = await stream3.aget_response()
+
+            self.assertEqual(response3.response, "1050")
+
+
+if __name__ == "__main__":
+    unittest.main()
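Note the consumption pattern this new test relies on: the token stream from astream_chat() is fully drained via async_response_gen() before the final response is fetched with aget_response(). A minimal sketch of that pattern as a reusable helper (the helper name is ours, not part of the package; it assumes an Agent built as in the test above):

import asyncio

async def ask(agent, prompt: str) -> str:
    # Drain the token stream first; each chunk could be forwarded to a UI.
    stream = await agent.astream_chat(prompt)
    async for _chunk in stream.async_response_gen():
        pass
    # Only then fetch the final aggregated response.
    response = await stream.aget_response()
    return response.response

# Usage, assuming `agent` is constructed as in test_multiturn above:
# answer = asyncio.run(ask(agent, "What is 5 times 10?"))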
tests/test_vhc.py
CHANGED
@@ -54,8 +54,9 @@ class TestVHC(unittest.TestCase):
             vectara_api_key=vectara_api_key,
         )

-
-
+        _ = agent.chat("Are large cats better than small dogs?")
+        vhc_res = agent.compute_vhc()
+        vhc_corrections = vhc_res.get("corrections", [])
         self.assertTrue(
             len(vhc_corrections) >= 0 and len(vhc_corrections) <= 2,
             "Corrections should be between 0 and 2"
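The updated test reflects the new two-step VHC (hallucination correction) flow in 0.4.1: run a query with chat(), then score the last query/response pair with a separate compute_vhc() call. A minimal sketch, assuming an Agent constructed with vectara_api_key as in the test:

# Assumes `agent` was created with a valid vectara_api_key.
_ = agent.chat("Are large cats better than small dogs?")

# compute_vhc() evaluates the last query/response pair; per the new
# agent.py code below, results are cached, so repeated calls are cheap.
vhc_res = agent.compute_vhc()
print(vhc_res.get("corrected_text"))
for correction in vhc_res.get("corrections", []):
    print(correction)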
tests/test_workflow.py
CHANGED
@@ -8,29 +8,16 @@ from vectara_agentic.agent import Agent
 from vectara_agentic.agent_config import AgentConfig
 from vectara_agentic.tools import ToolsFactory
 from vectara_agentic.sub_query_workflow import SubQuestionQueryWorkflow, SequentialSubQuestionsWorkflow
-
-def mult(x: float, y: float):
-    """
-    Multiply two numbers.
-    """
-    return x * y
-
-def add(x: float, y: float):
-    """
-    Add two numbers.
-    """
-    return x + y
+from conftest import mult, add, STANDARD_TEST_TOPIC, WORKFLOW_TEST_INSTRUCTIONS

 class TestWorkflowPackage(unittest.IsolatedAsyncioTestCase):

     async def test_sub_query_workflow(self):
         tools = [ToolsFactory().create_tool(mult)] + [ToolsFactory().create_tool(add)]
-        topic = "AI topic"
-        instructions = "You are a helpful AI assistant."
         agent = Agent(
             tools=tools,
-            topic=
-            custom_instructions=
+            topic=STANDARD_TEST_TOPIC,
+            custom_instructions=WORKFLOW_TEST_INSTRUCTIONS,
             agent_config = AgentConfig(),
             workflow_cls = SubQuestionQueryWorkflow,
         )
@@ -50,12 +37,10 @@ class TestWorkflowPackage(unittest.IsolatedAsyncioTestCase):

     async def test_seq_sub_query_workflow(self):
         tools = [ToolsFactory().create_tool(mult)] + [ToolsFactory().create_tool(add)]
-        topic = "AI topic"
-        instructions = "You are a helpful AI assistant."
         agent = Agent(
             tools=tools,
-            topic=
-            custom_instructions=
+            topic=STANDARD_TEST_TOPIC,
+            custom_instructions=WORKFLOW_TEST_INSTRUCTIONS,
             agent_config = AgentConfig(),
             workflow_cls = SequentialSubQuestionsWorkflow,
         )
@@ -70,12 +55,10 @@ class TestWorkflowFailure(unittest.IsolatedAsyncioTestCase):

     async def test_workflow_failure_sub_question(self):
         tools = [ToolsFactory().create_tool(mult)] + [ToolsFactory().create_tool(add)]
-        topic = "AI topic"
-        instructions = "You are a helpful AI assistant."
         agent = Agent(
             tools=tools,
-            topic=
-            custom_instructions=
+            topic=STANDARD_TEST_TOPIC,
+            custom_instructions=WORKFLOW_TEST_INSTRUCTIONS,
             agent_config = AgentConfig(),
             workflow_cls = SubQuestionQueryWorkflow,
             workflow_timeout = 1
@@ -89,12 +72,10 @@ class TestWorkflowFailure(unittest.IsolatedAsyncioTestCase):

     async def test_workflow_failure_seq_sub_question(self):
         tools = [ToolsFactory().create_tool(mult)] + [ToolsFactory().create_tool(add)]
-        topic = "AI topic"
-        instructions = "You are a helpful AI assistant."
         agent = Agent(
             tools=tools,
-            topic=
-            custom_instructions=
+            topic=STANDARD_TEST_TOPIC,
+            custom_instructions=WORKFLOW_TEST_INSTRUCTIONS,
             agent_config = AgentConfig(),
             workflow_cls = SequentialSubQuestionsWorkflow,
             workflow_timeout = 1
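The four workflow tests now import their fixtures from tests/conftest.py instead of defining them inline. conftest.py itself changed in this release (+5 -1) but its body is not shown here; the following is a hypothetical reconstruction of the shared fixtures, where the tool bodies match the definitions removed above and the constant values are only illustrative:

# Hypothetical sketch of tests/conftest.py fixtures (not shown in this diff).

def mult(x: float, y: float):
    """
    Multiply two numbers.
    """
    return x * y

def add(x: float, y: float):
    """
    Add two numbers.
    """
    return x + y

# Illustrative values; the removed test code used these literals.
STANDARD_TEST_TOPIC = "AI topic"
WORKFLOW_TEST_INSTRUCTIONS = "You are a helpful AI assistant."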
vectara_agentic/_version.py
CHANGED
vectara_agentic/agent.py
CHANGED
@@ -3,10 +3,11 @@ This module contains the Agent class for handling different types of agents and
 """

 import warnings
+
 warnings.simplefilter("ignore", DeprecationWarning)

 # pylint: disable=wrong-import-position
-from typing import List, Callable, Optional, Dict, Any,
+from typing import List, Callable, Optional, Dict, Any, Tuple, TYPE_CHECKING
 import os
 from datetime import date
 import json
@@ -19,7 +20,7 @@ from pydantic_core import PydanticUndefined
 from dotenv import load_dotenv

 # Runtime imports for components used at module level
-from llama_index.core.llms import MessageRole
+from llama_index.core.llms import MessageRole, ChatMessage
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.memory import Memory

@@ -27,7 +28,6 @@ from llama_index.core.memory import Memory
 if TYPE_CHECKING:
     from llama_index.core.tools import FunctionTool
     from llama_index.core.workflow import Workflow
-    from llama_index.core.agent.runner.base import AgentRunner
     from llama_index.core.agent.types import BaseAgent
     from llama_index.core.callbacks.base_handler import BaseCallbackHandler

@@ -96,6 +96,7 @@ class Agent:
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
         vectara_api_key: Optional[str] = None,
+        session_id: Optional[str] = None,
     ) -> None:
         """
         Initialize the agent with the specified type, tools, topic, and system message.
@@ -120,7 +121,9 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
-            vectara_api_key (str, optional): The Vectara API key for
+            vectara_api_key (str, optional): The Vectara API key for VHC computation. Defaults to None.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
         """
         self.agent_config = agent_config or AgentConfig()
         self.agent_config_type = AgentConfigType.DEFAULT
@@ -147,7 +150,9 @@

         # Validate tools
         if validate_tools:
-            validate_tool_consistency(
+            validate_tool_consistency(
+                self.tools, self._custom_instructions, self.agent_config
+            )

         # Setup callback manager
         callbacks: list[BaseCallbackHandler] = [
@@ -157,15 +162,15 @@
         self.verbose = verbose

         self.session_id = (
-
+            session_id
+            or getattr(self, "session_id", None)
+            or f"{topic}:{date.today().isoformat()}"
         )

         self.memory = Memory.from_defaults(
             session_id=self.session_id, token_limit=65536
         )
         if chat_history:
-            from llama_index.core.llms import ChatMessage
-
             msgs = []
             for u, a in chat_history:
                 msgs.append(ChatMessage.from_str(u, role=MessageRole.USER))
@@ -184,6 +189,12 @@
             logger.warning(f"Failed to set up observer ({e}), ignoring")
             self.observability_enabled = False

+        # VHC state tracking
+        self._vhc_cache = {}  # Cache VHC results by query hash
+        self._last_query = None
+        self._last_response = None
+        self._current_tool_outputs = []  # Store tool outputs from current query for VHC
+
     @property
     def llm(self):
         """Lazy-loads the LLM."""
@@ -209,7 +220,7 @@

     def _create_agent(
         self, config: AgentConfig, llm_callback_manager: "CallbackManager"
-    ) ->
+    ) -> "BaseAgent":
         """
         Creates the agent based on the configuration object.

@@ -218,7 +229,7 @@
             llm_callback_manager: The callback manager for the agent's llm.

         Returns:
-
+            BaseAgent: The configured agent object.
         """
         # Use the same LLM instance for consistency
         llm = (
@@ -241,12 +252,11 @@
         )

     def clear_memory(self) -> None:
-        """Clear the agent's memory."""
+        """Clear the agent's memory and reset agent instances to ensure consistency."""
         self.memory.reset()
-
-
-
-        self._fallback_agent.memory = self.memory
+        # Clear agent instances so they get recreated with the cleared memory
+        self._agent = None
+        self._fallback_agent = None

     def __eq__(self, other):
         if not isinstance(other, Agent):
@@ -320,6 +330,7 @@
         chat_history: Optional[list[Tuple[str, str]]] = None,
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
+        session_id: Optional[str] = None,
     ) -> "Agent":
         """
         Create an agent from tools, agent type, and language model.
@@ -339,6 +350,8 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.

         Returns:
             Agent: An instance of the Agent class.
@@ -356,6 +369,7 @@
             fallback_agent_config=fallback_agent_config,
             workflow_cls=workflow_cls,
             workflow_timeout=workflow_timeout,
+            session_id=session_id,
         )

     @classmethod
@@ -400,8 +414,18 @@
         vectara_presence_penalty: Optional[float] = None,
         vectara_save_history: bool = True,
         return_direct: bool = False,
+        session_id: Optional[str] = None,
     ) -> "Agent":
-        """Create an agent from a single Vectara corpus using the factory function.
+        """Create an agent from a single Vectara corpus using the factory function.
+
+        Args:
+            tool_name (str): Name of the tool to be created.
+            data_description (str): Description of the data/corpus.
+            assistant_specialty (str): The specialty/topic of the assistant.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
+            ... (other parameters as documented in factory function)
+        """
         # Use the factory function to avoid code duplication
         config = create_agent_from_corpus(
             tool_name=tool_name,
@@ -444,6 +468,7 @@
             chat_history=chat_history,
             agent_progress_callback=agent_progress_callback,
             query_logging_callback=query_logging_callback,
+            session_id=session_id,
             **config,
         )

@@ -451,11 +476,16 @@
         """
         Switch the configuration type of the agent.
         This function is called automatically to switch the agent configuration if the current configuration fails.
+        Ensures memory consistency by clearing agent instances so they are recreated with current memory.
         """
         if self.agent_config_type == AgentConfigType.DEFAULT:
             self.agent_config_type = AgentConfigType.FALLBACK
+            # Clear the fallback agent so it gets recreated with current memory
+            self._fallback_agent = None
         else:
             self.agent_config_type = AgentConfigType.DEFAULT
+            # Clear the main agent so it gets recreated with current memory
+            self._agent = None

     def report(self, detailed: bool = False) -> None:
         """
@@ -501,19 +531,6 @@
             else self.fallback_agent_config.agent_type
         )

-    async def _aformat_for_lats(self, prompt, agent_response):
-        llm_prompt = f"""
-        Given the question '{prompt}', and agent response '{agent_response.response}',
-        Please provide a well formatted final response to the query.
-        final response:
-        """
-        agent_type = self._get_current_agent_type()
-        if agent_type != AgentType.LATS:
-            return
-
-        agent = self._get_current_agent()
-        agent_response.response = (await agent.llm.acomplete(llm_prompt)).text
-
     def chat(self, prompt: str) -> AgentResponse:
         """
         Interact with the agent using a chat prompt.
@@ -562,9 +579,12 @@
         ]:
             from llama_index.core.workflow import Context

+            # Create context and pass memory to the workflow agent
+            # According to LlamaIndex docs, we should let the workflow manage memory internally
             ctx = Context(current_agent)
+
             handler = current_agent.run(
-                user_msg=prompt,
+                user_msg=prompt, memory=self.memory, ctx=ctx
             )

             # Listen to workflow events if progress callback is set
@@ -698,6 +718,27 @@
                 response=response_text, metadata=getattr(result, "metadata", {})
             )

+            # Retrieve updated memory from workflow context
+            # According to LlamaIndex docs, workflow agents manage memory internally
+            # and we can access it via ctx.store.get("memory")
+            try:
+                workflow_memory = await ctx.store.get("memory")
+                if workflow_memory:
+                    # Update our external memory with the workflow's memory
+                    self.memory = workflow_memory
+            except Exception as e:
+                # If we can't retrieve workflow memory, fall back to manual management
+                warning_msg = (
+                    f"Could not retrieve workflow memory, falling back to "
+                    f"manual management: {e}"
+                )
+                logger.warning(warning_msg)
+                user_msg = ChatMessage.from_str(prompt, role=MessageRole.USER)
+                assistant_msg = ChatMessage.from_str(
+                    response_text, role=MessageRole.ASSISTANT
+                )
+                self.memory.put_messages([user_msg, assistant_msg])
+
         # Standard chat interaction for other agent types
         else:
             agent_response = await current_agent.achat(prompt)
@@ -713,7 +754,9 @@
         except Exception as e:
             last_error = e
             if self.verbose:
-                logger.warning(
+                logger.warning(
+                    f"LLM call failed on attempt {attempt}. " f"Error: {e}."
+                )
             if attempt >= 2 and self.fallback_agent_config:
                 self._switch_agent_config()
             await asyncio.sleep(1)
@@ -750,6 +793,9 @@
         Returns:
             AgentStreamingResponse: The streaming response from the agent.
         """
+        # Store query for VHC processing and clear previous tool outputs
+        self._last_query = prompt
+        self._clear_tool_outputs()
         max_attempts = 4 if self.fallback_agent_config else 2
         attempt = 0
         orig_llm = self.llm.metadata.model_name
@@ -763,9 +809,12 @@
         if self._get_current_agent_type() == AgentType.FUNCTION_CALLING:
             from llama_index.core.workflow import Context

+            # Create context and pass memory to the workflow agent
+            # According to LlamaIndex docs, we should let the workflow manage memory internally
             ctx = Context(current_agent)
+
             handler = current_agent.run(
-                user_msg=prompt,
+                user_msg=prompt, memory=self.memory, ctx=ctx
             )

             # Use the dedicated FunctionCallingStreamHandler
@@ -809,6 +858,134 @@
                 f"{max_attempts} attempts ({last_error})."
             )

+    def _clear_tool_outputs(self):
+        """Clear stored tool outputs at the start of a new query."""
+        self._current_tool_outputs.clear()
+        logging.info("🔧 [TOOL_STORAGE] Cleared stored tool outputs for new query")
+
+    def _add_tool_output(self, tool_name: str, content: str):
+        """Add a tool output to the current collection for VHC."""
+        tool_output = {
+            'status_type': 'TOOL_OUTPUT',
+            'content': content,
+            'tool_name': tool_name
+        }
+        self._current_tool_outputs.append(tool_output)
+        logging.info(f"🔧 [TOOL_STORAGE] Added tool output from '{tool_name}': {len(content)} chars")
+
+    def _get_stored_tool_outputs(self) -> List[dict]:
+        """Get the stored tool outputs from the current query."""
+        logging.info(f"🔧 [TOOL_STORAGE] Retrieved {len(self._current_tool_outputs)} stored tool outputs")
+        return self._current_tool_outputs.copy()
+
+    async def acompute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (async version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] UNIQUE_DEBUG_MESSAGE acompute_vhc method called - "
+            f"stored_tool_outputs_count={len(self._current_tool_outputs)}"
+        )
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] _last_query: {'set' if self._last_query else 'None'}"
+        )
+
+        if not self._last_query:
+            logging.info("🔍 [VHC_AGENT] Returning early - no _last_query")
+            return {"corrected_text": None, "corrections": []}
+
+        # For VHC to work, we need the response text from memory
+        # Get the latest assistant response from memory
+        messages = self.memory.get()
+        logging.info(
+            f"🔍 [VHC_AGENT] memory.get() returned {len(messages) if messages else 0} messages"
+        )
+
+        if not messages:
+            logging.info("🔍 [VHC_AGENT] Returning early - no messages in memory")
+            return {"corrected_text": None, "corrections": []}
+
+        # Find the last assistant message
+        last_response = None
+        for msg in reversed(messages):
+            if msg.role == MessageRole.ASSISTANT:
+                last_response = msg.content
+                break
+
+        logging.info(
+            f"🔍 [VHC_AGENT] Found last_response: {'set' if last_response else 'None'}"
+        )
+
+        if not last_response:
+            logging.info("🔍 [VHC_AGENT] Returning early - no last assistant response found")
+            return {"corrected_text": None, "corrections": []}
+
+        # Update stored response for caching
+        self._last_response = last_response
+
+        # Create cache key from query + response
+        cache_key = hash(f"{self._last_query}:{self._last_response}")
+
+        # Return cached results if available
+        if cache_key in self._vhc_cache:
+            return self._vhc_cache[cache_key]
+
+        # Check if we have VHC API key
+        logging.info(
+            f"🔍 [VHC_AGENT] acompute_vhc called with vectara_api_key={'set' if self.vectara_api_key else 'None'}"
+        )
+        if not self.vectara_api_key:
+            logging.info("🔍 [VHC_AGENT] No vectara_api_key - returning early with None")
+            return {"corrected_text": None, "corrections": []}
+
+        # Compute VHC using existing library function
+        from .agent_core.utils.hallucination import analyze_hallucinations
+
+        try:
+            # Use stored tool outputs from current query
+            stored_tool_outputs = self._get_stored_tool_outputs()
+            logging.info(f"🔧 [VHC_AGENT] Using {len(stored_tool_outputs)} stored tool outputs for VHC")
+
+            corrected_text, corrections = analyze_hallucinations(
+                query=self._last_query,
+                chat_history=self.memory.get(),
+                agent_response=self._last_response,
+                tools=self.tools,
+                vectara_api_key=self.vectara_api_key,
+                tool_outputs=stored_tool_outputs,
+            )

+            # Cache results
+            results = {"corrected_text": corrected_text, "corrections": corrections}
+            self._vhc_cache[cache_key] = results
+
+            return results
+
+        except Exception as e:
+            logger.error(f"VHC computation failed: {e}")
+            return {"corrected_text": None, "corrections": []}
+
+    def compute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (sync version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        try:
+            loop = asyncio.get_event_loop()
+            return loop.run_until_complete(self.acompute_vhc())
+        except RuntimeError:
+            # No event loop running, create a new one
+            return asyncio.run(self.acompute_vhc())
+
     #
     # run() method for running a workflow
     # workflow will always get these arguments in the StartEvent: agent, tools, llm, verbose
@@ -882,7 +1059,9 @@
                     input_dict[key] = value
                 output = outputs_model_on_fail_cls.model_validate(input_dict)
             else:
-                logger.warning(
+                logger.warning(
+                    f"Vectara Agentic: Workflow failed with unexpected error: {e}"
+                )
                 raise type(e)(str(e)).with_traceback(e.__traceback__)

         return output
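Taken together, the agent.py changes introduce session-scoped memory and on-demand VHC. A minimal usage sketch of the new session_id parameter (the tool and literal values are illustrative; the parameter and its default come from the docstrings above):

from vectara_agentic.agent import Agent
from vectara_agentic.tools import ToolsFactory

def mult(x: float, y: float):
    """Multiply two numbers."""  # illustrative tool, as in the tests
    return x * y

agent = Agent(
    tools=[ToolsFactory().create_tool(mult)],
    topic="math",                       # illustrative
    custom_instructions="Be concise.",  # illustrative
    session_id="user-42",               # new in 0.4.1: pin memory to a session
)
# Omitting session_id falls back to the documented default,
# f"{topic}:{date.today().isoformat()}".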