vectara-agentic 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of vectara-agentic has been flagged as possibly problematic.
- tests/benchmark_models.py +945 -0
- tests/conftest.py +9 -5
- tests/run_tests.py +3 -0
- tests/test_agent.py +57 -29
- tests/test_agent_fallback_memory.py +270 -0
- tests/test_agent_memory_consistency.py +229 -0
- tests/test_agent_type.py +4 -0
- tests/test_bedrock.py +46 -31
- tests/test_fallback.py +1 -1
- tests/test_gemini.py +7 -22
- tests/test_groq.py +46 -31
- tests/test_private_llm.py +1 -1
- tests/test_serialization.py +3 -6
- tests/test_session_memory.py +252 -0
- tests/test_streaming.py +58 -37
- tests/test_together.py +62 -0
- tests/test_vhc.py +3 -2
- tests/test_workflow.py +9 -28
- vectara_agentic/_observability.py +19 -0
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +246 -37
- vectara_agentic/agent_core/factory.py +34 -153
- vectara_agentic/agent_core/prompts.py +19 -13
- vectara_agentic/agent_core/serialization.py +17 -8
- vectara_agentic/agent_core/streaming.py +27 -43
- vectara_agentic/agent_core/utils/__init__.py +0 -5
- vectara_agentic/agent_core/utils/hallucination.py +54 -99
- vectara_agentic/llm_utils.py +4 -2
- vectara_agentic/sub_query_workflow.py +3 -2
- vectara_agentic/tools.py +0 -19
- vectara_agentic/types.py +9 -3
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/METADATA +79 -39
- vectara_agentic-0.4.2.dist-info/RECORD +54 -0
- vectara_agentic/agent_core/utils/prompt_formatting.py +0 -56
- vectara_agentic-0.4.0.dist-info/RECORD +0 -50
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/top_level.txt +0 -0
vectara_agentic/agent.py
CHANGED
@@ -3,10 +3,11 @@ This module contains the Agent class for handling different types of agents and
 """
 
 import warnings
+
 warnings.simplefilter("ignore", DeprecationWarning)
 
 # pylint: disable=wrong-import-position
-from typing import List, Callable, Optional, Dict, Any,
+from typing import List, Callable, Optional, Dict, Any, Tuple, TYPE_CHECKING
 import os
 from datetime import date
 import json
@@ -19,16 +20,16 @@ from pydantic_core import PydanticUndefined
 from dotenv import load_dotenv
 
 # Runtime imports for components used at module level
-from llama_index.core.llms import MessageRole
+from llama_index.core.llms import MessageRole, ChatMessage
 from llama_index.core.callbacks import CallbackManager
-from llama_index.core.memory import
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core.storage.chat_store import SimpleChatStore
 
 # Heavy llama_index imports moved to TYPE_CHECKING for lazy loading
 if TYPE_CHECKING:
     from llama_index.core.tools import FunctionTool
     from llama_index.core.workflow import Workflow
-    from llama_index.core.agent
-    from llama_index.core.agent.types import BaseAgent
+    from llama_index.core.agent import BaseWorkflowAgent
     from llama_index.core.callbacks.base_handler import BaseCallbackHandler
 
 
@@ -96,6 +97,7 @@ class Agent:
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
         vectara_api_key: Optional[str] = None,
+        session_id: Optional[str] = None,
     ) -> None:
         """
         Initialize the agent with the specified type, tools, topic, and system message.
@@ -120,7 +122,9 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
-            vectara_api_key (str, optional): The Vectara API key for
+            vectara_api_key (str, optional): The Vectara API key for VHC computation. Defaults to None.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
         """
         self.agent_config = agent_config or AgentConfig()
         self.agent_config_type = AgentConfigType.DEFAULT
@@ -147,7 +151,9 @@
 
         # Validate tools
         if validate_tools:
-            validate_tool_consistency(
+            validate_tool_consistency(
+                self.tools, self._custom_instructions, self.agent_config
+            )
 
         # Setup callback manager
         callbacks: list[BaseCallbackHandler] = [
@@ -157,15 +163,18 @@
         self.verbose = verbose
 
         self.session_id = (
-
+            session_id
+            or getattr(self, "session_id", None)
+            or f"{topic}:{date.today().isoformat()}"
         )
 
-
-
+        chat_store = SimpleChatStore()
+        self.memory = ChatMemoryBuffer.from_defaults(
+            chat_store=chat_store,
+            chat_store_key=self.session_id,
+            token_limit=65536
         )
         if chat_history:
-            from llama_index.core.llms import ChatMessage
-
             msgs = []
             for u, a in chat_history:
                 msgs.append(ChatMessage.from_str(u, role=MessageRole.USER))
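For readers tracking the memory change: below is a minimal standalone sketch of the session-keyed memory wiring introduced in the hunk above. It mirrors the diff directly; the topic value is a placeholder, and SimpleChatStore/ChatMemoryBuffer are the llama-index classes imported earlier in this diff.

# Sketch: session-scoped memory, mirroring the constructor change above.
from datetime import date

from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.storage.chat_store import SimpleChatStore

topic = "general"  # placeholder; the Agent uses its `topic` argument
session_id = f"{topic}:{date.today().isoformat()}"  # auto-generated key when none is given

chat_store = SimpleChatStore()
memory = ChatMemoryBuffer.from_defaults(
    chat_store=chat_store,
    chat_store_key=session_id,  # all messages for this session share one store key
    token_limit=65536,
)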
@@ -184,6 +193,12 @@
                 logger.warning(f"Failed to set up observer ({e}), ignoring")
                 self.observability_enabled = False
 
+        # VHC state tracking
+        self._vhc_cache = {}  # Cache VHC results by query hash
+        self._last_query = None
+        self._last_response = None
+        self._current_tool_outputs = []  # Store tool outputs from current query for VHC
+
     @property
     def llm(self):
         """Lazy-loads the LLM."""
@@ -209,7 +224,7 @@
 
     def _create_agent(
         self, config: AgentConfig, llm_callback_manager: "CallbackManager"
-    ) ->
+    ) -> "BaseWorkflowAgent":
         """
         Creates the agent based on the configuration object.
 
@@ -218,7 +233,7 @@
             llm_callback_manager: The callback manager for the agent's llm.
 
         Returns:
-
+            BaseWorkflowAgent: The configured agent object.
         """
         # Use the same LLM instance for consistency
         llm = (
@@ -241,12 +256,11 @@
         )
 
     def clear_memory(self) -> None:
-        """Clear the agent's memory."""
+        """Clear the agent's memory and reset agent instances to ensure consistency."""
         self.memory.reset()
-
-
-
-            self._fallback_agent.memory = self.memory
+        # Clear agent instances so they get recreated with the cleared memory
+        self._agent = None
+        self._fallback_agent = None
 
     def __eq__(self, other):
         if not isinstance(other, Agent):
@@ -320,6 +334,7 @@
         chat_history: Optional[list[Tuple[str, str]]] = None,
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
+        session_id: Optional[str] = None,
     ) -> "Agent":
         """
         Create an agent from tools, agent type, and language model.
@@ -339,6 +354,8 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
 
         Returns:
             Agent: An instance of the Agent class.
@@ -356,6 +373,7 @@
             fallback_agent_config=fallback_agent_config,
             workflow_cls=workflow_cls,
             workflow_timeout=workflow_timeout,
+            session_id=session_id,
         )
 
     @classmethod
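A hypothetical caller-side view of the new session_id parameter threaded through from_tools above. The other arguments (tools, topic, custom_instructions) are assumed from the existing API, and my_tools is a made-up tool list:

# Sketch: reusing a session ID so a later process continues the same conversation.
agent = Agent.from_tools(
    tools=my_tools,
    topic="finance",
    custom_instructions="Answer only from the provided tools.",
    session_id="my-session-42",  # hypothetical ID; omit to auto-generate "topic:YYYY-MM-DD"
)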
@@ -400,8 +418,18 @@
         vectara_presence_penalty: Optional[float] = None,
         vectara_save_history: bool = True,
         return_direct: bool = False,
+        session_id: Optional[str] = None,
     ) -> "Agent":
-        """Create an agent from a single Vectara corpus using the factory function.
+        """Create an agent from a single Vectara corpus using the factory function.
+
+        Args:
+            tool_name (str): Name of the tool to be created.
+            data_description (str): Description of the data/corpus.
+            assistant_specialty (str): The specialty/topic of the assistant.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
+            ... (other parameters as documented in factory function)
+        """
         # Use the factory function to avoid code duplication
         config = create_agent_from_corpus(
             tool_name=tool_name,
@@ -444,6 +472,7 @@
             chat_history=chat_history,
             agent_progress_callback=agent_progress_callback,
             query_logging_callback=query_logging_callback,
+            session_id=session_id,
             **config,
         )
 
@@ -451,11 +480,16 @@
         """
         Switch the configuration type of the agent.
         This function is called automatically to switch the agent configuration if the current configuration fails.
+        Ensures memory consistency by clearing agent instances so they are recreated with current memory.
         """
         if self.agent_config_type == AgentConfigType.DEFAULT:
             self.agent_config_type = AgentConfigType.FALLBACK
+            # Clear the fallback agent so it gets recreated with current memory
+            self._fallback_agent = None
         else:
             self.agent_config_type = AgentConfigType.DEFAULT
+            # Clear the main agent so it gets recreated with current memory
+            self._agent = None
 
     def report(self, detailed: bool = False) -> None:
         """
@@ -501,19 +535,6 @@
             else self.fallback_agent_config.agent_type
         )
 
-    async def _aformat_for_lats(self, prompt, agent_response):
-        llm_prompt = f"""
-        Given the question '{prompt}', and agent response '{agent_response.response}',
-        Please provide a well formatted final response to the query.
-        final response:
-        """
-        agent_type = self._get_current_agent_type()
-        if agent_type != AgentType.LATS:
-            return
-
-        agent = self._get_current_agent()
-        agent_response.response = (await agent.llm.acomplete(llm_prompt)).text
-
     def chat(self, prompt: str) -> AgentResponse:
         """
         Interact with the agent using a chat prompt.
@@ -562,9 +583,12 @@
         ]:
             from llama_index.core.workflow import Context
 
+            # Create context and pass memory to the workflow agent
+            # According to LlamaIndex docs, we should let the workflow manage memory internally
             ctx = Context(current_agent)
+
             handler = current_agent.run(
-                user_msg=prompt,
+                user_msg=prompt, memory=self.memory, ctx=ctx
             )
 
             # Listen to workflow events if progress callback is set
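For context, a sketch of the workflow-agent invocation pattern this hunk adopts, with the externally owned memory handed to run(). Setup of current_agent and memory is elided, the prompt is a placeholder, and the Context/run usage follows the llama-index workflow-agent pattern as used in the diff:

# Sketch: running a llama-index workflow agent with external memory (async code assumed).
from llama_index.core.workflow import Context

ctx = Context(current_agent)          # context scoped to this agent's workflow run
handler = current_agent.run(
    user_msg="What was Q2 revenue?",  # placeholder prompt
    memory=memory,                    # the session-keyed ChatMemoryBuffer from __init__
    ctx=ctx,
)
result = await handler                # stream events from `handler` before awaiting, if needed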
@@ -698,6 +722,27 @@
                         response=response_text, metadata=getattr(result, "metadata", {})
                     )
 
+                    # Retrieve updated memory from workflow context
+                    # According to LlamaIndex docs, workflow agents manage memory internally
+                    # and we can access it via ctx.store.get("memory")
+                    try:
+                        workflow_memory = await ctx.store.get("memory")
+                        if workflow_memory:
+                            # Update our external memory with the workflow's memory
+                            self.memory = workflow_memory
+                    except Exception as e:
+                        # If we can't retrieve workflow memory, fall back to manual management
+                        warning_msg = (
+                            f"Could not retrieve workflow memory, falling back to "
+                            f"manual management: {e}"
+                        )
+                        logger.warning(warning_msg)
+                        user_msg = ChatMessage.from_str(prompt, role=MessageRole.USER)
+                        assistant_msg = ChatMessage.from_str(
+                            response_text, role=MessageRole.ASSISTANT
+                        )
+                        self.memory.put_messages([user_msg, assistant_msg])
+
                 # Standard chat interaction for other agent types
                 else:
                     agent_response = await current_agent.achat(prompt)
@@ -713,7 +758,9 @@
             except Exception as e:
                 last_error = e
                 if self.verbose:
-                    logger.warning(
+                    logger.warning(
+                        f"LLM call failed on attempt {attempt}. " f"Error: {e}."
+                    )
                 if attempt >= 2 and self.fallback_agent_config:
                     self._switch_agent_config()
                 await asyncio.sleep(1)
@@ -750,6 +797,9 @@
         Returns:
             AgentStreamingResponse: The streaming response from the agent.
         """
+        # Store query for VHC processing and clear previous tool outputs
+        self._last_query = prompt
+        self._clear_tool_outputs()
         max_attempts = 4 if self.fallback_agent_config else 2
         attempt = 0
         orig_llm = self.llm.metadata.model_name
@@ -763,9 +813,12 @@
         if self._get_current_agent_type() == AgentType.FUNCTION_CALLING:
             from llama_index.core.workflow import Context
 
+            # Create context and pass memory to the workflow agent
+            # According to LlamaIndex docs, we should let the workflow manage memory internally
             ctx = Context(current_agent)
+
             handler = current_agent.run(
-                user_msg=prompt,
+                user_msg=prompt, memory=self.memory, ctx=ctx
             )
 
             # Use the dedicated FunctionCallingStreamHandler
@@ -809,6 +862,134 @@
             f"{max_attempts} attempts ({last_error})."
         )
 
+    def _clear_tool_outputs(self):
+        """Clear stored tool outputs at the start of a new query."""
+        self._current_tool_outputs.clear()
+        logging.info("🔧 [TOOL_STORAGE] Cleared stored tool outputs for new query")
+
+    def _add_tool_output(self, tool_name: str, content: str):
+        """Add a tool output to the current collection for VHC."""
+        tool_output = {
+            'status_type': 'TOOL_OUTPUT',
+            'content': content,
+            'tool_name': tool_name
+        }
+        self._current_tool_outputs.append(tool_output)
+        logging.info(f"🔧 [TOOL_STORAGE] Added tool output from '{tool_name}': {len(content)} chars")
+
+    def _get_stored_tool_outputs(self) -> List[dict]:
+        """Get the stored tool outputs from the current query."""
+        logging.info(f"🔧 [TOOL_STORAGE] Retrieved {len(self._current_tool_outputs)} stored tool outputs")
+        return self._current_tool_outputs.copy()
+
+    async def acompute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (async version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] UNIQUE_DEBUG_MESSAGE acompute_vhc method called - "
+            f"stored_tool_outputs_count={len(self._current_tool_outputs)}"
+        )
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] _last_query: {'set' if self._last_query else 'None'}"
+        )
+
+        if not self._last_query:
+            logging.info("🔍 [VHC_AGENT] Returning early - no _last_query")
+            return {"corrected_text": None, "corrections": []}
+
+        # For VHC to work, we need the response text from memory
+        # Get the latest assistant response from memory
+        messages = self.memory.get()
+        logging.info(
+            f"🔍 [VHC_AGENT] memory.get() returned {len(messages) if messages else 0} messages"
+        )
+
+        if not messages:
+            logging.info("🔍 [VHC_AGENT] Returning early - no messages in memory")
+            return {"corrected_text": None, "corrections": []}
+
+        # Find the last assistant message
+        last_response = None
+        for msg in reversed(messages):
+            if msg.role == MessageRole.ASSISTANT:
+                last_response = msg.content
+                break
+
+        logging.info(
+            f"🔍 [VHC_AGENT] Found last_response: {'set' if last_response else 'None'}"
+        )
+
+        if not last_response:
+            logging.info("🔍 [VHC_AGENT] Returning early - no last assistant response found")
+            return {"corrected_text": None, "corrections": []}
+
+        # Update stored response for caching
+        self._last_response = last_response
+
+        # Create cache key from query + response
+        cache_key = hash(f"{self._last_query}:{self._last_response}")
+
+        # Return cached results if available
+        if cache_key in self._vhc_cache:
+            return self._vhc_cache[cache_key]
+
+        # Check if we have VHC API key
+        logging.info(
+            f"🔍 [VHC_AGENT] acompute_vhc called with vectara_api_key={'set' if self.vectara_api_key else 'None'}"
+        )
+        if not self.vectara_api_key:
+            logging.info("🔍 [VHC_AGENT] No vectara_api_key - returning early with None")
+            return {"corrected_text": None, "corrections": []}
+
+        # Compute VHC using existing library function
+        from .agent_core.utils.hallucination import analyze_hallucinations
+
+        try:
+            # Use stored tool outputs from current query
+            stored_tool_outputs = self._get_stored_tool_outputs()
+            logging.info(f"🔧 [VHC_AGENT] Using {len(stored_tool_outputs)} stored tool outputs for VHC")
+
+            corrected_text, corrections = analyze_hallucinations(
+                query=self._last_query,
+                chat_history=self.memory.get(),
+                agent_response=self._last_response,
+                tools=self.tools,
+                vectara_api_key=self.vectara_api_key,
+                tool_outputs=stored_tool_outputs,
+            )
+
+            # Cache results
+            results = {"corrected_text": corrected_text, "corrections": corrections}
+            self._vhc_cache[cache_key] = results
+
+            return results
+
+        except Exception as e:
+            logger.error(f"VHC computation failed: {e}")
+            return {"corrected_text": None, "corrections": []}
+
+    def compute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (sync version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        try:
+            loop = asyncio.get_event_loop()
+            return loop.run_until_complete(self.acompute_vhc())
+        except RuntimeError:
+            # No event loop running, create a new one
+            return asyncio.run(self.acompute_vhc())
+
     #
     # run() method for running a workflow
     # workflow will always get these arguments in the StartEvent: agent, tools, llm, verbose
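A hedged usage sketch of the new VHC entry points: run a chat turn, then ask for hallucination correction. The constructor arguments are assumptions based on the signatures in this diff (tools, topic, vectara_api_key), and my_tools is made up:

# Sketch: VHC after a chat turn. compute_vhc() reuses the stored last query/response
# and the tool outputs collected during the turn; results are cached per (query, response).
import os

agent = Agent(tools=my_tools, topic="finance", vectara_api_key=os.environ["VECTARA_API_KEY"])
print(agent.chat("What was Q2 revenue?").response)
vhc = agent.compute_vhc()   # sync wrapper; use `await agent.acompute_vhc()` in async code
print(vhc["corrected_text"])
print(vhc["corrections"])   # empty list when no API key is set or no query has run yet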
@@ -882,7 +1063,9 @@
                     input_dict[key] = value
                 output = outputs_model_on_fail_cls.model_validate(input_dict)
             else:
-                logger.warning(
+                logger.warning(
+                    f"Vectara Agentic: Workflow failed with unexpected error: {e}"
+                )
                 raise type(e)(str(e)).with_traceback(e.__traceback__)
 
         return output
@@ -923,3 +1106,29 @@
         return deserialize_agent_from_dict(
             cls, data, agent_progress_callback, query_logging_callback
         )
+
+    def cleanup(self) -> None:
+        """Clean up resources used by the agent."""
+        from ._observability import shutdown_observer
+
+        if hasattr(self, 'agent') and hasattr(self.agent, '_llm'):
+            llm = self.agent._llm
+            if hasattr(llm, 'client') and hasattr(llm.client, 'close'):
+                try:
+                    if asyncio.iscoroutinefunction(llm.client.close):
+                        asyncio.run(llm.client.close())
+                    else:
+                        llm.client.close()
+                except Exception:
+                    pass
+
+        # Shutdown observability connections
+        shutdown_observer()
+
+    def __enter__(self):
+        """Context manager entry."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit with cleanup."""
+        self.cleanup()