vectara-agentic 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of vectara-agentic might be problematic.

Files changed (38)
  1. tests/benchmark_models.py +945 -0
  2. tests/conftest.py +9 -5
  3. tests/run_tests.py +3 -0
  4. tests/test_agent.py +57 -29
  5. tests/test_agent_fallback_memory.py +270 -0
  6. tests/test_agent_memory_consistency.py +229 -0
  7. tests/test_agent_type.py +4 -0
  8. tests/test_bedrock.py +46 -31
  9. tests/test_fallback.py +1 -1
  10. tests/test_gemini.py +7 -22
  11. tests/test_groq.py +46 -31
  12. tests/test_private_llm.py +1 -1
  13. tests/test_serialization.py +3 -6
  14. tests/test_session_memory.py +252 -0
  15. tests/test_streaming.py +58 -37
  16. tests/test_together.py +62 -0
  17. tests/test_vhc.py +3 -2
  18. tests/test_workflow.py +9 -28
  19. vectara_agentic/_observability.py +19 -0
  20. vectara_agentic/_version.py +1 -1
  21. vectara_agentic/agent.py +246 -37
  22. vectara_agentic/agent_core/factory.py +34 -153
  23. vectara_agentic/agent_core/prompts.py +19 -13
  24. vectara_agentic/agent_core/serialization.py +17 -8
  25. vectara_agentic/agent_core/streaming.py +27 -43
  26. vectara_agentic/agent_core/utils/__init__.py +0 -5
  27. vectara_agentic/agent_core/utils/hallucination.py +54 -99
  28. vectara_agentic/llm_utils.py +4 -2
  29. vectara_agentic/sub_query_workflow.py +3 -2
  30. vectara_agentic/tools.py +0 -19
  31. vectara_agentic/types.py +9 -3
  32. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/METADATA +79 -39
  33. vectara_agentic-0.4.2.dist-info/RECORD +54 -0
  34. vectara_agentic/agent_core/utils/prompt_formatting.py +0 -56
  35. vectara_agentic-0.4.0.dist-info/RECORD +0 -50
  36. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/WHEEL +0 -0
  37. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/licenses/LICENSE +0 -0
  38. {vectara_agentic-0.4.0.dist-info → vectara_agentic-0.4.2.dist-info}/top_level.txt +0 -0
vectara_agentic/agent.py CHANGED
@@ -3,10 +3,11 @@ This module contains the Agent class for handling different types of agents and
 """
 
 import warnings
+
 warnings.simplefilter("ignore", DeprecationWarning)
 
 # pylint: disable=wrong-import-position
-from typing import List, Callable, Optional, Dict, Any, Union, Tuple, TYPE_CHECKING
+from typing import List, Callable, Optional, Dict, Any, Tuple, TYPE_CHECKING
 import os
 from datetime import date
 import json
@@ -19,16 +20,16 @@ from pydantic_core import PydanticUndefined
 from dotenv import load_dotenv
 
 # Runtime imports for components used at module level
-from llama_index.core.llms import MessageRole
+from llama_index.core.llms import MessageRole, ChatMessage
 from llama_index.core.callbacks import CallbackManager
-from llama_index.core.memory import Memory
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core.storage.chat_store import SimpleChatStore
 
 # Heavy llama_index imports moved to TYPE_CHECKING for lazy loading
 if TYPE_CHECKING:
     from llama_index.core.tools import FunctionTool
     from llama_index.core.workflow import Workflow
-    from llama_index.core.agent.runner.base import AgentRunner
-    from llama_index.core.agent.types import BaseAgent
+    from llama_index.core.agent import BaseWorkflowAgent
     from llama_index.core.callbacks.base_handler import BaseCallbackHandler
 
 
@@ -96,6 +97,7 @@ class Agent:
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
         vectara_api_key: Optional[str] = None,
+        session_id: Optional[str] = None,
     ) -> None:
         """
         Initialize the agent with the specified type, tools, topic, and system message.
@@ -120,7 +122,9 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
-            vectara_api_key (str, optional): The Vectara API key for FCS evaluation. Defaults to None.
+            vectara_api_key (str, optional): The Vectara API key for VHC computation. Defaults to None.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
         """
         self.agent_config = agent_config or AgentConfig()
         self.agent_config_type = AgentConfigType.DEFAULT
@@ -147,7 +151,9 @@
 
         # Validate tools
         if validate_tools:
-            validate_tool_consistency(self.tools, self._custom_instructions, self.agent_config)
+            validate_tool_consistency(
+                self.tools, self._custom_instructions, self.agent_config
+            )
 
         # Setup callback manager
         callbacks: list[BaseCallbackHandler] = [
@@ -157,15 +163,18 @@
         self.verbose = verbose
 
         self.session_id = (
-            getattr(self, "session_id", None) or f"{topic}:{date.today().isoformat()}"
+            session_id
+            or getattr(self, "session_id", None)
+            or f"{topic}:{date.today().isoformat()}"
         )
 
-        self.memory = Memory.from_defaults(
-            session_id=self.session_id, token_limit=65536
+        chat_store = SimpleChatStore()
+        self.memory = ChatMemoryBuffer.from_defaults(
+            chat_store=chat_store,
+            chat_store_key=self.session_id,
+            token_limit=65536
         )
         if chat_history:
-            from llama_index.core.llms import ChatMessage
-
             msgs = []
            for u, a in chat_history:
                msgs.append(ChatMessage.from_str(u, role=MessageRole.USER))
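
Note: the new memory wiring follows the standard LlamaIndex chat-store pattern. A minimal standalone sketch of the behavior (the session key value here is illustrative, not from the package):

    from llama_index.core.llms import ChatMessage, MessageRole
    from llama_index.core.memory import ChatMemoryBuffer
    from llama_index.core.storage.chat_store import SimpleChatStore

    # One SimpleChatStore can hold many sessions; each buffer reads and
    # writes only the key it was created with (the agent's session_id).
    chat_store = SimpleChatStore()
    memory = ChatMemoryBuffer.from_defaults(
        chat_store=chat_store,
        chat_store_key="finance:2025-01-01",  # illustrative session_id
        token_limit=65536,
    )
    memory.put(ChatMessage(role=MessageRole.USER, content="hello"))
    assert len(memory.get()) == 1
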
@@ -184,6 +193,12 @@
                 logger.warning(f"Failed to set up observer ({e}), ignoring")
                 self.observability_enabled = False
 
+        # VHC state tracking
+        self._vhc_cache = {}  # Cache VHC results by query hash
+        self._last_query = None
+        self._last_response = None
+        self._current_tool_outputs = []  # Store tool outputs from current query for VHC
+
     @property
     def llm(self):
         """Lazy-loads the LLM."""
@@ -209,7 +224,7 @@
 
     def _create_agent(
         self, config: AgentConfig, llm_callback_manager: "CallbackManager"
-    ) -> Union["BaseAgent", "AgentRunner"]:
+    ) -> "BaseWorkflowAgent":
         """
         Creates the agent based on the configuration object.
 
@@ -218,7 +233,7 @@
             llm_callback_manager: The callback manager for the agent's llm.
 
         Returns:
-            Union[BaseAgent, AgentRunner]: The configured agent object.
+            BaseWorkflowAgent: The configured agent object.
         """
         # Use the same LLM instance for consistency
         llm = (
@@ -241,12 +256,11 @@
         )
 
     def clear_memory(self) -> None:
-        """Clear the agent's memory."""
+        """Clear the agent's memory and reset agent instances to ensure consistency."""
         self.memory.reset()
-        if getattr(self, "_agent", None):
-            self._agent.memory = self.memory
-        if getattr(self, "_fallback_agent", None):
-            self._fallback_agent.memory = self.memory
+        # Clear agent instances so they get recreated with the cleared memory
+        self._agent = None
+        self._fallback_agent = None
 
     def __eq__(self, other):
         if not isinstance(other, Agent):
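
Note: because `_agent` and `_fallback_agent` are now dropped rather than patched in place, the next call lazily rebuilds them against the freshly reset memory. A hedged sketch of the resulting behavior (the tool list is a placeholder):

    agent = Agent.from_tools(tools=my_tools, topic="finance")  # my_tools is hypothetical
    agent.chat("Remember that my budget is $500.")
    agent.clear_memory()              # resets memory and drops both cached agent instances
    agent.chat("What is my budget?")  # underlying agent is recreated with the empty memory
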
@@ -320,6 +334,7 @@
         chat_history: Optional[list[Tuple[str, str]]] = None,
         workflow_cls: Optional["Workflow"] = None,
         workflow_timeout: int = 120,
+        session_id: Optional[str] = None,
     ) -> "Agent":
         """
         Create an agent from tools, agent type, and language model.
@@ -339,6 +354,8 @@
                 Defaults to False.
             workflow_cls (Workflow, optional): The workflow class to be used with run(). Defaults to None.
             workflow_timeout (int, optional): The timeout for the workflow in seconds. Defaults to 120.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
 
         Returns:
             Agent: An instance of the Agent class.
@@ -356,6 +373,7 @@
             fallback_agent_config=fallback_agent_config,
             workflow_cls=workflow_cls,
             workflow_timeout=workflow_timeout,
+            session_id=session_id,
         )
 
     @classmethod
@@ -400,8 +418,18 @@
         vectara_presence_penalty: Optional[float] = None,
         vectara_save_history: bool = True,
         return_direct: bool = False,
+        session_id: Optional[str] = None,
     ) -> "Agent":
-        """Create an agent from a single Vectara corpus using the factory function."""
+        """Create an agent from a single Vectara corpus using the factory function.
+
+        Args:
+            tool_name (str): Name of the tool to be created.
+            data_description (str): Description of the data/corpus.
+            assistant_specialty (str): The specialty/topic of the assistant.
+            session_id (str, optional): The session ID for memory persistence.
+                If None, auto-generates from topic and date. Defaults to None.
+            ... (other parameters as documented in factory function)
+        """
         # Use the factory function to avoid code duplication
         config = create_agent_from_corpus(
             tool_name=tool_name,
@@ -444,6 +472,7 @@
             chat_history=chat_history,
             agent_progress_callback=agent_progress_callback,
             query_logging_callback=query_logging_callback,
+            session_id=session_id,
             **config,
         )
 
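Note: a hedged usage sketch of the new `session_id` parameter on `Agent.from_corpus` (all values are placeholders; Vectara corpus credentials are passed via the usual `vectara_*` arguments, omitted here):

    agent = Agent.from_corpus(
        tool_name="ask_finance",
        data_description="quarterly financial reports",
        assistant_specialty="finance",
        session_id="user-123:finance",  # stable key for this user's conversation memory
    )
    print(agent.chat("Summarize Q2 revenue drivers.").response)
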
@@ -451,11 +480,16 @@
         """
         Switch the configuration type of the agent.
         This function is called automatically to switch the agent configuration if the current configuration fails.
+        Ensures memory consistency by clearing agent instances so they are recreated with current memory.
         """
         if self.agent_config_type == AgentConfigType.DEFAULT:
             self.agent_config_type = AgentConfigType.FALLBACK
+            # Clear the fallback agent so it gets recreated with current memory
+            self._fallback_agent = None
         else:
             self.agent_config_type = AgentConfigType.DEFAULT
+            # Clear the main agent so it gets recreated with current memory
+            self._agent = None
 
     def report(self, detailed: bool = False) -> None:
         """
@@ -501,19 +535,6 @@
             else self.fallback_agent_config.agent_type
         )
 
-    async def _aformat_for_lats(self, prompt, agent_response):
-        llm_prompt = f"""
-        Given the question '{prompt}', and agent response '{agent_response.response}',
-        Please provide a well formatted final response to the query.
-        final response:
-        """
-        agent_type = self._get_current_agent_type()
-        if agent_type != AgentType.LATS:
-            return
-
-        agent = self._get_current_agent()
-        agent_response.response = (await agent.llm.acomplete(llm_prompt)).text
-
     def chat(self, prompt: str) -> AgentResponse:
         """
         Interact with the agent using a chat prompt.
@@ -562,9 +583,12 @@
                 ]:
                     from llama_index.core.workflow import Context
 
+                    # Create context and pass memory to the workflow agent
+                    # According to LlamaIndex docs, we should let the workflow manage memory internally
                     ctx = Context(current_agent)
+
                     handler = current_agent.run(
-                        user_msg=prompt, ctx=ctx, memory=self.memory
+                        user_msg=prompt, memory=self.memory, ctx=ctx
                     )
 
                     # Listen to workflow events if progress callback is set
@@ -698,6 +722,27 @@
                         response=response_text, metadata=getattr(result, "metadata", {})
                     )
 
+                    # Retrieve updated memory from workflow context
+                    # According to LlamaIndex docs, workflow agents manage memory internally
+                    # and we can access it via ctx.store.get("memory")
+                    try:
+                        workflow_memory = await ctx.store.get("memory")
+                        if workflow_memory:
+                            # Update our external memory with the workflow's memory
+                            self.memory = workflow_memory
+                    except Exception as e:
+                        # If we can't retrieve workflow memory, fall back to manual management
+                        warning_msg = (
+                            f"Could not retrieve workflow memory, falling back to "
+                            f"manual management: {e}"
+                        )
+                        logger.warning(warning_msg)
+                        user_msg = ChatMessage.from_str(prompt, role=MessageRole.USER)
+                        assistant_msg = ChatMessage.from_str(
+                            response_text, role=MessageRole.ASSISTANT
+                        )
+                        self.memory.put_messages([user_msg, assistant_msg])
+
                 # Standard chat interaction for other agent types
                 else:
                     agent_response = await current_agent.achat(prompt)
@@ -713,7 +758,9 @@
             except Exception as e:
                 last_error = e
                 if self.verbose:
-                    logger.warning(f"LLM call failed on attempt {attempt}. " f"Error: {e}.")
+                    logger.warning(
+                        f"LLM call failed on attempt {attempt}. " f"Error: {e}."
+                    )
                 if attempt >= 2 and self.fallback_agent_config:
                     self._switch_agent_config()
                 await asyncio.sleep(1)
@@ -750,6 +797,9 @@
         Returns:
             AgentStreamingResponse: The streaming response from the agent.
         """
+        # Store query for VHC processing and clear previous tool outputs
+        self._last_query = prompt
+        self._clear_tool_outputs()
         max_attempts = 4 if self.fallback_agent_config else 2
         attempt = 0
         orig_llm = self.llm.metadata.model_name
@@ -763,9 +813,12 @@
 
                 if self._get_current_agent_type() == AgentType.FUNCTION_CALLING:
                     from llama_index.core.workflow import Context
+                    # Create context and pass memory to the workflow agent
+                    # According to LlamaIndex docs, we should let the workflow manage memory internally
                     ctx = Context(current_agent)
+
                     handler = current_agent.run(
-                        user_msg=prompt, ctx=ctx, memory=self.memory
+                        user_msg=prompt, memory=self.memory, ctx=ctx
                     )
 
                     # Use the dedicated FunctionCallingStreamHandler
@@ -809,6 +862,134 @@
             f"{max_attempts} attempts ({last_error})."
         )
 
+    def _clear_tool_outputs(self):
+        """Clear stored tool outputs at the start of a new query."""
+        self._current_tool_outputs.clear()
+        logging.info("🔧 [TOOL_STORAGE] Cleared stored tool outputs for new query")
+
+    def _add_tool_output(self, tool_name: str, content: str):
+        """Add a tool output to the current collection for VHC."""
+        tool_output = {
+            'status_type': 'TOOL_OUTPUT',
+            'content': content,
+            'tool_name': tool_name
+        }
+        self._current_tool_outputs.append(tool_output)
+        logging.info(f"🔧 [TOOL_STORAGE] Added tool output from '{tool_name}': {len(content)} chars")
+
+    def _get_stored_tool_outputs(self) -> List[dict]:
+        """Get the stored tool outputs from the current query."""
+        logging.info(f"🔧 [TOOL_STORAGE] Retrieved {len(self._current_tool_outputs)} stored tool outputs")
+        return self._current_tool_outputs.copy()
+
+    async def acompute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (async version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] UNIQUE_DEBUG_MESSAGE acompute_vhc method called - "
+            f"stored_tool_outputs_count={len(self._current_tool_outputs)}"
+        )
+        logging.info(
+            f"🔍🔍🔍 [VHC_AGENT_ENTRY] _last_query: {'set' if self._last_query else 'None'}"
+        )
+
+        if not self._last_query:
+            logging.info("🔍 [VHC_AGENT] Returning early - no _last_query")
+            return {"corrected_text": None, "corrections": []}
+
+        # For VHC to work, we need the response text from memory
+        # Get the latest assistant response from memory
+        messages = self.memory.get()
+        logging.info(
+            f"🔍 [VHC_AGENT] memory.get() returned {len(messages) if messages else 0} messages"
+        )
+
+        if not messages:
+            logging.info("🔍 [VHC_AGENT] Returning early - no messages in memory")
+            return {"corrected_text": None, "corrections": []}
+
+        # Find the last assistant message
+        last_response = None
+        for msg in reversed(messages):
+            if msg.role == MessageRole.ASSISTANT:
+                last_response = msg.content
+                break
+
+        logging.info(
+            f"🔍 [VHC_AGENT] Found last_response: {'set' if last_response else 'None'}"
+        )
+
+        if not last_response:
+            logging.info("🔍 [VHC_AGENT] Returning early - no last assistant response found")
+            return {"corrected_text": None, "corrections": []}
+
+        # Update stored response for caching
+        self._last_response = last_response
+
+        # Create cache key from query + response
+        cache_key = hash(f"{self._last_query}:{self._last_response}")
+
+        # Return cached results if available
+        if cache_key in self._vhc_cache:
+            return self._vhc_cache[cache_key]
+
+        # Check if we have VHC API key
+        logging.info(
+            f"🔍 [VHC_AGENT] acompute_vhc called with vectara_api_key={'set' if self.vectara_api_key else 'None'}"
+        )
+        if not self.vectara_api_key:
+            logging.info("🔍 [VHC_AGENT] No vectara_api_key - returning early with None")
+            return {"corrected_text": None, "corrections": []}
+
+        # Compute VHC using existing library function
+        from .agent_core.utils.hallucination import analyze_hallucinations
+
+        try:
+            # Use stored tool outputs from current query
+            stored_tool_outputs = self._get_stored_tool_outputs()
+            logging.info(f"🔧 [VHC_AGENT] Using {len(stored_tool_outputs)} stored tool outputs for VHC")
+
+            corrected_text, corrections = analyze_hallucinations(
+                query=self._last_query,
+                chat_history=self.memory.get(),
+                agent_response=self._last_response,
+                tools=self.tools,
+                vectara_api_key=self.vectara_api_key,
+                tool_outputs=stored_tool_outputs,
+            )
+
+            # Cache results
+            results = {"corrected_text": corrected_text, "corrections": corrections}
+            self._vhc_cache[cache_key] = results
+
+            return results
+
+        except Exception as e:
+            logger.error(f"VHC computation failed: {e}")
+            return {"corrected_text": None, "corrections": []}
+
+    def compute_vhc(self) -> Dict[str, Any]:
+        """
+        Compute VHC for the last query/response pair (sync version).
+        Results are cached for subsequent calls. Tool outputs are automatically
+        collected during streaming and used internally.
+
+        Returns:
+            Dict[str, Any]: Dictionary containing 'corrected_text' and 'corrections'
+        """
+        try:
+            loop = asyncio.get_event_loop()
+            return loop.run_until_complete(self.acompute_vhc())
+        except RuntimeError:
+            # No event loop running, create a new one
+            return asyncio.run(self.acompute_vhc())
+
     #
     # run() method for running a workflow
     # workflow will always get these arguments in the StartEvent: agent, tools, llm, verbose
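
Note: a hedged sketch of how the new VHC (Vectara Hallucination Correction) API is meant to be called. Per the docstrings above, tool outputs are collected during streaming; the tool list, API key value, and the `stream_chat` entry point shown here are assumptions, not taken from this diff:

    agent = Agent.from_tools(
        tools=my_tools,            # hypothetical tool list
        topic="finance",
        vectara_api_key="vk-...",  # required for VHC; placeholder value
    )
    streaming_response = agent.stream_chat("What was 2023 net income?")  # assumed streaming entry point
    # ... consume the stream, then:
    vhc = agent.compute_vhc()      # cached per (query, response) pair
    print(vhc["corrected_text"])   # corrected response text, or None if VHC did not run
    print(vhc["corrections"])      # list of corrections (empty if none)
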
@@ -882,7 +1063,9 @@
                     input_dict[key] = value
                 output = outputs_model_on_fail_cls.model_validate(input_dict)
             else:
-                logger.warning(f"Vectara Agentic: Workflow failed with unexpected error: {e}")
+                logger.warning(
+                    f"Vectara Agentic: Workflow failed with unexpected error: {e}"
+                )
                 raise type(e)(str(e)).with_traceback(e.__traceback__)
 
         return output
@@ -923,3 +1106,29 @@
         return deserialize_agent_from_dict(
             cls, data, agent_progress_callback, query_logging_callback
         )
+
+    def cleanup(self) -> None:
+        """Clean up resources used by the agent."""
+        from ._observability import shutdown_observer
+
+        if hasattr(self, 'agent') and hasattr(self.agent, '_llm'):
+            llm = self.agent._llm
+            if hasattr(llm, 'client') and hasattr(llm.client, 'close'):
+                try:
+                    if asyncio.iscoroutinefunction(llm.client.close):
+                        asyncio.run(llm.client.close())
+                    else:
+                        llm.client.close()
+                except Exception:
+                    pass
+
+        # Shutdown observability connections
+        shutdown_observer()
+
+    def __enter__(self):
+        """Context manager entry."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit with cleanup."""
+        self.cleanup()
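
Note: with `__enter__`/`__exit__` in place, an agent can be used as a context manager so `cleanup()` runs even when an error escapes the block. A minimal sketch (the tool list is hypothetical):

    with Agent.from_tools(tools=my_tools, topic="finance") as agent:
        print(agent.chat("Hello").response)
    # cleanup() has run here: LLM client closed, observability shut down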