vectara-agentic 0.4.4__tar.gz → 0.4.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {vectara_agentic-0.4.4/vectara_agentic.egg-info → vectara_agentic-0.4.10}/PKG-INFO +39 -36
  2. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/README.md +5 -4
  3. vectara_agentic-0.4.10/requirements.txt +46 -0
  4. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/benchmark_models.py +12 -12
  5. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/endpoint.py +27 -2
  6. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_agent.py +4 -3
  7. vectara_agentic-0.4.10/tests/test_bedrock.py +170 -0
  8. vectara_agentic-0.4.10/tests/test_gemini.py +147 -0
  9. vectara_agentic-0.4.10/tests/test_groq.py +193 -0
  10. vectara_agentic-0.4.10/tests/test_openai.py +261 -0
  11. vectara_agentic-0.4.10/tests/test_private_llm.py +161 -0
  12. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_react_streaming.py +26 -2
  13. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_together.py +32 -0
  14. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_tools.py +161 -0
  15. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/_version.py +1 -1
  16. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent.py +19 -30
  17. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_config.py +5 -0
  18. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/factory.py +11 -4
  19. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/prompts.py +72 -16
  20. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/serialization.py +3 -3
  21. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/streaming.py +176 -198
  22. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/hallucination.py +33 -1
  23. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/tools.py +19 -11
  24. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/db_tools.py +4 -0
  25. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/llm_utils.py +84 -14
  26. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/sub_query_workflow.py +31 -31
  27. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/tools.py +110 -5
  28. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/utils.py +35 -10
  29. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10/vectara_agentic.egg-info}/PKG-INFO +39 -36
  30. vectara_agentic-0.4.10/vectara_agentic.egg-info/requires.txt +46 -0
  31. vectara_agentic-0.4.4/requirements.txt +0 -44
  32. vectara_agentic-0.4.4/tests/test_bedrock.py +0 -69
  33. vectara_agentic-0.4.4/tests/test_gemini.py +0 -57
  34. vectara_agentic-0.4.4/tests/test_groq.py +0 -103
  35. vectara_agentic-0.4.4/tests/test_openai.py +0 -160
  36. vectara_agentic-0.4.4/tests/test_private_llm.py +0 -90
  37. vectara_agentic-0.4.4/vectara_agentic.egg-info/requires.txt +0 -44
  38. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/LICENSE +0 -0
  39. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/MANIFEST.in +0 -0
  40. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/setup.cfg +0 -0
  41. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/setup.py +0 -0
  42. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/__init__.py +0 -0
  43. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/conftest.py +0 -0
  44. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/run_tests.py +0 -0
  45. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_agent_fallback_memory.py +0 -0
  46. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_agent_memory_consistency.py +0 -0
  47. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_agent_type.py +0 -0
  48. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_api_endpoint.py +0 -0
  49. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_fallback.py +0 -0
  50. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_react_error_handling.py +0 -0
  51. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_react_memory.py +0 -0
  52. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_react_workflow_events.py +0 -0
  53. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_return_direct.py +0 -0
  54. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_serialization.py +0 -0
  55. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_session_memory.py +0 -0
  56. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_streaming.py +0 -0
  57. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_vectara_llms.py +0 -0
  58. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_vhc.py +0 -0
  59. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/tests/test_workflow.py +0 -0
  60. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/__init__.py +0 -0
  61. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/_callback.py +0 -0
  62. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/_observability.py +0 -0
  63. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/__init__.py +0 -0
  64. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/__init__.py +0 -0
  65. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/logging.py +0 -0
  66. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/schemas.py +0 -0
  67. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/agent_endpoint.py +0 -0
  68. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/tool_utils.py +0 -0
  69. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/tools_catalog.py +0 -0
  70. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic/types.py +0 -0
  71. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic.egg-info/SOURCES.txt +0 -0
  72. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic.egg-info/dependency_links.txt +0 -0
  73. {vectara_agentic-0.4.4 → vectara_agentic-0.4.10}/vectara_agentic.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vectara_agentic
- Version: 0.4.4
+ Version: 0.4.10
  Summary: A Python package for creating AI Assistants and AI Agents with Vectara
  Home-page: https://github.com/vectara/py-vectara-agentic
  Author: Ofer Mendelevitch
@@ -16,41 +16,43 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: llama-index==0.13.3
- Requires-Dist: llama-index-core==0.13.3
- Requires-Dist: llama-index-workflows==1.3.0
- Requires-Dist: llama-index-cli==0.5.0
- Requires-Dist: llama-index-indices-managed-vectara==0.5.0
- Requires-Dist: llama-index-llms-openai==0.5.4
- Requires-Dist: llama-index-llms-openai-like==0.5.0
- Requires-Dist: llama-index-llms-anthropic==0.8.5
- Requires-Dist: llama-index-llms-together==0.4.0
- Requires-Dist: llama-index-llms-groq==0.4.0
- Requires-Dist: llama-index-llms-cohere==0.6.0
- Requires-Dist: llama-index-llms-google-genai==0.3.0
- Requires-Dist: google_genai>=1.31.0
- Requires-Dist: llama-index-llms-bedrock-converse==0.8.2
- Requires-Dist: llama-index-tools-yahoo-finance==0.4.0
- Requires-Dist: llama-index-tools-arxiv==0.4.0
- Requires-Dist: llama-index-tools-database==0.4.0
- Requires-Dist: llama-index-tools-google==0.6.0
- Requires-Dist: llama-index-tools-tavily_research==0.4.0
- Requires-Dist: llama_index.tools.brave_search==0.4.0
- Requires-Dist: llama-index-tools-neo4j==0.4.0
- Requires-Dist: llama-index-tools-waii==0.4.0
- Requires-Dist: llama-index-graph-stores-kuzu==0.9.0
- Requires-Dist: llama-index-tools-salesforce==0.4.0
- Requires-Dist: llama-index-tools-slack==0.4.0
- Requires-Dist: llama-index-tools-exa==0.4.0
- Requires-Dist: llama-index-tools-wikipedia==0.4.0
- Requires-Dist: llama-index-tools-bing-search==0.4.0
+ Requires-Dist: llama-index==0.14.7
+ Requires-Dist: llama-index-core==0.14.7
+ Requires-Dist: llama-index-workflows==2.10.3
+ Requires-Dist: llama-index-cli==0.5.3
+ Requires-Dist: llama-index-indices-managed-vectara==0.5.1
+ Requires-Dist: llama-index-llms-openai==0.6.7
+ Requires-Dist: llama-index-llms-openai-like==0.5.3
+ Requires-Dist: llama-index-llms-anthropic==0.9.7
+ Requires-Dist: llama-index-llms-together==0.4.1
+ Requires-Dist: llama-index-llms-groq==0.4.1
+ Requires-Dist: llama-index-llms-cohere==0.6.1
+ Requires-Dist: llama-index-llms-google-genai==0.7.1
+ Requires-Dist: google_genai==1.48.0
+ Requires-Dist: llama-index-llms-bedrock-converse==0.11.0
+ Requires-Dist: llama-index-tools-yahoo-finance==0.4.1
+ Requires-Dist: llama-index-tools-arxiv==0.4.1
+ Requires-Dist: llama-index-tools-database==0.4.1
+ Requires-Dist: llama-index-tools-google==0.6.2
+ Requires-Dist: llama-index-tools-tavily_research==0.4.1
+ Requires-Dist: llama_index.tools.brave_search==0.4.1
+ Requires-Dist: llama-index-tools-neo4j==0.4.1
+ Requires-Dist: llama-index-tools-waii==0.4.1
+ Requires-Dist: llama-index-graph-stores-kuzu==0.9.1
+ Requires-Dist: llama-index-tools-salesforce==0.4.1
+ Requires-Dist: llama-index-tools-slack==0.4.1
+ Requires-Dist: llama-index-tools-exa==0.4.1
+ Requires-Dist: llama-index-tools-wikipedia==0.4.1
+ Requires-Dist: llama-index-tools-bing-search==0.4.1
  Requires-Dist: openai>=1.99.3
  Requires-Dist: tavily-python>=0.7.10
  Requires-Dist: exa-py>=1.14.20
- Requires-Dist: openinference-instrumentation-llama-index==4.3.4
+ Requires-Dist: openinference-instrumentation-llama-index==4.3.8
  Requires-Dist: opentelemetry-proto>=1.31.0
- Requires-Dist: arize-phoenix==10.9.1
- Requires-Dist: arize-phoenix-otel==0.10.3
+ Requires-Dist: arize-phoenix==12.9.0
+ Requires-Dist: arize-phoenix-otel==0.13.1
+ Requires-Dist: arize-phoenix-client==1.21.0
+ Requires-Dist: arize-phoenix-evals==2.5.0
  Requires-Dist: protobuf==5.29.5
  Requires-Dist: tokenizers>=0.20
  Requires-Dist: pydantic>=2.11.5
@@ -736,13 +738,13 @@ If you want to use `agent`, `tools`, `llm` or `verbose` in other events (that ar
  the `Context` of the Workflow as follows:
 
  ```python
- await ctx.set("agent", ev.agent)
+ await ctx.store.set("agent", ev.agent)
  ```
 
  and then in any other event you can pull that agent object with
 
  ```python
- agent = await ctx.get("agent")
+ agent = await ctx.store.get("agent")
  ```
 
  Similarly you can reuse the `llm`, `tools` or `verbose` arguments within other nodes in the workflow.
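The `ctx.set` → `ctx.store.set` change above tracks the llama-index-workflows 2.x Context API, where the shared key/value store moved under `ctx.store`. A minimal sketch of how the two calls pair up across steps of one workflow (the `HandoffWorkflow`, the `AgentStored` event, and the `agent` key are hypothetical; the imports assume the `llama_index.core.workflow` re-exports shipped with llama-index 0.14.x):

```python
from llama_index.core.workflow import (
    Context,
    Event,
    StartEvent,
    StopEvent,
    Workflow,
    step,
)


class AgentStored(Event):
    """Hypothetical marker event emitted once the agent is stashed."""


class HandoffWorkflow(Workflow):
    @step
    async def stash(self, ctx: Context, ev: StartEvent) -> AgentStored:
        # workflows 2.x: the shared key/value store lives under ctx.store
        await ctx.store.set("agent", ev.agent)
        return AgentStored()

    @step
    async def use(self, ctx: Context, ev: AgentStored) -> StopEvent:
        # Any later step can pull the same object back out
        agent = await ctx.store.get("agent")
        return StopEvent(result=f"got {agent!r}")
```

Running `await HandoffWorkflow().run(agent=my_agent)` would exercise both steps; `llm`, `tools`, and `verbose` can be carried between steps the same way, matching the README text above.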
@@ -868,7 +870,7 @@ from vectara_agentic import AgentConfig, AgentType, ModelProvider
  agent_config = AgentConfig(
      agent_type = AgentType.REACT,
      main_llm_provider = ModelProvider.ANTHROPIC,
-     main_llm_model_name = 'claude-3-5-sonnet-20241022',
+     main_llm_model_name = 'claude-4-5-sonnet',
      tool_llm_provider = ModelProvider.TOGETHER,
      tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
  )
@@ -886,7 +888,7 @@ The `AgentConfig` object may include the following items:
  - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
 
  > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
- - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-lite).
+ - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-5, Gemini uses models/gemini-2.5-flash, Together.AI uses deepseek-ai/DeepSeek-V3, GROQ uses openai/gpt-oss-20b, Bedrock uses us.anthropic.claude-sonnet-4-20250514-v1:0, Cohere uses command-a-03-2025).
  - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
  - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
 
@@ -913,6 +915,7 @@ config = AgentConfig(
      main_llm_model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
      private_llm_api_base="http://vllm-server.company.com/v1",
      private_llm_api_key="TEST_API_KEY",
+     private_llm_max_tokens=8192,  # Optional: set max output tokens for your private LLM
  )
 
  agent = Agent(
@@ -661,13 +661,13 @@ If you want to use `agent`, `tools`, `llm` or `verbose` in other events (that ar
  the `Context` of the Workflow as follows:
 
  ```python
- await ctx.set("agent", ev.agent)
+ await ctx.store.set("agent", ev.agent)
  ```
 
  and then in any other event you can pull that agent object with
 
  ```python
- agent = await ctx.get("agent")
+ agent = await ctx.store.get("agent")
  ```
 
  Similarly you can reuse the `llm`, `tools` or `verbose` arguments within other nodes in the workflow.
@@ -793,7 +793,7 @@ from vectara_agentic import AgentConfig, AgentType, ModelProvider
  agent_config = AgentConfig(
      agent_type = AgentType.REACT,
      main_llm_provider = ModelProvider.ANTHROPIC,
-     main_llm_model_name = 'claude-3-5-sonnet-20241022',
+     main_llm_model_name = 'claude-4-5-sonnet',
      tool_llm_provider = ModelProvider.TOGETHER,
      tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
  )
@@ -811,7 +811,7 @@ The `AgentConfig` object may include the following items:
  - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
 
  > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
- - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-lite).
+ - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-5, Gemini uses models/gemini-2.5-flash, Together.AI uses deepseek-ai/DeepSeek-V3, GROQ uses openai/gpt-oss-20b, Bedrock uses us.anthropic.claude-sonnet-4-20250514-v1:0, Cohere uses command-a-03-2025).
  - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
  - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
 
@@ -838,6 +838,7 @@ config = AgentConfig(
      main_llm_model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
      private_llm_api_base="http://vllm-server.company.com/v1",
      private_llm_api_key="TEST_API_KEY",
+     private_llm_max_tokens=8192,  # Optional: set max output tokens for your private LLM
  )
 
  agent = Agent(
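Read together with the context lines above it, the new `private_llm_max_tokens` field slots into a self-hosted-model config like this. A sketch reusing the README's placeholder server and key, and assuming `ModelProvider.PRIVATE` is the enum value for self-hosted endpoints (the provider line sits outside the hunk):

```python
from vectara_agentic import AgentConfig, ModelProvider

config = AgentConfig(
    main_llm_provider=ModelProvider.PRIVATE,  # assumption: provider value for self-hosted LLMs
    main_llm_model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
    private_llm_api_base="http://vllm-server.company.com/v1",
    private_llm_api_key="TEST_API_KEY",
    private_llm_max_tokens=8192,  # new in this release: cap on output tokens
)
```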
@@ -0,0 +1,46 @@
+ llama-index==0.14.7
+ llama-index-core==0.14.7
+ llama-index-workflows==2.10.3
+ llama-index-cli==0.5.3
+ llama-index-indices-managed-vectara==0.5.1
+ llama-index-llms-openai==0.6.7
+ llama-index-llms-openai-like==0.5.3
+ llama-index-llms-anthropic==0.9.7
+ llama-index-llms-together==0.4.1
+ llama-index-llms-groq==0.4.1
+ llama-index-llms-cohere==0.6.1
+ llama-index-llms-google-genai==0.7.1
+ google_genai==1.48.0
+ llama-index-llms-bedrock-converse==0.11.0
+ llama-index-tools-yahoo-finance==0.4.1
+ llama-index-tools-arxiv==0.4.1
+ llama-index-tools-database==0.4.1
+ llama-index-tools-google==0.6.2
+ llama-index-tools-tavily_research==0.4.1
+ llama_index.tools.brave_search==0.4.1
+ llama-index-tools-neo4j==0.4.1
+ llama-index-tools-waii==0.4.1
+ llama-index-graph-stores-kuzu==0.9.1
+ llama-index-tools-salesforce==0.4.1
+ llama-index-tools-slack==0.4.1
+ llama-index-tools-exa==0.4.1
+ llama-index-tools-wikipedia==0.4.1
+ llama-index-tools-bing-search==0.4.1
+ openai>=1.99.3
+ tavily-python>=0.7.10
+ exa-py>=1.14.20
+ openinference-instrumentation-llama-index==4.3.8
+ opentelemetry-proto>=1.31.0
+ arize-phoenix==12.9.0
+ arize-phoenix-otel==0.13.1
+ arize-phoenix-client==1.21.0
+ arize-phoenix-evals==2.5.0
+ protobuf==5.29.5
+ tokenizers>=0.20
+ pydantic>=2.11.5
+ pandas==2.2.3
+ retrying==1.4.2
+ python-dotenv==1.0.1
+ cloudpickle>=3.1.1
+ httpx==0.28.1
+ commonmark==0.9.1
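The one-line change to `vectara_agentic/_version.py` in the file list is consistent with a plain version-string bump. A quick post-upgrade sanity check, assuming the package re-exports `__version__` at the top level:

```python
import vectara_agentic

# Assumes __version__ is surfaced from _version.py by the package __init__.
assert vectara_agentic.__version__ == "0.4.10", vectara_agentic.__version__
```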
@@ -68,7 +68,7 @@ def validate_api_keys(models_to_test: List[Dict]) -> None:
              missing_keys.append(key)
 
      if missing_keys:
-         print("ERROR: Missing required API keys for benchmark execution:")
+         print("ERROR: Missing required API keys for benchmark execution:")
          print()
          for key in sorted(missing_keys):
              print(f" • {key}")
@@ -83,7 +83,7 @@ def validate_api_keys(models_to_test: List[Dict]) -> None:
 
          sys.exit(1)
 
-     print("All required API keys are present")
+     print("All required API keys are present")
      print(f"Found API keys for {len(required_keys)} required environment variables")
 
 
@@ -135,7 +135,7 @@ class ModelBenchmark:
          {"provider": ModelProvider.OPENAI, "model": "gpt-5-mini"},
          {"provider": ModelProvider.OPENAI, "model": "gpt-4o-mini"},
          {"provider": ModelProvider.OPENAI, "model": "gpt-4.1-mini"},
-         {"provider": ModelProvider.ANTHROPIC, "model": "claude-sonnet-4-20250514"},
+         {"provider": ModelProvider.ANTHROPIC, "model": "claude-sonnet-4-5"},
          {"provider": ModelProvider.TOGETHER, "model": "deepseek-ai/DeepSeek-V3"},
          {"provider": ModelProvider.GROQ, "model": "openai/gpt-oss-20b"},
          {"provider": ModelProvider.GEMINI, "model": "models/gemini-2.5-flash-lite"},
@@ -817,11 +817,11 @@ class ModelBenchmark:
          observability_setup = setup_observer(dummy_config, verbose=True)
          if observability_setup:
              print(
-                 "Arize Phoenix observability enabled - LLM calls will be traced\n"
+                 "Arize Phoenix observability enabled - LLM calls will be traced\n"
              )
              _observability_initialized = True
          else:
-             print("⚠️ Arize Phoenix observability setup failed\n")
+             print("Arize Phoenix observability setup failed\n")
 
          # Create semaphore to limit concurrent model testing
          model_semaphore = asyncio.Semaphore(self.max_concurrent_models)
@@ -835,7 +835,7 @@ class ModelBenchmark:
              tasks.append(task)
 
          # Execute all model benchmarks in parallel
-         print("🚀 Starting parallel benchmark execution...\n")
+         print("Starting parallel benchmark execution...\n")
          await asyncio.gather(*tasks, return_exceptions=True)
 
      async def _run_model_benchmark(
  async def _run_model_benchmark(
@@ -857,9 +857,9 @@ class ModelBenchmark:
857
857
  provider, model_name, test_name, test_config
858
858
  )
859
859
  except Exception as e:
860
- print(f"Error in {model_name} - {test_name}: {e}")
860
+ print(f"Error in {model_name} - {test_name}: {e}")
861
861
 
862
- print(f"Completed: {provider.value} - {model_name}")
862
+ print(f"Completed: {provider.value} - {model_name}")
863
863
 
864
864
  async def _run_scenario_benchmark(
865
865
  self,
@@ -892,18 +892,18 @@ class ModelBenchmark:
 
                  if result.error:
                      print(
-                         f"{model_name}/{test_name} Iteration {iteration_num}: {result.error}"
+                         f"{model_name}/{test_name} Iteration {iteration_num}: {result.error}"
                      )
                  else:
                      print(
-                         f"{model_name}/{test_name} Iteration {iteration_num}: "
+                         f"{model_name}/{test_name} Iteration {iteration_num}: "
                          f"{result.total_response_time:.2f}s, "
                          f"first token: {result.first_token_latency:.2f}s, "
                          f"{result.tokens_per_second:.1f} chars/sec"
                      )
 
              except Exception as e:
-                 print(f"{model_name}/{test_name} Iteration {iteration_num}: {e}")
+                 print(f"{model_name}/{test_name} Iteration {iteration_num}: {e}")
                  # Create error result
                  error_result = BenchmarkResult(
                      model_name=model_name,
@@ -929,7 +929,7 @@ class ModelBenchmark:
          successful = len([r for r in iteration_results if r.error is None])
          success_rate = (successful / len(iteration_results)) * 100
          print(
-             f" 📊 {model_name}/{test_name} complete: {successful}/{len(iteration_results)} successful ({success_rate:.1f}%)"
+             f"{model_name}/{test_name} complete: {successful}/{len(iteration_results)} successful ({success_rate:.1f}%)"
          )
 
          return iteration_results
@@ -1,6 +1,7 @@
  #!/usr/bin/env python3
  import json
  import logging
+ import os
  from functools import wraps
  from flask import Flask, request, Response, jsonify
  from openai import OpenAI
@@ -16,6 +17,15 @@ werkzeug_log.setLevel(logging.ERROR)
  # Load expected API key from environment (fallback for testing)
  EXPECTED_API_KEY = "TEST_API_KEY"
 
+ # Together.AI models that should be routed to Together.AI
+ TOGETHER_MODELS = [
+     "openai/gpt-oss-120b",
+     "openai/gpt-oss-20b",
+     "deepseek-ai/DeepSeek-V3",
+     "deepseek-ai/DeepSeek-V3.1",
+     "deepseek-ai/DeepSeek-R1",
+ ]
+
 
  # Authentication decorator
  def require_api_key(f):
@@ -41,7 +51,8 @@ def log_request_info():
  @require_api_key
  def chat_completions():
      """
-     Proxy endpoint for OpenAI Chat Completions.
+     Proxy endpoint for OpenAI-compatible Chat Completions.
+     Routes to different backends based on model name.
      Supports both streaming and non-streaming modes.
      """
      try:
@@ -49,7 +60,21 @@ def chat_completions():
      except Exception:
          return jsonify({"error": "Invalid JSON payload"}), 400
 
-     client = OpenAI()
+     # Determine which client to use based on model name
+     model_name = data.get("model", "")
+     if model_name in TOGETHER_MODELS:
+         # Route to Together.AI
+         together_api_key = os.getenv("TOGETHER_API_KEY")
+         if not together_api_key:
+             return jsonify({"error": "TOGETHER_API_KEY environment variable not set"}), 500
+         client = OpenAI(
+             api_key=together_api_key,
+             base_url="https://api.together.xyz/v1"
+         )
+     else:
+         # Default to OpenAI
+         client = OpenAI()
+
      is_stream = data.get("stream", False)
 
      if is_stream:
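Since both branches keep the OpenAI wire format, a test client only needs to point `base_url` at the proxy and send the shared test key. A sketch; the host/port and the `/v1` mount point are assumptions, since the route decorator sits outside this hunk:

```python
from openai import OpenAI

# Hypothetical local address for the Flask proxy in tests/endpoint.py.
client = OpenAI(base_url="http://127.0.0.1:5000/v1", api_key="TEST_API_KEY")

# A model on the TOGETHER_MODELS list is routed to Together.AI;
# any other model name falls through to OpenAI.
resp = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-V3",
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)
```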
@@ -13,7 +13,6 @@ from vectara_agentic.agent_config import AgentConfig
  from vectara_agentic.types import ModelProvider, ObserverType
  from vectara_agentic.tools import ToolsFactory
 
- from vectara_agentic.agent_core.prompts import GENERAL_INSTRUCTIONS
  from conftest import mult, STANDARD_TEST_TOPIC, STANDARD_TEST_INSTRUCTIONS
 
 
@@ -54,9 +53,11 @@ class TestAgentPackage(unittest.TestCase):
          + date.today().strftime("%A, %B %d, %Y")
          + " with Always do as your mother tells you!"
      )
+     # Test format_prompt with dummy instructions since we're only testing template substitution
+     dummy_instructions = "Test instructions"
      self.assertEqual(
          format_prompt(
-             prompt_template, GENERAL_INSTRUCTIONS, topic, custom_instructions
+             prompt_template, dummy_instructions, topic, custom_instructions
          ),
          expected_output,
      )
@@ -83,7 +84,7 @@ class TestAgentPackage(unittest.TestCase):
      config = AgentConfig(
          agent_type=AgentType.REACT,
          main_llm_provider=ModelProvider.ANTHROPIC,
-         main_llm_model_name="claude-sonnet-4-20250514",
+         main_llm_model_name="claude-sonnet-4-5",
          tool_llm_provider=ModelProvider.TOGETHER,
          tool_llm_model_name="moonshotai/Kimi-K2-Instruct",
          observer=ObserverType.ARIZE_PHOENIX,
@@ -0,0 +1,170 @@
+ # Suppress external dependency warnings before any other imports
+ import warnings
+
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ import unittest
+ import threading
+
+ from vectara_agentic.agent import Agent
+ from vectara_agentic.tools import ToolsFactory
+ from vectara_agentic.tools_catalog import ToolsCatalog
+
+ import nest_asyncio
+
+ nest_asyncio.apply()
+
+ from conftest import (
+     mult,
+     add,
+     fc_config_bedrock,
+     STANDARD_TEST_TOPIC,
+     STANDARD_TEST_INSTRUCTIONS,
+ )
+
+ ARIZE_LOCK = threading.Lock()
+
+
+ class TestBedrock(unittest.IsolatedAsyncioTestCase):
+
+     async def test_multiturn(self):
+         with ARIZE_LOCK:
+             tools = [ToolsFactory().create_tool(mult)]
+             agent = Agent(
+                 tools=tools,
+                 topic=STANDARD_TEST_TOPIC,
+                 custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+                 agent_config=fc_config_bedrock,
+             )
+
+             # First calculation: 5 * 10 = 50
+             stream1 = await agent.astream_chat(
+                 "What is 5 times 10. Only give the answer, nothing else"
+             )
+             # Consume the stream
+             async for chunk in stream1.async_response_gen():
+                 pass
+             _ = await stream1.aget_response()
+
+             # Second calculation: 3 * 7 = 21
+             stream2 = await agent.astream_chat(
+                 "what is 3 times 7. Only give the answer, nothing else"
+             )
+             # Consume the stream
+             async for chunk in stream2.async_response_gen():
+                 pass
+             _ = await stream2.aget_response()
+
+             # Final calculation: 50 * 21 = 1050
+             stream3 = await agent.astream_chat(
+                 "multiply the results of the last two questions. Output only the answer."
+             )
+             # Consume the stream
+             async for chunk in stream3.async_response_gen():
+                 pass
+             response3 = await stream3.aget_response()
+
+             self.assertEqual(response3.response, "1050")
+
+     async def test_claude_sonnet_4_multi_tool_chain(self):
+         """Test Claude Sonnet 4 with complex multi-step reasoning chain using multiple tools via Bedrock."""
+         with ARIZE_LOCK:
+             # Use Bedrock config (Claude Sonnet 4)
+             tools_catalog = ToolsCatalog(fc_config_bedrock)
+             tools = [
+                 ToolsFactory().create_tool(mult),
+                 ToolsFactory().create_tool(add),
+                 ToolsFactory().create_tool(tools_catalog.summarize_text),
+                 ToolsFactory().create_tool(tools_catalog.rephrase_text),
+             ]
+
+             agent = Agent(
+                 agent_config=fc_config_bedrock,
+                 tools=tools,
+                 topic=STANDARD_TEST_TOPIC,
+                 custom_instructions="You are a mathematical reasoning agent that explains your work step by step.",
+             )
+
+             # Complex multi-step reasoning task
+             complex_query = (
+                 "Perform this calculation step by step: "
+                 "First multiply 5 by 9, then add 13 to that result, "
+                 "then multiply the new result by 2. "
+                 "After getting the final number, summarize the entire mathematical process "
+                 "with expertise in 'mathematics education', "
+                 "then rephrase that summary as a 10-year-old would explain it."
+             )
+
+             print("\nStarting Claude Sonnet 4 multi-tool chain test (Bedrock)")
+             print(f"Query: {complex_query}")
+             print("Streaming response:\n" + "="*50)
+
+             stream = await agent.astream_chat(complex_query)
+
+             # Capture streaming deltas and tool calls
+             streaming_deltas = []
+             tool_calls_made = []
+             full_response = ""
+
+             async for chunk in stream.async_response_gen():
+                 if chunk and chunk.strip():
+                     streaming_deltas.append(chunk)
+                     full_response += chunk
+                     # Display each streaming delta
+                     print(f"Delta: {repr(chunk)}")
+
+                     # Track tool calls in the stream
+                     if "mult" in chunk.lower():
+                         if "mult" not in [call["tool"] for call in tool_calls_made]:
+                             tool_calls_made.append({"tool": "mult", "order": len(tool_calls_made) + 1})
+                             print(f"Tool call detected: mult (#{len(tool_calls_made)})")
+                     if "add" in chunk.lower():
+                         if "add" not in [call["tool"] for call in tool_calls_made]:
+                             tool_calls_made.append({"tool": "add", "order": len(tool_calls_made) + 1})
+                             print(f"Tool call detected: add (#{len(tool_calls_made)})")
+                     if "summarize" in chunk.lower():
+                         if "summarize_text" not in [call["tool"] for call in tool_calls_made]:
+                             tool_calls_made.append({"tool": "summarize_text", "order": len(tool_calls_made) + 1})
+                             print(f"Tool call detected: summarize_text (#{len(tool_calls_made)})")
+                     if "rephrase" in chunk.lower():
+                         if "rephrase_text" not in [call["tool"] for call in tool_calls_made]:
+                             tool_calls_made.append({"tool": "rephrase_text", "order": len(tool_calls_made) + 1})
+                             print(f"Tool call detected: rephrase_text (#{len(tool_calls_made)})")
+
+             response = await stream.aget_response()
+
+             print("="*50)
+             print(f"Streaming completed. Total deltas: {len(streaming_deltas)}")
+             print(f"Tool calls made: {[call['tool'] for call in tool_calls_made]}")
+             print(f"📄 Final response length: {len(response.response)} chars")
+             print(f"Final response: {response.response}")
+
+             # Validate tool usage sequence
+             tools_used = [call["tool"] for call in tool_calls_made]
+             print(f"🧪 Tools used in order: {tools_used}")
+
+             # Check that at least multiplication happened (basic requirement)
+             self.assertIn("mult", tools_used, f"Expected multiplication tool to be used. Tools used: {tools_used}")
+
+             # Check for mathematical results in the full response or streaming deltas
+             # Expected: 5*9=45, 45+13=58, 58*2=116
+             expected_intermediate_results = ["45", "58", "116"]
+             all_text = (full_response + " " + response.response).lower()
+             math_results_found = sum(1 for result in expected_intermediate_results
+                                      if result in all_text)
+
+             print(f"🔢 Mathematical results found: {math_results_found}/3 expected")
+             print(f"Full text searched: {all_text[:200]}...")
+
+             # More lenient assertion - just check that some mathematical progress was made
+             self.assertGreaterEqual(math_results_found, 1,
+                                     f"Expected at least 1 mathematical result. Found {math_results_found}. "
+                                     f"Full text: {all_text}")
+
+             # Verify that streaming actually produced content
+             self.assertGreater(len(streaming_deltas), 0, "Expected streaming deltas to be produced")
+             self.assertGreater(len(response.response.strip()), 0, "Expected non-empty final response")
+
+
+ if __name__ == "__main__":
+     unittest.main()
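To run just this suite from the repository root, a sketch using only the standard library (assumes Bedrock credentials and the conftest fixtures are available under `tests/`):

```python
# Hypothetical direct invocation; equivalent to `python -m unittest tests.test_bedrock`.
import unittest

suite = unittest.defaultTestLoader.discover("tests", pattern="test_bedrock.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```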