vectara-agentic 0.4.3__tar.gz → 0.4.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectara_agentic-0.4.3/vectara_agentic.egg-info → vectara_agentic-0.4.10}/PKG-INFO +41 -37
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/README.md +6 -5
- vectara_agentic-0.4.10/requirements.txt +46 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/benchmark_models.py +12 -12
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/endpoint.py +27 -2
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_agent.py +4 -3
- vectara_agentic-0.4.10/tests/test_bedrock.py +170 -0
- vectara_agentic-0.4.10/tests/test_gemini.py +147 -0
- vectara_agentic-0.4.10/tests/test_groq.py +193 -0
- vectara_agentic-0.4.10/tests/test_openai.py +261 -0
- vectara_agentic-0.4.10/tests/test_private_llm.py +161 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_react_streaming.py +26 -2
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_streaming.py +0 -44
- vectara_agentic-0.4.10/tests/test_together.py +168 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_tools.py +161 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/_version.py +1 -1
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent.py +19 -30
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_config.py +5 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/factory.py +11 -4
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/prompts.py +72 -16
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/serialization.py +3 -3
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/streaming.py +176 -198
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/hallucination.py +33 -1
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/tools.py +19 -11
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/db_tools.py +4 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/llm_utils.py +133 -16
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/sub_query_workflow.py +31 -31
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/tools.py +194 -32
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/utils.py +35 -10
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10/vectara_agentic.egg-info}/PKG-INFO +41 -37
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic.egg-info/SOURCES.txt +1 -0
- vectara_agentic-0.4.10/vectara_agentic.egg-info/requires.txt +46 -0
- vectara_agentic-0.4.3/requirements.txt +0 -43
- vectara_agentic-0.4.3/tests/test_bedrock.py +0 -69
- vectara_agentic-0.4.3/tests/test_gemini.py +0 -57
- vectara_agentic-0.4.3/tests/test_groq.py +0 -69
- vectara_agentic-0.4.3/tests/test_private_llm.py +0 -90
- vectara_agentic-0.4.3/tests/test_together.py +0 -70
- vectara_agentic-0.4.3/vectara_agentic.egg-info/requires.txt +0 -43
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/LICENSE +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/MANIFEST.in +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/setup.cfg +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/setup.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/conftest.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/run_tests.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_agent_fallback_memory.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_agent_memory_consistency.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_agent_type.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_api_endpoint.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_fallback.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_react_error_handling.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_react_memory.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_react_workflow_events.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_return_direct.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_serialization.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_session_memory.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_vectara_llms.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_vhc.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_workflow.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/_callback.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/_observability.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/__init__.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/logging.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_core/utils/schemas.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/agent_endpoint.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/tool_utils.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/tools_catalog.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic/types.py +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic.egg-info/dependency_links.txt +0 -0
- {vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/vectara_agentic.egg-info/top_level.txt +0 -0
{vectara_agentic-0.4.3/vectara_agentic.egg-info → vectara_agentic-0.4.10}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectara_agentic
-Version: 0.4.3
+Version: 0.4.10
 Summary: A Python package for creating AI Assistants and AI Agents with Vectara
 Home-page: https://github.com/vectara/py-vectara-agentic
 Author: Ofer Mendelevitch

@@ -16,45 +16,48 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: llama-index==0.
-Requires-Dist: llama-index-core==0.
-Requires-Dist: llama-index-workflows==
-Requires-Dist: llama-index-cli==0.5.
-Requires-Dist: llama-index-indices-managed-vectara==0.5.
-Requires-Dist: llama-index-llms-openai==0.
-Requires-Dist: llama-index-llms-openai-like==0.5.
-Requires-Dist: llama-index-llms-anthropic==0.
-Requires-Dist: llama-index-llms-together==0.4.
-Requires-Dist: llama-index-llms-groq==0.4.
-Requires-Dist: llama-index-llms-cohere==0.6.
-Requires-Dist: llama-index-llms-google-genai==0.
-Requires-Dist:
-Requires-Dist: llama-index-
-Requires-Dist: llama-index-tools-
-Requires-Dist: llama-index-tools-
-Requires-Dist: llama-index-tools-
-Requires-Dist: llama-index-tools-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: llama-index-tools-
-Requires-Dist: llama-index-
-Requires-Dist: llama-index-
-Requires-Dist: llama-index-tools-
-Requires-Dist: llama-index-tools-
-Requires-Dist: llama-index-tools-
-Requires-Dist: llama-index-tools-
+Requires-Dist: llama-index==0.14.7
+Requires-Dist: llama-index-core==0.14.7
+Requires-Dist: llama-index-workflows==2.10.3
+Requires-Dist: llama-index-cli==0.5.3
+Requires-Dist: llama-index-indices-managed-vectara==0.5.1
+Requires-Dist: llama-index-llms-openai==0.6.7
+Requires-Dist: llama-index-llms-openai-like==0.5.3
+Requires-Dist: llama-index-llms-anthropic==0.9.7
+Requires-Dist: llama-index-llms-together==0.4.1
+Requires-Dist: llama-index-llms-groq==0.4.1
+Requires-Dist: llama-index-llms-cohere==0.6.1
+Requires-Dist: llama-index-llms-google-genai==0.7.1
+Requires-Dist: google_genai==1.48.0
+Requires-Dist: llama-index-llms-bedrock-converse==0.11.0
+Requires-Dist: llama-index-tools-yahoo-finance==0.4.1
+Requires-Dist: llama-index-tools-arxiv==0.4.1
+Requires-Dist: llama-index-tools-database==0.4.1
+Requires-Dist: llama-index-tools-google==0.6.2
+Requires-Dist: llama-index-tools-tavily_research==0.4.1
+Requires-Dist: llama_index.tools.brave_search==0.4.1
+Requires-Dist: llama-index-tools-neo4j==0.4.1
+Requires-Dist: llama-index-tools-waii==0.4.1
+Requires-Dist: llama-index-graph-stores-kuzu==0.9.1
+Requires-Dist: llama-index-tools-salesforce==0.4.1
+Requires-Dist: llama-index-tools-slack==0.4.1
+Requires-Dist: llama-index-tools-exa==0.4.1
+Requires-Dist: llama-index-tools-wikipedia==0.4.1
+Requires-Dist: llama-index-tools-bing-search==0.4.1
 Requires-Dist: openai>=1.99.3
 Requires-Dist: tavily-python>=0.7.10
 Requires-Dist: exa-py>=1.14.20
-Requires-Dist: openinference-instrumentation-llama-index==4.3.
+Requires-Dist: openinference-instrumentation-llama-index==4.3.8
 Requires-Dist: opentelemetry-proto>=1.31.0
-Requires-Dist: arize-phoenix==
-Requires-Dist: arize-phoenix-otel==0.
+Requires-Dist: arize-phoenix==12.9.0
+Requires-Dist: arize-phoenix-otel==0.13.1
+Requires-Dist: arize-phoenix-client==1.21.0
+Requires-Dist: arize-phoenix-evals==2.5.0
 Requires-Dist: protobuf==5.29.5
 Requires-Dist: tokenizers>=0.20
 Requires-Dist: pydantic>=2.11.5
 Requires-Dist: pandas==2.2.3
-Requires-Dist: retrying==1.
+Requires-Dist: retrying==1.4.2
 Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: httpx==0.28.1
@@ -735,13 +738,13 @@ If you want to use `agent`, `tools`, `llm` or `verbose` in other events (that ar
 the `Context` of the Workflow as follows:
 
 ```python
-await ctx.set("agent", ev.agent)
+await ctx.store.set("agent", ev.agent)
 ```
 
 and then in any other event you can pull that agent object with
 
 ```python
-agent = await ctx.get("agent")
+agent = await ctx.store.get("agent")
 ```
 
 Similarly you can reuse the `llm`, `tools` or `verbose` arguments within other nodes in the workflow.
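The `ctx.set`/`ctx.get` → `ctx.store.set`/`ctx.store.get` change tracks the llama-index workflows API, which moved per-run state under the context's `store` namespace. A minimal sketch of the new pattern, assuming a recent `llama-index-core` (the `StoreDemo` workflow and the stored value are illustrative, not from this package):

```python
# Minimal sketch of the ctx.store state API, assuming a recent llama-index-core.
# StoreDemo and the stored value are illustrative, not from vectara-agentic.
import asyncio

from llama_index.core.workflow import (
    Context,
    StartEvent,
    StopEvent,
    Workflow,
    step,
)


class StoreDemo(Workflow):
    @step
    async def save_and_load(self, ctx: Context, ev: StartEvent) -> StopEvent:
        # New API: per-run state is namespaced under ctx.store
        await ctx.store.set("agent", "demo-agent")
        agent = await ctx.store.get("agent")
        return StopEvent(result=agent)


async def main():
    result = await StoreDemo().run()
    print(result)  # "demo-agent"


asyncio.run(main())
```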
@@ -867,9 +870,9 @@ from vectara_agentic import AgentConfig, AgentType, ModelProvider
 agent_config = AgentConfig(
     agent_type = AgentType.REACT,
     main_llm_provider = ModelProvider.ANTHROPIC,
-    main_llm_model_name = 'claude-
+    main_llm_model_name = 'claude-4-5-sonnet',
     tool_llm_provider = ModelProvider.TOGETHER,
-    tool_llm_model_name = '
+    tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
 )
 
 agent = Agent(
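The hunk cuts off at `agent = Agent(`; for orientation, here is a hedged sketch of a complete construction using the constructor arguments this package's own tests pass (the `mult` tool, topic, and instructions below are placeholders, not from the diff):

```python
# Hedged sketch completing the truncated Agent(...) call; the tool, topic,
# and instructions are placeholders. Only the argument names come from the
# package's tests.
from vectara_agentic.agent import Agent
from vectara_agentic.tools import ToolsFactory


def mult(x: float, y: float) -> float:
    """Multiply two numbers."""
    return x * y


agent = Agent(
    tools=[ToolsFactory().create_tool(mult)],
    topic="arithmetic",
    custom_instructions="Answer concisely.",
    agent_config=agent_config,  # the AgentConfig built above
)
```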
@@ -885,7 +888,7 @@ The `AgentConfig` object may include the following items:
 - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
 
 > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
-- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-
+- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-5, Gemini uses models/gemini-2.5-flash, Together.AI uses deepseek-ai/DeepSeek-V3, GROQ uses openai/gpt-oss-20b, Bedrock uses us.anthropic.claude-sonnet-4-20250514-v1:0, Cohere uses command-a-03-2025).
 - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
 - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
 
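Given the per-provider defaults listed in the updated bullet, omitting the model name selects the provider's default; a short hedged illustration:

```python
# Hedged illustration of the per-provider model defaults described above.
from vectara_agentic import AgentConfig, ModelProvider

# No model name given: falls back to the provider default
# (models/gemini-2.5-flash for GEMINI, per the updated bullet).
cfg_default = AgentConfig(main_llm_provider=ModelProvider.GEMINI)

# An explicit model name overrides the default.
cfg_pinned = AgentConfig(
    main_llm_provider=ModelProvider.GEMINI,
    main_llm_model_name="models/gemini-2.5-flash-lite",
)
```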
@@ -912,6 +915,7 @@ config = AgentConfig(
     main_llm_model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
     private_llm_api_base="http://vllm-server.company.com/v1",
     private_llm_api_key="TEST_API_KEY",
+    private_llm_max_tokens=8192,  # Optional: set max output tokens for your private LLM
 )
 
 agent = Agent(
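The hunk shows only the tail of the private-LLM config; a hedged reconstruction of the full call follows, where `ModelProvider.PRIVATE` is an assumption inferred from the `private_llm_*` field names, and the URL/key are the placeholders from the docs:

```python
# Hedged reconstruction of the full private-LLM AgentConfig; the PRIVATE
# provider value is an assumption inferred from the private_llm_* fields.
from vectara_agentic import AgentConfig, ModelProvider

config = AgentConfig(
    main_llm_provider=ModelProvider.PRIVATE,  # assumption, not shown in the hunk
    main_llm_model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
    private_llm_api_base="http://vllm-server.company.com/v1",
    private_llm_api_key="TEST_API_KEY",
    private_llm_max_tokens=8192,  # new in 0.4.10: cap output tokens
)
```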
{vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/README.md

@@ -661,13 +661,13 @@ If you want to use `agent`, `tools`, `llm` or `verbose` in other events (that ar
 the `Context` of the Workflow as follows:
 
 ```python
-await ctx.set("agent", ev.agent)
+await ctx.store.set("agent", ev.agent)
 ```
 
 and then in any other event you can pull that agent object with
 
 ```python
-agent = await ctx.get("agent")
+agent = await ctx.store.get("agent")
 ```
 
 Similarly you can reuse the `llm`, `tools` or `verbose` arguments within other nodes in the workflow.

@@ -793,9 +793,9 @@ from vectara_agentic import AgentConfig, AgentType, ModelProvider
 agent_config = AgentConfig(
     agent_type = AgentType.REACT,
     main_llm_provider = ModelProvider.ANTHROPIC,
-    main_llm_model_name = 'claude-
+    main_llm_model_name = 'claude-4-5-sonnet',
     tool_llm_provider = ModelProvider.TOGETHER,
-    tool_llm_model_name = '
+    tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
 )
 
 agent = Agent(

@@ -811,7 +811,7 @@ The `AgentConfig` object may include the following items:
 - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
 
 > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
-- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-
+- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-5, Gemini uses models/gemini-2.5-flash, Together.AI uses deepseek-ai/DeepSeek-V3, GROQ uses openai/gpt-oss-20b, Bedrock uses us.anthropic.claude-sonnet-4-20250514-v1:0, Cohere uses command-a-03-2025).
 - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
 - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
 

@@ -838,6 +838,7 @@ config = AgentConfig(
     main_llm_model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
     private_llm_api_base="http://vllm-server.company.com/v1",
     private_llm_api_key="TEST_API_KEY",
+    private_llm_max_tokens=8192,  # Optional: set max output tokens for your private LLM
 )
 
 agent = Agent(
vectara_agentic-0.4.10/requirements.txt

@@ -0,0 +1,46 @@
+llama-index==0.14.7
+llama-index-core==0.14.7
+llama-index-workflows==2.10.3
+llama-index-cli==0.5.3
+llama-index-indices-managed-vectara==0.5.1
+llama-index-llms-openai==0.6.7
+llama-index-llms-openai-like==0.5.3
+llama-index-llms-anthropic==0.9.7
+llama-index-llms-together==0.4.1
+llama-index-llms-groq==0.4.1
+llama-index-llms-cohere==0.6.1
+llama-index-llms-google-genai==0.7.1
+google_genai==1.48.0
+llama-index-llms-bedrock-converse==0.11.0
+llama-index-tools-yahoo-finance==0.4.1
+llama-index-tools-arxiv==0.4.1
+llama-index-tools-database==0.4.1
+llama-index-tools-google==0.6.2
+llama-index-tools-tavily_research==0.4.1
+llama_index.tools.brave_search==0.4.1
+llama-index-tools-neo4j==0.4.1
+llama-index-tools-waii==0.4.1
+llama-index-graph-stores-kuzu==0.9.1
+llama-index-tools-salesforce==0.4.1
+llama-index-tools-slack==0.4.1
+llama-index-tools-exa==0.4.1
+llama-index-tools-wikipedia==0.4.1
+llama-index-tools-bing-search==0.4.1
+openai>=1.99.3
+tavily-python>=0.7.10
+exa-py>=1.14.20
+openinference-instrumentation-llama-index==4.3.8
+opentelemetry-proto>=1.31.0
+arize-phoenix==12.9.0
+arize-phoenix-otel==0.13.1
+arize-phoenix-client==1.21.0
+arize-phoenix-evals==2.5.0
+protobuf==5.29.5
+tokenizers>=0.20
+pydantic>=2.11.5
+pandas==2.2.3
+retrying==1.4.2
+python-dotenv==1.0.1
+cloudpickle>=3.1.1
+httpx==0.28.1
+commonmark==0.9.1
{vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/benchmark_models.py

@@ -68,7 +68,7 @@ def validate_api_keys(models_to_test: List[Dict]) -> None:
             missing_keys.append(key)
 
     if missing_keys:
-        print("
+        print("ERROR: Missing required API keys for benchmark execution:")
        print()
         for key in sorted(missing_keys):
             print(f"   • {key}")

@@ -83,7 +83,7 @@ def validate_api_keys(models_to_test: List[Dict]) -> None:
 
         sys.exit(1)
 
-    print("
+    print("All required API keys are present")
     print(f"Found API keys for {len(required_keys)} required environment variables")
 
 

@@ -135,7 +135,7 @@ class ModelBenchmark:
         {"provider": ModelProvider.OPENAI, "model": "gpt-5-mini"},
         {"provider": ModelProvider.OPENAI, "model": "gpt-4o-mini"},
         {"provider": ModelProvider.OPENAI, "model": "gpt-4.1-mini"},
-        {"provider": ModelProvider.ANTHROPIC, "model": "claude-sonnet-4-
+        {"provider": ModelProvider.ANTHROPIC, "model": "claude-sonnet-4-5"},
         {"provider": ModelProvider.TOGETHER, "model": "deepseek-ai/DeepSeek-V3"},
         {"provider": ModelProvider.GROQ, "model": "openai/gpt-oss-20b"},
         {"provider": ModelProvider.GEMINI, "model": "models/gemini-2.5-flash-lite"},
@@ -817,11 +817,11 @@ class ModelBenchmark:
         observability_setup = setup_observer(dummy_config, verbose=True)
         if observability_setup:
             print(
-                "
+                "Arize Phoenix observability enabled - LLM calls will be traced\n"
             )
             _observability_initialized = True
         else:
-            print("
+            print("Arize Phoenix observability setup failed\n")
 
         # Create semaphore to limit concurrent model testing
         model_semaphore = asyncio.Semaphore(self.max_concurrent_models)

@@ -835,7 +835,7 @@ class ModelBenchmark:
             tasks.append(task)
 
         # Execute all model benchmarks in parallel
-        print("
+        print("Starting parallel benchmark execution...\n")
         await asyncio.gather(*tasks, return_exceptions=True)
 
     async def _run_model_benchmark(

@@ -857,9 +857,9 @@ class ModelBenchmark:
                     provider, model_name, test_name, test_config
                 )
             except Exception as e:
-                print(f"
+                print(f"Error in {model_name} - {test_name}: {e}")
 
-        print(f"
+        print(f"Completed: {provider.value} - {model_name}")
 
     async def _run_scenario_benchmark(
         self,

@@ -892,18 +892,18 @@ class ModelBenchmark:
 
                 if result.error:
                     print(
-                        f"
+                        f"{model_name}/{test_name} Iteration {iteration_num}: {result.error}"
                     )
                 else:
                     print(
-                        f"
+                        f"{model_name}/{test_name} Iteration {iteration_num}: "
                         f"{result.total_response_time:.2f}s, "
                         f"first token: {result.first_token_latency:.2f}s, "
                         f"{result.tokens_per_second:.1f} chars/sec"
                     )
 
             except Exception as e:
-                print(f"
+                print(f"{model_name}/{test_name} Iteration {iteration_num}: {e}")
                 # Create error result
                 error_result = BenchmarkResult(
                     model_name=model_name,

@@ -929,7 +929,7 @@ class ModelBenchmark:
         successful = len([r for r in iteration_results if r.error is None])
         success_rate = (successful / len(iteration_results)) * 100
         print(
-            f"
+            f"{model_name}/{test_name} complete: {successful}/{len(iteration_results)} successful ({success_rate:.1f}%)"
         )
 
         return iteration_results
{vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/endpoint.py

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import json
 import logging
+import os
 from functools import wraps
 from flask import Flask, request, Response, jsonify
 from openai import OpenAI

@@ -16,6 +17,15 @@ werkzeug_log.setLevel(logging.ERROR)
 # Load expected API key from environment (fallback for testing)
 EXPECTED_API_KEY = "TEST_API_KEY"
 
+# Together.AI models that should be routed to Together.AI
+TOGETHER_MODELS = [
+    "openai/gpt-oss-120b",
+    "openai/gpt-oss-20b",
+    "deepseek-ai/DeepSeek-V3",
+    "deepseek-ai/DeepSeek-V3.1",
+    "deepseek-ai/DeepSeek-R1",
+]
+
 
 # Authentication decorator
 def require_api_key(f):

@@ -41,7 +51,8 @@ def log_request_info():
 @require_api_key
 def chat_completions():
     """
-    Proxy endpoint for OpenAI Chat Completions.
+    Proxy endpoint for OpenAI-compatible Chat Completions.
+    Routes to different backends based on model name.
     Supports both streaming and non-streaming modes.
     """
     try:

@@ -49,7 +60,21 @@ def chat_completions():
     except Exception:
         return jsonify({"error": "Invalid JSON payload"}), 400
 
-    client
+    # Determine which client to use based on model name
+    model_name = data.get("model", "")
+    if model_name in TOGETHER_MODELS:
+        # Route to Together.AI
+        together_api_key = os.getenv("TOGETHER_API_KEY")
+        if not together_api_key:
+            return jsonify({"error": "TOGETHER_API_KEY environment variable not set"}), 500
+        client = OpenAI(
+            api_key=together_api_key,
+            base_url="https://api.together.xyz/v1"
+        )
+    else:
+        # Default to OpenAI
+        client = OpenAI()
+
     is_stream = data.get("stream", False)
 
     if is_stream:
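A hedged sketch of how a client would exercise this proxy: the host, port, and `/v1` route prefix are assumptions (Flask defaults and OpenAI client conventions), while the bearer key and the Together-routed model name come from the file itself:

```python
# Hedged usage sketch for the proxy above; host/port/route are assumptions,
# the API key and model name come from endpoint.py.
from openai import OpenAI

client = OpenAI(
    api_key="TEST_API_KEY",               # matches EXPECTED_API_KEY
    base_url="http://localhost:5000/v1",  # assumed Flask host/port and prefix
)

# "deepseek-ai/DeepSeek-V3" is in TOGETHER_MODELS, so the proxy forwards this
# request to Together.AI; any other model name falls through to OpenAI.
resp = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-V3",
    messages=[{"role": "user", "content": "What is 5 times 10?"}],
)
print(resp.choices[0].message.content)
```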
{vectara_agentic-0.4.3 → vectara_agentic-0.4.10}/tests/test_agent.py

@@ -13,7 +13,6 @@ from vectara_agentic.agent_config import AgentConfig
 from vectara_agentic.types import ModelProvider, ObserverType
 from vectara_agentic.tools import ToolsFactory
 
-from vectara_agentic.agent_core.prompts import GENERAL_INSTRUCTIONS
 from conftest import mult, STANDARD_TEST_TOPIC, STANDARD_TEST_INSTRUCTIONS
 
 

@@ -54,9 +53,11 @@ class TestAgentPackage(unittest.TestCase):
             + date.today().strftime("%A, %B %d, %Y")
             + " with Always do as your mother tells you!"
         )
+        # Test format_prompt with dummy instructions since we're only testing template substitution
+        dummy_instructions = "Test instructions"
         self.assertEqual(
             format_prompt(
-                prompt_template,
+                prompt_template, dummy_instructions, topic, custom_instructions
             ),
             expected_output,
         )

@@ -83,7 +84,7 @@ class TestAgentPackage(unittest.TestCase):
         config = AgentConfig(
             agent_type=AgentType.REACT,
             main_llm_provider=ModelProvider.ANTHROPIC,
-            main_llm_model_name="claude-sonnet-4-
+            main_llm_model_name="claude-sonnet-4-5",
             tool_llm_provider=ModelProvider.TOGETHER,
             tool_llm_model_name="moonshotai/Kimi-K2-Instruct",
             observer=ObserverType.ARIZE_PHOENIX,
vectara_agentic-0.4.10/tests/test_bedrock.py

@@ -0,0 +1,170 @@
+# Suppress external dependency warnings before any other imports
+import warnings
+
+warnings.simplefilter("ignore", DeprecationWarning)
+
+import unittest
+import threading
+
+from vectara_agentic.agent import Agent
+from vectara_agentic.tools import ToolsFactory
+from vectara_agentic.tools_catalog import ToolsCatalog
+
+import nest_asyncio
+
+nest_asyncio.apply()
+
+from conftest import (
+    mult,
+    add,
+    fc_config_bedrock,
+    STANDARD_TEST_TOPIC,
+    STANDARD_TEST_INSTRUCTIONS,
+)
+
+ARIZE_LOCK = threading.Lock()
+
+
+class TestBedrock(unittest.IsolatedAsyncioTestCase):
+
+    async def test_multiturn(self):
+        with ARIZE_LOCK:
+            tools = [ToolsFactory().create_tool(mult)]
+            agent = Agent(
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions=STANDARD_TEST_INSTRUCTIONS,
+                agent_config=fc_config_bedrock,
+            )
+
+            # First calculation: 5 * 10 = 50
+            stream1 = await agent.astream_chat(
+                "What is 5 times 10. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream1.async_response_gen():
+                pass
+            _ = await stream1.aget_response()
+
+            # Second calculation: 3 * 7 = 21
+            stream2 = await agent.astream_chat(
+                "what is 3 times 7. Only give the answer, nothing else"
+            )
+            # Consume the stream
+            async for chunk in stream2.async_response_gen():
+                pass
+            _ = await stream2.aget_response()
+
+            # Final calculation: 50 * 21 = 1050
+            stream3 = await agent.astream_chat(
+                "multiply the results of the last two questions. Output only the answer."
+            )
+            # Consume the stream
+            async for chunk in stream3.async_response_gen():
+                pass
+            response3 = await stream3.aget_response()
+
+            self.assertEqual(response3.response, "1050")
+
+    async def test_claude_sonnet_4_multi_tool_chain(self):
+        """Test Claude Sonnet 4 with complex multi-step reasoning chain using multiple tools via Bedrock."""
+        with ARIZE_LOCK:
+            # Use Bedrock config (Claude Sonnet 4)
+            tools_catalog = ToolsCatalog(fc_config_bedrock)
+            tools = [
+                ToolsFactory().create_tool(mult),
+                ToolsFactory().create_tool(add),
+                ToolsFactory().create_tool(tools_catalog.summarize_text),
+                ToolsFactory().create_tool(tools_catalog.rephrase_text),
+            ]
+
+            agent = Agent(
+                agent_config=fc_config_bedrock,
+                tools=tools,
+                topic=STANDARD_TEST_TOPIC,
+                custom_instructions="You are a mathematical reasoning agent that explains your work step by step.",
+            )
+
+            # Complex multi-step reasoning task
+            complex_query = (
+                "Perform this calculation step by step: "
+                "First multiply 5 by 9, then add 13 to that result, "
+                "then multiply the new result by 2. "
+                "After getting the final number, summarize the entire mathematical process "
+                "with expertise in 'mathematics education', "
+                "then rephrase that summary as a 10-year-old would explain it."
+            )
+
+            print("\nStarting Claude Sonnet 4 multi-tool chain test (Bedrock)")
+            print(f"Query: {complex_query}")
+            print("Streaming response:\n" + "="*50)
+
+            stream = await agent.astream_chat(complex_query)
+
+            # Capture streaming deltas and tool calls
+            streaming_deltas = []
+            tool_calls_made = []
+            full_response = ""
+
+            async for chunk in stream.async_response_gen():
+                if chunk and chunk.strip():
+                    streaming_deltas.append(chunk)
+                    full_response += chunk
+                    # Display each streaming delta
+                    print(f"Delta: {repr(chunk)}")
+
+                    # Track tool calls in the stream
+                    if "mult" in chunk.lower():
+                        if "mult" not in [call["tool"] for call in tool_calls_made]:
+                            tool_calls_made.append({"tool": "mult", "order": len(tool_calls_made) + 1})
+                            print(f"Tool call detected: mult (#{len(tool_calls_made)})")
+                    if "add" in chunk.lower():
+                        if "add" not in [call["tool"] for call in tool_calls_made]:
+                            tool_calls_made.append({"tool": "add", "order": len(tool_calls_made) + 1})
+                            print(f"Tool call detected: add (#{len(tool_calls_made)})")
+                    if "summarize" in chunk.lower():
+                        if "summarize_text" not in [call["tool"] for call in tool_calls_made]:
+                            tool_calls_made.append({"tool": "summarize_text", "order": len(tool_calls_made) + 1})
+                            print(f"Tool call detected: summarize_text (#{len(tool_calls_made)})")
+                    if "rephrase" in chunk.lower():
+                        if "rephrase_text" not in [call["tool"] for call in tool_calls_made]:
+                            tool_calls_made.append({"tool": "rephrase_text", "order": len(tool_calls_made) + 1})
+                            print(f"Tool call detected: rephrase_text (#{len(tool_calls_made)})")
+
+            response = await stream.aget_response()
+
+            print("="*50)
+            print(f"Streaming completed. Total deltas: {len(streaming_deltas)}")
+            print(f"Tool calls made: {[call['tool'] for call in tool_calls_made]}")
+            print(f"📄 Final response length: {len(response.response)} chars")
+            print(f"Final response: {response.response}")
+
+            # Validate tool usage sequence
+            tools_used = [call["tool"] for call in tool_calls_made]
+            print(f"🧪 Tools used in order: {tools_used}")
+
+            # Check that at least multiplication happened (basic requirement)
+            self.assertIn("mult", tools_used, f"Expected multiplication tool to be used. Tools used: {tools_used}")
+
+            # Check for mathematical results in the full response or streaming deltas
+            # Expected: 5*9=45, 45+13=58, 58*2=116
+            expected_intermediate_results = ["45", "58", "116"]
+            all_text = (full_response + " " + response.response).lower()
+            math_results_found = sum(1 for result in expected_intermediate_results
+                                     if result in all_text)
+
+            print(f"🔢 Mathematical results found: {math_results_found}/3 expected")
+            print(f"Full text searched: {all_text[:200]}...")
+
+            # More lenient assertion - just check that some mathematical progress was made
+            self.assertGreaterEqual(math_results_found, 1,
+                                    f"Expected at least 1 mathematical result. Found {math_results_found}. "
+                                    f"Full text: {all_text}")
+
+            # Verify that streaming actually produced content
+            self.assertGreater(len(streaming_deltas), 0, "Expected streaming deltas to be produced")
+            self.assertGreater(len(response.response.strip()), 0, "Expected non-empty final response")
+
+
+if __name__ == "__main__":
+    unittest.main()