vectara-agentic 0.4.8__tar.gz → 0.4.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic.
- {vectara_agentic-0.4.8/vectara_agentic.egg-info → vectara_agentic-0.4.9}/PKG-INFO +9 -10
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/README.md +1 -1
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/requirements.txt +7 -8
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/benchmark_models.py +12 -12
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_agent.py +4 -3
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_bedrock.py +12 -12
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_gemini.py +43 -21
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_groq.py +13 -117
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_openai.py +13 -13
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_react_streaming.py +26 -2
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/_version.py +1 -1
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent.py +18 -29
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/factory.py +11 -4
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/prompts.py +63 -8
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/serialization.py +3 -3
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/streaming.py +10 -15
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/utils/hallucination.py +33 -1
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/db_tools.py +4 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/llm_utils.py +54 -1
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/utils.py +35 -10
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9/vectara_agentic.egg-info}/PKG-INFO +9 -10
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic.egg-info/requires.txt +7 -8
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/LICENSE +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/MANIFEST.in +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/setup.cfg +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/setup.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/__init__.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/conftest.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/endpoint.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/run_tests.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_agent_fallback_memory.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_agent_memory_consistency.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_agent_type.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_api_endpoint.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_fallback.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_private_llm.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_react_error_handling.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_react_memory.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_react_workflow_events.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_return_direct.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_serialization.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_session_memory.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_streaming.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_together.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_tools.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_vectara_llms.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_vhc.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_workflow.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/__init__.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/_callback.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/_observability.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_config.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/__init__.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/utils/__init__.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/utils/logging.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/utils/schemas.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_core/utils/tools.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/agent_endpoint.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/sub_query_workflow.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/tool_utils.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/tools.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/tools_catalog.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic/types.py +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic.egg-info/SOURCES.txt +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic.egg-info/dependency_links.txt +0 -0
- {vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/vectara_agentic.egg-info/top_level.txt +0 -0
{vectara_agentic-0.4.8/vectara_agentic.egg-info → vectara_agentic-0.4.9}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectara_agentic
-Version: 0.4.8
+Version: 0.4.9
 Summary: A Python package for creating AI Assistants and AI Agents with Vectara
 Home-page: https://github.com/vectara/py-vectara-agentic
 Author: Ofer Mendelevitch
@@ -16,21 +16,20 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: llama-index==0.14.
-Requires-Dist: llama-index-core==0.14.
-Requires-Dist: llama-index-workflows==2.
+Requires-Dist: llama-index==0.14.3
+Requires-Dist: llama-index-core==0.14.3
+Requires-Dist: llama-index-workflows==2.5.0
 Requires-Dist: llama-index-cli==0.5.1
 Requires-Dist: llama-index-indices-managed-vectara==0.5.1
 Requires-Dist: llama-index-llms-openai==0.5.6
 Requires-Dist: llama-index-llms-openai-like==0.5.1
-Requires-Dist: llama-index-llms-anthropic==0.
+Requires-Dist: llama-index-llms-anthropic==0.9.3
 Requires-Dist: llama-index-llms-together==0.4.1
 Requires-Dist: llama-index-llms-groq==0.4.1
 Requires-Dist: llama-index-llms-cohere==0.6.1
-Requires-Dist: llama-index-llms-google-genai==0.5.
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: llama-index-llms-bedrock-converse==0.9.2
+Requires-Dist: llama-index-llms-google-genai==0.5.1
+Requires-Dist: google_genai==1.39.1
+Requires-Dist: llama-index-llms-bedrock-converse==0.9.5
 Requires-Dist: llama-index-tools-yahoo-finance==0.4.1
 Requires-Dist: llama-index-tools-arxiv==0.4.1
 Requires-Dist: llama-index-tools-database==0.4.1
@@ -887,7 +886,7 @@ The `AgentConfig` object may include the following items:
 - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).

 > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
-- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-
+- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-5, Gemini uses models/gemini-2.5-flash, Together.AI uses deepseek-ai/DeepSeek-V3, GROQ uses openai/gpt-oss-20b, Bedrock uses us.anthropic.claude-sonnet-4-20250514-v1:0, Cohere uses command-a-03-2025).
 - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
 - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)

{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/README.md

@@ -811,7 +811,7 @@ The `AgentConfig` object may include the following items:
 - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).

 > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
-- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-
+- `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Anthropic uses claude-sonnet-4-5, Gemini uses models/gemini-2.5-flash, Together.AI uses deepseek-ai/DeepSeek-V3, GROQ uses openai/gpt-oss-20b, Bedrock uses us.anthropic.claude-sonnet-4-20250514-v1:0, Cohere uses command-a-03-2025).
 - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
 - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)

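The `AgentConfig` options documented in the README hunk above map onto the configuration object exercised by the test suite later in this diff. As a quick illustration only (not part of the 0.4.9 changes), a config using the updated defaults might look like the sketch below; the `vectara_agentic.types` import location for `AgentType` and the particular provider/model pairing are assumptions based on the test files in this release:

```python
# Illustrative sketch, not part of the 0.4.9 diff.
# Import paths follow tests/test_agent.py in this release; AgentType's location is assumed.
from vectara_agentic.agent_config import AgentConfig
from vectara_agentic.types import AgentType, ModelProvider, ObserverType

config = AgentConfig(
    agent_type=AgentType.REACT,
    main_llm_provider=ModelProvider.ANTHROPIC,
    main_llm_model_name="claude-sonnet-4-5",        # new Anthropic default in 0.4.9
    tool_llm_provider=ModelProvider.GEMINI,
    tool_llm_model_name="models/gemini-2.5-flash",  # Gemini default per the README
    observer=ObserverType.ARIZE_PHOENIX,            # optional; omit to disable observability
)
```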
{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/requirements.txt

@@ -1,18 +1,17 @@
-llama-index==0.14.
-llama-index-core==0.14.
-llama-index-workflows==2.
+llama-index==0.14.3
+llama-index-core==0.14.3
+llama-index-workflows==2.5.0
 llama-index-cli==0.5.1
 llama-index-indices-managed-vectara==0.5.1
 llama-index-llms-openai==0.5.6
 llama-index-llms-openai-like==0.5.1
-llama-index-llms-anthropic==0.
+llama-index-llms-anthropic==0.9.3
 llama-index-llms-together==0.4.1
 llama-index-llms-groq==0.4.1
 llama-index-llms-cohere==0.6.1
-llama-index-llms-google-genai==0.5.
-
-
-llama-index-llms-bedrock-converse==0.9.2
+llama-index-llms-google-genai==0.5.1
+google_genai==1.39.1
+llama-index-llms-bedrock-converse==0.9.5
 llama-index-tools-yahoo-finance==0.4.1
 llama-index-tools-arxiv==0.4.1
 llama-index-tools-database==0.4.1
{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/benchmark_models.py

@@ -68,7 +68,7 @@ def validate_api_keys(models_to_test: List[Dict]) -> None:
             missing_keys.append(key)

    if missing_keys:
-        print("
+        print("ERROR: Missing required API keys for benchmark execution:")
         print()
         for key in sorted(missing_keys):
             print(f"  • {key}")
@@ -83,7 +83,7 @@ def validate_api_keys(models_to_test: List[Dict]) -> None:

        sys.exit(1)

-    print("
+    print("All required API keys are present")
     print(f"Found API keys for {len(required_keys)} required environment variables")


@@ -135,7 +135,7 @@ class ModelBenchmark:
         {"provider": ModelProvider.OPENAI, "model": "gpt-5-mini"},
         {"provider": ModelProvider.OPENAI, "model": "gpt-4o-mini"},
         {"provider": ModelProvider.OPENAI, "model": "gpt-4.1-mini"},
-        {"provider": ModelProvider.ANTHROPIC, "model": "claude-sonnet-4-
+        {"provider": ModelProvider.ANTHROPIC, "model": "claude-sonnet-4-5"},
         {"provider": ModelProvider.TOGETHER, "model": "deepseek-ai/DeepSeek-V3"},
         {"provider": ModelProvider.GROQ, "model": "openai/gpt-oss-20b"},
         {"provider": ModelProvider.GEMINI, "model": "models/gemini-2.5-flash-lite"},
@@ -817,11 +817,11 @@ class ModelBenchmark:
         observability_setup = setup_observer(dummy_config, verbose=True)
         if observability_setup:
             print(
-                "
+                "Arize Phoenix observability enabled - LLM calls will be traced\n"
             )
             _observability_initialized = True
         else:
-            print("
+            print("Arize Phoenix observability setup failed\n")

         # Create semaphore to limit concurrent model testing
         model_semaphore = asyncio.Semaphore(self.max_concurrent_models)
@@ -835,7 +835,7 @@ class ModelBenchmark:
             tasks.append(task)

         # Execute all model benchmarks in parallel
-        print("
+        print("Starting parallel benchmark execution...\n")
         await asyncio.gather(*tasks, return_exceptions=True)

     async def _run_model_benchmark(
@@ -857,9 +857,9 @@ class ModelBenchmark:
                     provider, model_name, test_name, test_config
                 )
             except Exception as e:
-                print(f"
+                print(f"Error in {model_name} - {test_name}: {e}")

-        print(f"
+        print(f"Completed: {provider.value} - {model_name}")

     async def _run_scenario_benchmark(
         self,
@@ -892,18 +892,18 @@ class ModelBenchmark:

                 if result.error:
                     print(
-                        f"
+                        f"{model_name}/{test_name} Iteration {iteration_num}: {result.error}"
                     )
                 else:
                     print(
-                        f"
+                        f"{model_name}/{test_name} Iteration {iteration_num}: "
                         f"{result.total_response_time:.2f}s, "
                         f"first token: {result.first_token_latency:.2f}s, "
                         f"{result.tokens_per_second:.1f} chars/sec"
                     )

             except Exception as e:
-                print(f"
+                print(f"{model_name}/{test_name} Iteration {iteration_num}: {e}")
                 # Create error result
                 error_result = BenchmarkResult(
                     model_name=model_name,
@@ -929,7 +929,7 @@ class ModelBenchmark:
         successful = len([r for r in iteration_results if r.error is None])
         success_rate = (successful / len(iteration_results)) * 100
         print(
-            f"
+            f"{model_name}/{test_name} complete: {successful}/{len(iteration_results)} successful ({success_rate:.1f}%)"
         )

         return iteration_results
{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_agent.py

@@ -13,7 +13,6 @@ from vectara_agentic.agent_config import AgentConfig
 from vectara_agentic.types import ModelProvider, ObserverType
 from vectara_agentic.tools import ToolsFactory

-from vectara_agentic.agent_core.prompts import GENERAL_INSTRUCTIONS
 from conftest import mult, STANDARD_TEST_TOPIC, STANDARD_TEST_INSTRUCTIONS


@@ -54,9 +53,11 @@ class TestAgentPackage(unittest.TestCase):
             + date.today().strftime("%A, %B %d, %Y")
             + " with Always do as your mother tells you!"
         )
+        # Test format_prompt with dummy instructions since we're only testing template substitution
+        dummy_instructions = "Test instructions"
         self.assertEqual(
             format_prompt(
-                prompt_template,
+                prompt_template, dummy_instructions, topic, custom_instructions
             ),
             expected_output,
         )
@@ -83,7 +84,7 @@ class TestAgentPackage(unittest.TestCase):
         config = AgentConfig(
             agent_type=AgentType.REACT,
             main_llm_provider=ModelProvider.ANTHROPIC,
-            main_llm_model_name="claude-sonnet-4-
+            main_llm_model_name="claude-sonnet-4-5",
             tool_llm_provider=ModelProvider.TOGETHER,
             tool_llm_model_name="moonshotai/Kimi-K2-Instruct",
             observer=ObserverType.ARIZE_PHOENIX,
{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_bedrock.py

@@ -95,9 +95,9 @@ class TestBedrock(unittest.IsolatedAsyncioTestCase):
             "then rephrase that summary as a 10-year-old would explain it."
         )

-        print("\
-        print(f"
-        print("
+        print("\nStarting Claude Sonnet 4 multi-tool chain test (Bedrock)")
+        print(f"Query: {complex_query}")
+        print("Streaming response:\n" + "="*50)

         stream = await agent.astream_chat(complex_query)

@@ -111,33 +111,33 @@ class TestBedrock(unittest.IsolatedAsyncioTestCase):
                 streaming_deltas.append(chunk)
                 full_response += chunk
                 # Display each streaming delta
-                print(f"
+                print(f"Delta: {repr(chunk)}")

                 # Track tool calls in the stream
                 if "mult" in chunk.lower():
                     if "mult" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "mult", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: mult (#{len(tool_calls_made)})")
                 if "add" in chunk.lower():
                     if "add" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "add", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: add (#{len(tool_calls_made)})")
                 if "summarize" in chunk.lower():
                     if "summarize_text" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "summarize_text", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: summarize_text (#{len(tool_calls_made)})")
                 if "rephrase" in chunk.lower():
                     if "rephrase_text" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "rephrase_text", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: rephrase_text (#{len(tool_calls_made)})")

         response = await stream.aget_response()

         print("="*50)
-        print(f"
-        print(f"
+        print(f"Streaming completed. Total deltas: {len(streaming_deltas)}")
+        print(f"Tool calls made: {[call['tool'] for call in tool_calls_made]}")
         print(f"📄 Final response length: {len(response.response)} chars")
-        print(f"
+        print(f"Final response: {response.response}")

         # Validate tool usage sequence
         tools_used = [call["tool"] for call in tool_calls_made]
@@ -154,7 +154,7 @@ class TestBedrock(unittest.IsolatedAsyncioTestCase):
                                  if result in all_text)

         print(f"🔢 Mathematical results found: {math_results_found}/3 expected")
-        print(f"
+        print(f"Full text searched: {all_text[:200]}...")

         # More lenient assertion - just check that some mathematical progress was made
         self.assertGreaterEqual(math_results_found, 1,
{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_gemini.py

@@ -4,17 +4,20 @@ import warnings
 warnings.simplefilter("ignore", DeprecationWarning)

 import unittest
+import asyncio
+import gc

 from vectara_agentic.agent import Agent
 from vectara_agentic.tools import ToolsFactory
 from vectara_agentic.tools_catalog import ToolsCatalog
+from vectara_agentic.llm_utils import clear_llm_cache


 import nest_asyncio

 nest_asyncio.apply()

-from conftest import (
+from tests.conftest import (
     mult,
     add,
     fc_config_gemini,
@@ -23,8 +26,26 @@ from conftest import (
 )


-class TestGEMINI(unittest.
-    def 
+class TestGEMINI(unittest.IsolatedAsyncioTestCase):
+    def setUp(self):
+        """Set up test fixtures."""
+        super().setUp()
+        # Clear any cached LLM instances before each test
+        clear_llm_cache()
+        # Force garbage collection to clean up any lingering resources
+        gc.collect()
+
+    async def asyncTearDown(self):
+        """Clean up after each test - async version."""
+        await super().asyncTearDown()
+        # Clear cached LLM instances after each test
+        clear_llm_cache()
+        # Force garbage collection
+        gc.collect()
+        # Small delay to allow cleanup
+        await asyncio.sleep(0.01)
+
+    async def test_gemini(self):
         tools = [ToolsFactory().create_tool(mult)]

         agent = Agent(
@@ -33,14 +54,14 @@ class TestGEMINI(unittest.TestCase):
             topic=STANDARD_TEST_TOPIC,
             custom_instructions=STANDARD_TEST_INSTRUCTIONS,
         )
-        _ = agent.
-        _ = agent.
-        res = agent.
+        _ = await agent.achat("What is 5 times 10. Only give the answer, nothing else")
+        _ = await agent.achat("what is 3 times 7. Only give the answer, nothing else")
+        res = await agent.achat(
             "what is the result of multiplying the results of the last two multiplications. Only give the answer, nothing else."
         )
         self.assertIn("1050", res.response)

-    def test_gemini_single_prompt(self):
+    async def test_gemini_single_prompt(self):
         tools = [ToolsFactory().create_tool(mult)]

         agent = Agent(
@@ -49,12 +70,12 @@ class TestGEMINI(unittest.TestCase):
             topic=STANDARD_TEST_TOPIC,
             custom_instructions=STANDARD_TEST_INSTRUCTIONS,
         )
-        res = agent.
+        res = await agent.achat(
             "First, multiply 5 by 10. Then, multiply 3 by 7. Finally, multiply the results of the first two calculations."
         )
         self.assertIn("1050", res.response)

-    def test_gemini_25_flash_multi_tool_chain(self):
+    async def test_gemini_25_flash_multi_tool_chain(self):
         """Test Gemini 2.5 Flash with complex multi-step reasoning chain using multiple tools."""
         # Use Gemini config (Gemini 2.5 Flash)
         tools_catalog = ToolsCatalog(fc_config_gemini)
@@ -77,18 +98,19 @@ class TestGEMINI(unittest.TestCase):
             "Perform this calculation step by step: "
             "First multiply 3 by 8, then add 14 to that result, "
             "then multiply the new result by 3. "
-            "After getting the final number, 
-            "
-            "
+            "After getting the final number, create a text description of the entire mathematical process "
+            "(e.g., 'First I multiplied 3 by 8 to get 24, then added 14 to get 38, then multiplied by 3 to get 114'). "
+            "Then use the summarize_text tool to summarize that text description with expertise in 'mathematics education'. "
+            "Finally, use the rephrase_text tool to rephrase that summary as a 10-year-old would explain it."
         )

-        print("\
-        print(f"
+        print("\nStarting Gemini 2.5 Flash multi-tool chain test")
+        print(f"Query: {complex_query}")

-        # Note: Gemini tests use 
-        response = agent.
+        # Note: Gemini tests now use async chat
+        response = await agent.achat(complex_query)

-        print(f"
+        print(f"Final response: {response.response}")
         print(f"📄 Final response length: {len(response.response)} chars")

         # Check for mathematical results in the response
@@ -98,8 +120,8 @@ class TestGEMINI(unittest.TestCase):
         math_results_found = sum(1 for result in expected_intermediate_results
                                  if result in response_text)

-        print(f"
-        print(f"
+        print(f"Mathematical results found: {math_results_found}/3 expected")
+        print(f"Response text searched: {response_text[:200]}...")

         # More lenient assertion - just check that some mathematical progress was made
         self.assertGreaterEqual(math_results_found, 1,
@@ -110,10 +132,10 @@ class TestGEMINI(unittest.TestCase):
         self.assertGreater(len(response.response.strip()), 50, "Expected substantial response content")

         # Check for indications of multi-tool usage (math, summary, or explanation content)
-        multi_tool_indicators = ["calculate", "
+        multi_tool_indicators = ["calculate", "multipl", "add", "summary", "explain", "mathematical", "process"]
         indicators_found = sum(1 for indicator in multi_tool_indicators
                                if indicator in response_text)
-        self.assertGreaterEqual(indicators_found, 
+        self.assertGreaterEqual(indicators_found, 2,
                                 f"Expected multiple tool usage indicators. Found {indicators_found}: {response.response}")


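The Gemini test hunks above switch from synchronous chat calls to `await agent.achat(...)` and clear the cached LLM clients around each test via `clear_llm_cache()`. Outside a unittest runner, the same pattern is driven by an ordinary event loop; the sketch below is an illustration only (the `mult` tool, topic, and instructions are stand-in values), reusing the `Agent`, `ToolsFactory`, and `clear_llm_cache` interfaces as the test file does:

```python
# Illustrative sketch, not part of the 0.4.9 diff: driving the async Agent API directly.
import asyncio

from vectara_agentic.agent import Agent
from vectara_agentic.tools import ToolsFactory
from vectara_agentic.llm_utils import clear_llm_cache


def mult(x: float, y: float) -> float:
    """Multiply two numbers (stand-in for the conftest helper)."""
    return x * y


async def main() -> None:
    agent = Agent(
        tools=[ToolsFactory().create_tool(mult)],
        topic="arithmetic",                       # stand-in topic
        custom_instructions="Answer concisely.",  # stand-in instructions
    )
    res = await agent.achat("What is 5 times 10? Only give the answer.")
    print(res.response)
    clear_llm_cache()  # release cached LLM clients, mirroring the tests' teardown


asyncio.run(main())
```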
{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_groq.py

@@ -68,112 +68,8 @@ class TestGROQ(unittest.IsolatedAsyncioTestCase):

         self.assertEqual(response3.response, "1050")

-
-
-        with ARIZE_LOCK:
-            # Create config for GPT-OSS-120B via GROQ
-            gpt_oss_config = AgentConfig(
-                agent_type=AgentType.FUNCTION_CALLING,
-                main_llm_provider=ModelProvider.GROQ,
-                main_llm_model_name="openai/gpt-oss-120b",
-                tool_llm_provider=ModelProvider.GROQ,
-                tool_llm_model_name="openai/gpt-oss-120b",
-            )
-
-            # Create multiple tools for complex reasoning
-            tools_catalog = ToolsCatalog(gpt_oss_config)
-            tools = [
-                ToolsFactory().create_tool(mult),
-                ToolsFactory().create_tool(add),
-                ToolsFactory().create_tool(tools_catalog.summarize_text),
-                ToolsFactory().create_tool(tools_catalog.rephrase_text),
-            ]
-
-            agent = Agent(
-                agent_config=gpt_oss_config,
-                tools=tools,
-                topic=STANDARD_TEST_TOPIC,
-                custom_instructions="You are a mathematical reasoning agent that explains your work step by step.",
-            )
-
-            # Complex multi-step reasoning task
-            complex_query = (
-                "Perform this calculation step by step: "
-                "First multiply 7 by 8, then add 15 to that result, "
-                "then multiply the new result by 3. "
-                "After getting the final number, summarize the entire mathematical process "
-                "with expertise in 'mathematics education', "
-                "then rephrase that summary as a 10-year-old would explain it."
-            )
-
-            print("\n🔍 Starting GPT-OSS-120B multi-tool chain test (GROQ)")
-            print(f"📝 Query: {complex_query}")
-            print("🌊 Streaming response:\n" + "="*50)
-
-            stream = await agent.astream_chat(complex_query)
-
-            # Capture streaming deltas and tool calls
-            streaming_deltas = []
-            tool_calls_made = []
-            full_response = ""
-
-            async for chunk in stream.async_response_gen():
-                if chunk and chunk.strip():
-                    streaming_deltas.append(chunk)
-                    full_response += chunk
-                    # Display each streaming delta
-                    print(f"📡 Delta: {repr(chunk)}")
-
-                    # Track tool calls in the stream
-                    if "mult" in chunk.lower():
-                        if "mult" not in [call["tool"] for call in tool_calls_made]:
-                            tool_calls_made.append({"tool": "mult", "order": len(tool_calls_made) + 1})
-                            print(f"🔧 Tool call detected: mult (#{len(tool_calls_made)})")
-                    if "add" in chunk.lower():
-                        if "add" not in [call["tool"] for call in tool_calls_made]:
-                            tool_calls_made.append({"tool": "add", "order": len(tool_calls_made) + 1})
-                            print(f"🔧 Tool call detected: add (#{len(tool_calls_made)})")
-                    if "summarize" in chunk.lower():
-                        if "summarize_text" not in [call["tool"] for call in tool_calls_made]:
-                            tool_calls_made.append({"tool": "summarize_text", "order": len(tool_calls_made) + 1})
-                            print(f"🔧 Tool call detected: summarize_text (#{len(tool_calls_made)})")
-                    if "rephrase" in chunk.lower():
-                        if "rephrase_text" not in [call["tool"] for call in tool_calls_made]:
-                            tool_calls_made.append({"tool": "rephrase_text", "order": len(tool_calls_made) + 1})
-                            print(f"🔧 Tool call detected: rephrase_text (#{len(tool_calls_made)})")
-
-            response = await stream.aget_response()
-
-            print("="*50)
-            print(f"✅ Streaming completed. Total deltas: {len(streaming_deltas)}")
-            print(f"🔧 Tool calls made: {[call['tool'] for call in tool_calls_made]}")
-            print(f"📄 Final response length: {len(response.response)} chars")
-            print(f"🎯 Final response: {response.response}")
-
-            # Validate tool usage sequence
-            tools_used = [call["tool"] for call in tool_calls_made]
-            print(f"🧪 Tools used in order: {tools_used}")
-
-            # Check that at least multiplication happened (basic requirement)
-            self.assertIn("mult", tools_used, f"Expected multiplication tool to be used. Tools used: {tools_used}")
-
-            # Check for mathematical results in the full response or streaming deltas
-            expected_intermediate_results = ["56", "71", "213"]
-            all_text = (full_response + " " + response.response).lower()
-            math_results_found = sum(1 for result in expected_intermediate_results
-                                     if result in all_text)
-
-            print(f"🔢 Mathematical results found: {math_results_found}/3 expected")
-            print(f"🔍 Full text searched: {all_text[:200]}...")
-
-            # More lenient assertion - just check that some mathematical progress was made
-            self.assertGreaterEqual(math_results_found, 1,
-                                    f"Expected at least 1 mathematical result. Found {math_results_found}. "
-                                    f"Full text: {all_text}")
-
-            # Verify that streaming actually produced content
-            self.assertGreater(len(streaming_deltas), 0, "Expected streaming deltas to be produced")
-            self.assertGreater(len(response.response.strip()), 0, "Expected non-empty final response")
+    # Skipping test_gpt_oss_120b due to model's internal tools conflicting with function calling
+    # GPT-OSS-120B has internal tools like repo_browser.open_file that cause validation errors

     async def test_gpt_oss_20b(self):
         """Test GPT-OSS-20B model with complex multi-step reasoning chain using multiple tools via GROQ."""
@@ -213,9 +109,9 @@ class TestGROQ(unittest.IsolatedAsyncioTestCase):
             "then rephrase that summary as a 10-year-old would explain it."
         )

-        print("\
-        print(f"
-        print("
+        print("\nStarting GPT-OSS-20B multi-tool chain test (GROQ)")
+        print(f"Query: {complex_query}")
+        print("Streaming response:\n" + "="*50)

         stream = await agent.astream_chat(complex_query)

@@ -235,27 +131,27 @@ class TestGROQ(unittest.IsolatedAsyncioTestCase):
                 if "mult" in chunk.lower():
                     if "mult" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "mult", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: mult (#{len(tool_calls_made)})")
                 if "add" in chunk.lower():
                     if "add" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "add", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: add (#{len(tool_calls_made)})")
                 if "summarize" in chunk.lower():
                     if "summarize_text" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "summarize_text", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: summarize_text (#{len(tool_calls_made)})")
                 if "rephrase" in chunk.lower():
                     if "rephrase_text" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "rephrase_text", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: rephrase_text (#{len(tool_calls_made)})")

         response = await stream.aget_response()

         print("="*50)
-        print(f"
-        print(f"
+        print(f"Streaming completed. Total deltas: {len(streaming_deltas)}")
+        print(f"Tool calls made: {[call['tool'] for call in tool_calls_made]}")
         print(f"📄 Final response length: {len(response.response)} chars")
-        print(f"
+        print(f"Final response: {response.response}")

         # Validate tool usage sequence
         tools_used = [call["tool"] for call in tool_calls_made]
@@ -272,7 +168,7 @@ class TestGROQ(unittest.IsolatedAsyncioTestCase):
                                  if result in all_text)

         print(f"🔢 Mathematical results found: {math_results_found}/3 expected")
-        print(f"
+        print(f"Full text searched: {all_text[:200]}...")

         # More lenient assertion - just check that some mathematical progress was made
         self.assertGreaterEqual(math_results_found, 1,
{vectara_agentic-0.4.8 → vectara_agentic-0.4.9}/tests/test_openai.py

@@ -186,9 +186,9 @@ class TestOpenAI(unittest.IsolatedAsyncioTestCase):
             "then rephrase that summary as a 10-year-old would explain it."
         )

-        print("\
-        print(f"
-        print("
+        print("\nStarting GPT-4.1-mini multi-tool chain test (OpenAI)")
+        print(f"Query: {complex_query}")
+        print("Streaming response:\n" + "="*50)

         stream = await agent.astream_chat(complex_query)

@@ -202,33 +202,33 @@ class TestOpenAI(unittest.IsolatedAsyncioTestCase):
                 streaming_deltas.append(chunk)
                 full_response += chunk
                 # Display each streaming delta
-                print(f"
+                print(f"Delta: {repr(chunk)}")

                 # Track tool calls in the stream
                 if "mult" in chunk.lower():
                     if "mult" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "mult", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: mult (#{len(tool_calls_made)})")
                 if "add" in chunk.lower():
                     if "add" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "add", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: add (#{len(tool_calls_made)})")
                 if "summarize" in chunk.lower():
                     if "summarize_text" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "summarize_text", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: summarize_text (#{len(tool_calls_made)})")
                 if "rephrase" in chunk.lower():
                     if "rephrase_text" not in [call["tool"] for call in tool_calls_made]:
                         tool_calls_made.append({"tool": "rephrase_text", "order": len(tool_calls_made) + 1})
-                        print(f"
+                        print(f"Tool call detected: rephrase_text (#{len(tool_calls_made)})")

         response = await stream.aget_response()

         print("="*50)
-        print(f"
-        print(f"
+        print(f"Streaming completed. Total deltas: {len(streaming_deltas)}")
+        print(f"Tool calls made: {[call['tool'] for call in tool_calls_made]}")
         print(f"📄 Final response length: {len(response.response)} chars")
-        print(f"
+        print(f"Final response: {response.response}")

         # Validate tool usage sequence
         tools_used = [call["tool"] for call in tool_calls_made]
@@ -244,8 +244,8 @@ class TestOpenAI(unittest.IsolatedAsyncioTestCase):
         math_results_found = sum(1 for result in expected_intermediate_results
                                  if result in all_text)

-        print(f"
-        print(f"
+        print(f"Mathematical results found: {math_results_found}/3 expected")
+        print(f"Full text searched: {all_text[:200]}...")

         # More lenient assertion - just check that some mathematical progress was made
         self.assertGreaterEqual(math_results_found, 1,