universal-mcp-agents 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- universal_mcp_agents-0.1.6/.pre-commit-config.yaml +58 -0
- universal_mcp_agents-0.1.6/GEMINI.md +47 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/PKG-INFO +2 -2
- universal_mcp_agents-0.1.6/PROMPTS.md +27 -0
- universal_mcp_agents-0.1.6/builder_tools.py +34 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/bump_and_release.sh +3 -3
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/pyproject.toml +6 -4
- universal_mcp_agents-0.1.6/src/evals/datasets/tasks.jsonl +22 -0
- universal_mcp_agents-0.1.6/src/evals/datasets/test.jsonl +1 -0
- universal_mcp_agents-0.1.6/src/evals/evaluators.py +77 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/evals/run.py +32 -51
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/evals/utils.py +0 -10
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/tests/test_agents.py +16 -17
- universal_mcp_agents-0.1.6/src/universal_mcp/agents/__init__.py +38 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/base.py +24 -13
- {universal_mcp_agents-0.1.4/src/universal_mcp/agents/bigtool2 → universal_mcp_agents-0.1.6/src/universal_mcp/agents/bigtool}/__init__.py +12 -9
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/__main__.py +6 -7
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/graph.py +18 -27
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/prompts.py +3 -3
- {universal_mcp_agents-0.1.4/src/universal_mcp/agents/bigtool → universal_mcp_agents-0.1.6/src/universal_mcp/agents/bigtool2}/__init__.py +19 -7
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/__main__.py +6 -5
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/graph.py +55 -20
- universal_mcp_agents-0.1.6/src/universal_mcp/agents/bigtool2/prompts.py +15 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/agent.py +2 -2
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/graph.py +5 -6
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/prompts.py +1 -2
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/builder.py +47 -14
- universal_mcp_agents-0.1.6/src/universal_mcp/agents/cli.py +41 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/test.py +2 -1
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/llm.py +7 -3
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/__init__.py +8 -2
- universal_mcp_agents-0.1.6/src/universal_mcp/agents/planner/__main__.py +28 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/graph.py +6 -2
- universal_mcp_agents-0.1.6/src/universal_mcp/agents/planner/prompts.py +14 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/state.py +0 -1
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/react.py +36 -27
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/shared/tool_node.py +2 -3
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/simple.py +19 -3
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/utils.py +36 -36
- universal_mcp_agents-0.1.6/src/universal_mcp/applications/ui/app.py +305 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/uv.lock +367 -1185
- universal_mcp_agents-0.1.4/src/evals/datasets/tasks.jsonl +0 -22
- universal_mcp_agents-0.1.4/src/evals/evaluators.py +0 -44
- universal_mcp_agents-0.1.4/src/evals/test.py +0 -41
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/__init__.py +0 -19
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/autoagent/studio.py +0 -19
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/bigtool/context.py +0 -24
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/bigtool2/prompts.py +0 -12
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/bigtoolcache/context.py +0 -32
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/cli.py +0 -27
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/planner/__main__.py +0 -26
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/planner/prompts.py +0 -1
- universal_mcp_agents-0.1.4/src/universal_mcp/agents/tools.py +0 -41
- universal_mcp_agents-0.1.4/tool_retrieve.py +0 -47
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/.gitignore +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/README.md +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/evals/__init__.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/evals/dataset.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/evals/datasets/exact.jsonl +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/__init__.py +1 -1
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/__main__.py +1 -1
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/context.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/graph.py +2 -2
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/prompts.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/state.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/utils.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/state.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/agent.py +1 -1
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/state.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/__init__.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/__main__.py +0 -0
- {universal_mcp_agents-0.1.4/src/universal_mcp/agents/bigtool2 → universal_mcp_agents-0.1.6/src/universal_mcp/agents/bigtoolcache}/context.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/state.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/tools_all.txt +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/tools_important.txt +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/__init__.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/sandbox.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/utils.py +0 -0
- {universal_mcp_agents-0.1.4 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/hil.py +0 -0
File: .pre-commit-config.yaml (new file)
@@ -0,0 +1,58 @@
+fail_fast: false
+
+repos:
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v3.1.0
+    hooks:
+      - id: prettier
+        types_or: [yaml, json5]
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.11.13
+    hooks:
+      # Run the linter.
+      - id: ruff-check
+        args: [--fix]
+      # Run the formatter.
+      - id: ruff-format
+
+  # - repo: https://github.com/pre-commit/mirrors-mypy
+  #   rev: v1.8.0
+  #   hooks:
+  #     - id: mypy
+  #       additional_dependencies: []
+  #       args: ["--install-types", "--non-interactive"]
+
+  - repo: https://github.com/pre-commit/pygrep-hooks
+    rev: v1.10.0
+    hooks:
+      - id: python-check-blanket-noqa
+      # - id: python-no-eval
+      - id: python-no-log-warn
+      - id: python-use-type-annotations
+      - id: python-check-mock-methods
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: check-ast
+      - id: check-json
+      - id: check-merge-conflict
+      - id: detect-private-key
+      - id: mixed-line-ending
+      - id: debug-statements
+      # - id: name-tests-test
+      - id: requirements-txt-fixer
+
+  # - repo: local
+  #   hooks:
+  #     - id: uv-lock-check
+  #       name: Check uv.lock is up to date
+  #       entry: uv lock --check
+  #       language: system
+  #       files: ^(pyproject\.toml|uv\.lock)$
+  #       pass_filenames: false
File: GEMINI.md (new file)
@@ -0,0 +1,47 @@
+# Agent.md — Python + uv
+
+Purpose
+- This repository uses Python managed by uv for dependency resolution, virtual environments, locking, and execution. Always prefer uv subcommands (add/remove/run/sync/export) over raw pip/venv commands.
+
+Core rules
+- Use `uv add` to add or upgrade dependencies so that both `pyproject.toml` and `uv.lock` stay in sync; do not use `pip install` directly.
+- Keep runtime dependencies in `[project.dependencies]` and development-only tools in the `dev` group via `uv add --dev ...`.
+- Use `uv run` to execute Python, test, and tooling commands without manually activating a virtual environment.
+
+Project bootstrap
+- New project (scaffold files): `uv init`
+- First install or clean install: `uv sync`
+- Run the app: `uv run python -m <your_module>` or `uv run main.py`
+- REPL: `uv run python`
+- Scripts in pyproject: prefer `uv run <command>` to ensure the correct environment is used
+
+Managing dependencies
+- Add runtime dependency: `uv add <name>` (e.g., `uv add httpx`)
+- Add dev dependencies: `uv add --dev pytest ruff`
+- Pin/upgrade by constraint: `uv add "httpx>=0.27"` or adjust `pyproject.toml` and then `uv sync`
+- Remove dependency: `uv remove <name>`
+- Export lock for external tooling: `uv export --format requirements-txt --output-file requirements.txt`
+
+Locking and environments
+- `uv run` and `uv sync` will ensure the environment matches `pyproject.toml` and `uv.lock`
+- Avoid manual `pip install` or manual `venv` activation; let uv manage the environment
+- Commit `uv.lock` to version control for reproducible installs
+
+pyproject guidance
+- Dependencies live under `[project]` → `dependencies = [...]`
+- Development-only tooling should go under a dev group (e.g., `uv add --dev ruff pytest`) for clean separation
+- Keep `requires-python` current (e.g., `>=3.12`) to match the team’s baseline
+
+Usage in this repo
+- When adding libraries or changing versions, propose `uv add ...` changes that update both `pyproject.toml` and `uv.lock`, then run `uv run pytest -q` to validate
+- Prefer minimal diffs, explain the plan, apply changes, and run tests/tooling via `uv run`
+- If build/test fails, inspect error context, adjust constraints or code, and re-run via `uv run`
+
+Common commands (copy/paste)
+- Initialize: `uv init` | Install deps: `uv sync`
+- Add runtime: `uv add <pkg>` | Add dev: `uv add --dev <pkg>`
+- Remove: `uv remove <pkg>`
+- Run app: `uv run python -m <your_module>` or `uv run main.py`
+- Tests: `uv run pytest -q`
+- Lint/format: `uv run ruff check .` and/or `uv run ruff format .`
+- Export: `uv export --format requirements-txt --output-file requirements.txt`
File: PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: universal-mcp-agents
-Version: 0.1.4
+Version: 0.1.6
 Summary: Add your description here
 Project-URL: Homepage, https://github.com/universal-mcp/applications
 Project-URL: Repository, https://github.com/universal-mcp/applications
@@ -11,11 +11,11 @@ Requires-Dist: langchain-anthropic>=0.3.19
 Requires-Dist: langchain-google-genai>=2.1.10
 Requires-Dist: langchain-openai>=0.3.32
 Requires-Dist: langgraph>=0.6.6
-Requires-Dist: universal-mcp-applications>=0.1.4
 Requires-Dist: universal-mcp>=0.1.24rc17
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == 'dev'
 Requires-Dist: ruff; extra == 'dev'
 Provides-Extra: test
+Requires-Dist: pytest-asyncio>=1.1.0; extra == 'test'
 Requires-Dist: pytest-cov; extra == 'test'
 Requires-Dist: pytest<9.0.0,>=7.0.0; extra == 'test'
File: PROMPTS.md (new file)
@@ -0,0 +1,27 @@
+# Differentiating Developer and System Prompts
+
+This document explains the roles of the two different types of prompts used by agents: the Developer Prompt and the System Prompt.
+
+## Developer Prompt
+
+*   **Role:** Defines the core identity, capabilities, and constraints of the agent. It's the fundamental instruction set that governs the agent's behavior.
+*   **Author:** The agent developer.
+*   **Nature:** Static. It is part of the agent's source code and does not change between different runs or users.
+*   **Example:** "You are a helpful assistant that can write code. You are an expert in Python. You must not engage in harmful conversations."
+
+## System Prompt
+
+*   **Role:** Provides init-time context to the agent. This includes user-specific information, environment details, or any other dynamic data that can influence the agent's response for a specific interaction.
+*   **Author:** The agent platform or the system running the agent.
+*   **Nature:** Dynamic. It can change with every request or for every user.
+*   **Example:** "The current user is John Doe. The current date is 2025-09-09. The user's timezone is UTC. The user is working on a project located at /path/to/project."
+
+## User Input
+
+*   Provided every time by the user to trigger the agent
+
+## How they work together
+
+The developer prompt and system prompt are combined to form the final set of instructions for the LLM. Typically, the developer prompt comes first, establishing the agent's persona and rules, followed by the system prompt which provides the immediate context for the current task.
+
+This separation allows for building robust, general-purpose agents (via developer prompts) that can be adapted to specific situations (via system prompts) without altering their core logic.
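To make the layering above concrete, here is a minimal sketch (not code from this package; `build_messages` and all values are hypothetical) of how a static developer prompt and a dynamic system prompt might be combined ahead of the user input:

```python
# Hypothetical illustration of the prompt layering described in PROMPTS.md;
# nothing below is part of universal-mcp-agents itself.
from datetime import date


def build_messages(developer_prompt: str, system_context: str, user_input: str) -> list[dict]:
    """Layer the static developer prompt over the per-request system context."""
    return [
        # Developer prompt first: static identity, capabilities, constraints.
        {"role": "system", "content": developer_prompt},
        # System prompt second: init-time, per-user, per-run context.
        {"role": "system", "content": system_context},
        # Finally, the user input that triggers the agent.
        {"role": "user", "content": user_input},
    ]


messages = build_messages(
    developer_prompt="You are a helpful assistant that can write code.",
    system_context=f"The current date is {date.today().isoformat()}. The user's timezone is UTC.",
    user_input="Summarize my unread email.",
)
```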
File: builder_tools.py (new file)
@@ -0,0 +1,34 @@
+from universal_mcp.agentr.registry import AgentrRegistry
+from universal_mcp.agents.builder import BuilderAgent
+import json
+
+
+def load_tasks():
+    with open("src/evals/datasets/tasks.jsonl", "r") as f:
+        for line in f:
+            yield json.loads(line)
+
+
+async def main():
+    registry = AgentrRegistry()
+    builder = BuilderAgent(
+        name="Builder Agent",
+        instructions="You are a builder agent that creates other agents.",
+        model="gemini/gemini-1.5-pro",
+        registry=registry,
+    )
+    updated_tasks = []
+    tasks = load_tasks()
+    for task in tasks:
+        print(task["user_input"])
+        result = await builder.invoke(task["user_input"])
+        tools = result["tool_config"] or {}
+        updated_tasks.append({**task, "required_tools": tools})
+    with open("src/evals/datasets/tasks_with_tools.jsonl", "w") as f:
+        for task in updated_tasks:
+            f.write(json.dumps(task) + "\n")
+
+
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(main())
File: bump_and_release.sh
@@ -5,7 +5,7 @@ set -x
 
 # Ensure dependencies are installed
 echo "Syncing dependencies..."
-uv sync
+uv sync --all-extras
 
 # Run tests with pytest
 echo "Running tests with pytest..."
@@ -55,7 +55,7 @@ sed -i '' "s/^version = ".*"/version = \"$NEW_VERSION\"/" pyproject.toml
 echo "Version bumped from $CURRENT_VERSION to $NEW_VERSION"
 
 # Stage the changed file
-git add pyproject.toml
+git add pyproject.toml uv.lock
 
 # Commit the change
 git commit -m "bump: version $CURRENT_VERSION → $NEW_VERSION"
@@ -87,7 +87,7 @@ if [ "$1" = "release" ]; then
    rm -rf .idea
    rm -rf .vscode
 
-   uv
+   uv build && uv publish
    echo "Release complete!"
 else
    echo "Skipping release steps"
File: pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "universal-mcp-agents"
-version = "0.1.4"
+version = "0.1.6"
 description = "Add your description here"
 readme = "README.md"
 authors = [
@@ -18,8 +18,7 @@ dependencies = [
     "langchain-google-genai>=2.1.10",
     "langchain-openai>=0.3.32",
     "langgraph>=0.6.6",
-    "universal-mcp
-    "universal-mcp-applications>=0.1.4",
+    "universal-mcp>=0.1.24rc17",
 ]
 
 [project.license]
@@ -28,6 +27,7 @@ text = "MIT"
 [project.optional-dependencies]
 test = [
     "pytest>=7.0.0,<9.0.0",
+    "pytest-asyncio>=1.1.0",
     "pytest-cov",
 ]
 dev = [
@@ -58,7 +58,7 @@ show_missing = true
 fail_under = 70
 
 [tool.ruff]
-line-length =
+line-length = 120
 select = [
     "E", "W", "F", "I", "UP", "PL", "T20",
 ]
@@ -72,4 +72,6 @@ quote-style = "double"
 pythonpath = [
     "src",
 ]
+asyncio_mode = "strict"
+asyncio_default_fixture_loop_scope = "module"
 
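The new test settings pair `pytest-asyncio` with `asyncio_mode = "strict"`, which only collects coroutine tests that carry an explicit marker, while `asyncio_default_fixture_loop_scope = "module"` scopes async fixtures' event loop per module. A minimal sketch of a test written for strict mode (illustrative only, not a test from this package):

```python
# Illustrative strict-mode test: without @pytest.mark.asyncio, pytest-asyncio
# would not run this coroutine as a test under asyncio_mode = "strict".
import asyncio

import pytest


async def add(a: int, b: int) -> int:
    await asyncio.sleep(0)  # stand-in for real async work
    return a + b


@pytest.mark.asyncio
async def test_add():
    assert await add(2, 2) == 4
```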
File: src/evals/datasets/tasks.jsonl (new file)
@@ -0,0 +1,22 @@
+{"user_input": "Send an email to manoj@agentr.dev from my Gmail account", "difficulty": 1, "required_tools": {"google_mail": ["send_email"]}}
+{"user_input": "Show me events from today's Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["list_events"]}}
+{"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4, "required_tools": {"github": ["get_pull_request"], "google_docs": ["get_document"]}}
+{"user_input": "Summarize the key insights from all marketing emails received this week from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4, "required_tools": {"google_mail": ["create_filters: Set up new Gmail filter with criteria and automated actions"], "google_docs": [], "tavily": []}}
+{"user_input": "Search for best cafes near IIT bombay using exa and make a google sheet out of it", "difficulty": 3, "required_tools": {"exa": ["search"], "google_sheet": ["create_spreadsheet"]}}
+{"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 5, "required_tools": {"reddit": [], "google_docs": []}}
+{"user_input": "Find the best restaurants in Goa using perplexity web search", "difficulty": 2, "required_tools": {"perplexity": []}}
+{"user_input": "List the unread emails from the last 24 hours from my Gmail, sorted by sender.", "difficulty": 2, "required_tools": {"google_mail": ["list_messages"]}}
+{"user_input": "Tell me how many meetings I have tomorrow and when they start from my Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["get_today_events"]}}
+{"user_input": "Create a meeting with aditakarsh@example.com on the topic of the latest trends in AI at 8PM today using Google Calendar.", "difficulty": 2, "required_tools": {"google_calendar": ["add_an_event"]}}
+{"user_input": "What are the topics of my meetings today from Google Calendar and who are the attendees? Give a 1-line context for each attendee using LinkedIn or web search.", "difficulty": 4, "required_tools": {"google_calendar": ["get_event: Retrieves detailed information about a specific Google Calendar event by its ID"], "linkedin": ["get_your_info"]}}
+{"user_input": "Fetch my last inbox mail from Microsoft Outlook", "difficulty": 1, "required_tools": {"outlook": ["user_get_mail_folder"]}}
+{"user_input": "Fetch unsubscribe links from my Gmail inbox for promo emails I have received in the last 7 days", "difficulty": 3, "required_tools": {"google_mail": []}}
+{"user_input": "Fetch all unread emails from Gmail and new tickets from ClickUp for me from last night", "difficulty": 4, "required_tools": {"google_mail": [], "clickup": []}}
+{"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4, "required_tools": {"serpapi": ["search"], "perplexity": [], "e2b": []}}
+{"user_input": "Create a weekly expense report from my credit card transactions and categorize spending by type (food, transport, entertainment, etc.) in a Google Sheet", "difficulty": 3, "required_tools": {}}
+{"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4, "required_tools": {"serpapi": ["search"], "google_sheet": []}}
+{"user_input": "Research the top 10 Y Combinator startups from the latest batch using web search and create a report on their industries and funding status in Google Docs", "difficulty": 5, "required_tools": {"serpapi": [], "google_docs": ["create_document"]}}
+{"user_input": "Find and summarize the key takeaways from the latest earnings calls of FAANG companies using web search and create a report in Google Docs", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_docs": []}}
+{"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5, "required_tools": {"linkedin": [], "scraper": ["linkedin_list_all_posts"], "google_sheet": [], "openai": []}}
+{"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4, "required_tools": {"twitter": [], "google_sheet": []}}
+{"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_sheet": [], "google_calendar": []}}
File: src/evals/datasets/test.jsonl (new file)
@@ -0,0 +1 @@
+{"user_input": "What is 2 + 2?", "expected_output": "4"}
File: src/evals/evaluators.py (new file)
@@ -0,0 +1,77 @@
+
+from agentevals.trajectory.llm import (
+    TRAJECTORY_ACCURACY_PROMPT,
+    create_trajectory_llm_as_judge,
+)
+from google.ai.generativelanguage_v1beta import ToolConfig
+from langsmith.evaluation import EvaluationResult, run_evaluator
+from langsmith.schemas import Example, Run
+from openevals.llm import create_llm_as_judge
+from openevals.prompts import CORRECTNESS_PROMPT
+
+
+@run_evaluator
+def exact_match_evaluator(run: Run, example: Example | None = None) -> EvaluationResult:
+    """
+    A simple evaluator that checks for exact match between the agent's output
+    and the expected output from the dataset.
+    """
+    if example is None or "expected_output" not in example.outputs:
+        return EvaluationResult(
+            key="exact_match", score=0, comment="No expected output provided. Example: " + str(example)
+        )
+
+    # The agent's response might be in a list of messages
+    agent_response_raw = run.outputs.get("output", "")
+    if isinstance(agent_response_raw, list):
+        # Extract text from the last dictionary in the list
+        agent_response_raw = agent_response_raw[-1]
+
+
+    final_answer = agent_response_raw.get("content", "").strip().lower()
+
+    expected_output = example.outputs["expected_output"].strip().lower()
+    if final_answer == expected_output:
+        score = 1
+        comment = "Exact match."
+    else:
+        score = 0
+        comment = f"Mismatch: Expected '{expected_output}', but got '{final_answer}'."
+
+    return EvaluationResult(key="exact_match", score=score, comment=comment)
+
+
+correctness_evaluator = create_llm_as_judge(
+    prompt=CORRECTNESS_PROMPT,
+    feedback_key="correctness",
+    model="anthropic:claude-4-sonnet-20250514",
+)
+
+
+trajectory_evaluator = create_trajectory_llm_as_judge(
+    prompt=TRAJECTORY_ACCURACY_PROMPT,
+    model="anthropic:claude-4-sonnet-20250514",
+)
+
+@run_evaluator
+def tool_node_evaluator(run: Run, example: Example | None = None) -> EvaluationResult:
+    """
+    A simple evaluator that checks if the agent used the required tools.
+    """
+    try:
+        if example is None or example.outputs is None or "required_tools" not in example.outputs:
+            return EvaluationResult(key="tool_node", score=0, comment="No required tools provided. Example: " + str(example))
+        required_tools: ToolConfig = example.outputs["required_tools"]
+        agent_response_raw: ToolConfig = run.outputs.get("tool_config", {})
+        # Flatten the tool_configs to a single set of tool_ids
+        required_tool_ids = [f"{app_id}___{tool_id}" for app_id, tools in required_tools.items() for tool_id in tools]
+        agent_tool_ids = [f"{app_id}___{tool_id}" for app_id, tools in agent_response_raw.items() for tool_id in tools]
+        if set(required_tool_ids).issubset(set(agent_tool_ids)):
+            return EvaluationResult(key="tool_node", score=1, comment="Tool usage: " + str(required_tools))
+        else:
+            return EvaluationResult(key="tool_node", score=0, comment="Tool usage: " + str(required_tools))
+    except Exception as e:
+        print(f"Error evaluating tool usage: {str(e)}")
+        print(run.outputs)
+        print(example.outputs)
+        return EvaluationResult(key="tool_node", score=0, comment=f"Error evaluating tool usage: {str(e)}")
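To make the subset check in `tool_node_evaluator` concrete, here is a small standalone sketch with made-up tool configs; it mirrors the `{app_id}___{tool_id}` flattening used above:

```python
# Made-up data illustrating the flattening + subset check in tool_node_evaluator.
required_tools = {"google_mail": ["send_email"], "google_docs": []}
agent_tools = {"google_mail": ["send_email", "list_messages"], "google_docs": ["create_document"]}

required_ids = [f"{app}___{tool}" for app, tools in required_tools.items() for tool in tools]
agent_ids = [f"{app}___{tool}" for app, tools in agent_tools.items() for tool in tools]

# Apps listed with an empty tool list contribute no ids, so only concrete tools
# are compared: {"google_mail___send_email"} is a subset here, so the score is 1.
assert set(required_ids).issubset(set(agent_ids))
```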
File: src/evals/run.py
@@ -2,47 +2,45 @@ import argparse
 import asyncio
 from typing import Any
 
-from dotenv import load_dotenv
 from langsmith import Client, aevaluate
 from langsmith.evaluation import RunEvaluator
+from universal_mcp.agentr.client import AgentrClient
+from universal_mcp.agentr.registry import AgentrRegistry
 
 from evals.dataset import load_dataset
-from evals.evaluators import
-
-
+from evals.evaluators import (
+    correctness_evaluator,
+    exact_match_evaluator,
+    trajectory_evaluator,
+    tool_node_evaluator,
+)
+from universal_mcp.agents import get_agent
 from universal_mcp.agents.base import BaseAgent
-from universal_mcp.agents.
-from universal_mcp.agents.simple import SimpleAgent
-
-load_dotenv()
+from universal_mcp.agents.utils import messages_to_list
 
 
 # 1. Agent Factory
-def
+def build_agent(agent_name: str):
     """
     Factory function to get an agent instance by name.
     """
+    client = AgentrClient()
     common_params = {
-        "instructions": "You are a helpful assistant.",
+        "instructions": "You are a helpful assistant. Respond to the final answer in one or two words. Eg, if the answer is 4, you should respond with '4'. Do not provide with any explanation",
         "model": "anthropic/claude-4-sonnet-20250514",
-        "registry": AgentrRegistry() if agent_name != "simple" else None,
+        "registry": AgentrRegistry(client=client) if agent_name != "simple" else None,
     }
-
-
-
-        return ReactAgent(name="react-agent", **common_params)
-    elif agent_name == "auto":
-        return AutoAgent(name="auto-agent", **common_params)
-    else:
-        raise ValueError(
-            f"Unknown agent: {agent_name}. Available agents: simple, react, auto"
-        )
+    agent = get_agent(agent_name)(name=agent_name, **common_params)
+    return agent
+
 
 
 # 2. Evaluator Registry
 EVALUATORS: dict[str, Any] = {
     "llm_as_judge": correctness_evaluator,
     "exact_match": exact_match_evaluator,
+    "trajectory": trajectory_evaluator,
+    "tool_node": tool_node_evaluator,
 }
 
 
@@ -58,41 +56,25 @@ def get_evaluator(evaluator_name: str) -> RunEvaluator:
     return evaluator
 
 
-
-async def agent_runner(agent: BaseAgent, inputs: dict):
+
+async def agent_runner(agent: BaseAgent, inputs: dict) -> dict:
     """
     Runs the agent and returns a dictionary with the final output.
     """
     result = await agent.invoke(user_input=inputs["user_input"])
-
-
-
-
-
-    elif isinstance(content, list):
-        # Handle list of content blocks (e.g., from Anthropic)
-        text_parts = []
-        for item in content:
-            if isinstance(item, dict) and item.get("type") == "text":
-                text_parts.append(item.get("text", ""))
-        final_response = "\n".join(text_parts)
-    else:
-        final_response = str(content)
-    else:
-        final_response = str(result)
-    return {"output": final_response}
-
+    messages = messages_to_list(result["messages"])
+    return_result = {"output": messages}
+    if "tool_config" in result:
+        return_result["tool_config"] = result["tool_config"]
+    return return_result
 
 async def main(agent_name: str, dataset_path: str, evaluator_name: str):
     """
     The main function for the evaluation CLI.
     """
-    print(
-        f"Starting evaluation with agent='{agent_name}', dataset='{dataset_path}', evaluator='{evaluator_name}'"
-    )
 
     # 1. Get the agent and evaluator
-    agent =
+    agent = build_agent(agent_name)
     evaluator = get_evaluator(evaluator_name)
 
     # Create a callable for aevaluate
@@ -116,14 +98,14 @@ async def main(agent_name: str, dataset_path: str, evaluator_name: str):
         for example in dataset_examples:
             client.create_example(
                 inputs={"user_input": example["user_input"]},
-                outputs={
-
-
+                outputs={
+                    "expected_output": example.get("expected_output", ""),
+                    "required_tools": example.get("required_tools", {})
+                },
                 dataset_id=dataset.id,
             )
-        print(f"Created and populated dataset '{dataset_name}' for this run.")
     except Exception:
-
+        pass
 
     # 4. Run the evaluation
     await aevaluate(
@@ -139,7 +121,6 @@ if __name__ == "__main__":
     parser.add_argument(
         "agent",
         type=str,
-        choices=["simple", "react", "auto"],
         help="The name of the agent to evaluate.",
     )
     parser.add_argument(
File: src/evals/utils.py
@@ -22,10 +22,8 @@ def upload_runs_to_dataset(
     client = Client()
     try:
         dataset = client.create_dataset(dataset_name, description=dataset_description)
-        print(f"Created new dataset: '{dataset_name}'")
     except Exception:
         dataset = client.read_dataset(dataset_name=dataset_name)
-        print(f"Using existing dataset: '{dataset_name}'")
 
     runs = client.list_runs(project_name=project_name)
 
@@ -38,9 +36,6 @@ def upload_runs_to_dataset(
         )
         example_count += 1
 
-    print(
-        f"✅ Successfully uploaded {example_count} runs from project '{project_name}' to dataset '{dataset_name}'."
-    )
 
 
 def upload_dataset_from_file(
@@ -65,19 +60,14 @@ def upload_dataset_from_file(
 
     try:
         dataset = client.create_dataset(dataset_name, description=dataset_description)
-        print(f"Created new dataset: '{dataset_name}'")
     except Exception:
         dataset = client.read_dataset(dataset_name=dataset_name)
-        print(f"Using existing dataset: '{dataset_name}'")
 
     for example in examples:
         inputs = {key: example[key] for key in input_keys if key in example}
        outputs = {key: example[key] for key in output_keys if key in example}
        client.create_example(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
 
-    print(
-        f"✅ Successfully uploaded {len(examples)} examples from '{file_path}' to dataset '{dataset_name}'."
-    )
 
 
 if __name__ == "__main__":
File: src/tests/test_agents.py
@@ -3,16 +3,14 @@ from typing import Any
 import pytest
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import tool
+from universal_mcp.tools.registry import ToolRegistry
+from universal_mcp.types import ToolFormat
 
-from universal_mcp.agents
+from universal_mcp.agents import get_agent
 from universal_mcp.agents.base import BaseAgent
-from universal_mcp.agents.bigtool import BigToolAgent
 from universal_mcp.agents.builder import BuilderAgent
 from universal_mcp.agents.llm import load_chat_model
-from universal_mcp.agents.planner import PlannerAgent
 from universal_mcp.agents.shared.tool_node import build_tool_node_graph
-from universal_mcp.tools.registry import ToolRegistry
-from universal_mcp.types import ToolFormat
 
 
 class MockToolRegistry(ToolRegistry):
@@ -214,7 +212,6 @@ class MockToolRegistry(ToolRegistry):
         self, tool_name: str, tool_args: dict[str, Any]
     ) -> dict[str, Any]:
         """Call a tool with the given name and arguments."""
-        print(f"MockToolRegistry: Called tool '{tool_name}' with args {tool_args}")
         return {"status": f"task has been done by tool {tool_name}"}
 
     async def list_connected_apps(self) -> list[dict[str, str]]:
@@ -228,7 +225,7 @@ class MockToolRegistry(ToolRegistry):
 class TestToolFinderGraph:
     @pytest.fixture
     def llm(self):
-        return load_chat_model("
+        return load_chat_model("anthropic/claude-sonnet-4-20250514", thinking=False)
 
     @pytest.fixture
     def registry(self):
@@ -313,22 +310,27 @@ class TestToolFinderGraph:
 
 
 @pytest.mark.parametrize(
-    "
+    "agent_name",
     [
-
-
-
+        "auto",
+        "react",
+        "simple",
+        "builder",
+        "planner",
+        "bigtool",
+        "bigtool2",
     ],
 )
 class TestAgents:
     @pytest.fixture
-    def agent(self,
+    def agent(self, agent_name: str):
         """Set up the test environment for the agent."""
         registry = MockToolRegistry()
+        agent_class = get_agent(agent_name)
         agent = agent_class(
-            name=f"Test {
+            name=f"Test {agent_name}",
             instructions="Test instructions",
-            model="
+            model="anthropic/claude-sonnet-4-20250514",
             registry=registry,
         )
         return agent
@@ -358,9 +360,6 @@ class TestAgents:
         )
 
         # Print the response for manual verification and for the LLM judge
-        print("\n--- Agent's Final Response ---")
-        print(final_response)
-        print("------------------------------")
 
         # Assert that the response is not None or empty, as per the new requirement
         assert final_response is not None, "The final response should not be None."