universal-mcp-agents 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/PKG-INFO +1 -2
- universal_mcp_agents-0.1.6/builder_tools.py +34 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/bump_and_release.sh +1 -1
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/pyproject.toml +2 -3
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/datasets/tasks.jsonl +22 -22
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/evaluators.py +24 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/run.py +11 -5
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/base.py +1 -2
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/__init__.py +10 -5
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/agent.py +0 -1
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/graph.py +47 -10
- universal_mcp_agents-0.1.6/src/universal_mcp/agents/bigtool2/prompts.py +15 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/agent.py +0 -1
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/graph.py +11 -13
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/builder.py +6 -1
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/llm.py +1 -1
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/react.py +4 -5
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/utils.py +0 -10
- universal_mcp_agents-0.1.5/src/universal_mcp/agents/ui_tools.py → universal_mcp_agents-0.1.6/src/universal_mcp/applications/ui/app.py +3 -3
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/uv.lock +60 -943
- universal_mcp_agents-0.1.5/src/universal_mcp/agents/autoagent/studio.py +0 -20
- universal_mcp_agents-0.1.5/src/universal_mcp/agents/bigtool2/prompts.py +0 -12
- universal_mcp_agents-0.1.5/src/universal_mcp/agents/tools.py +0 -40
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/.gitignore +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/.pre-commit-config.yaml +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/GEMINI.md +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/PROMPTS.md +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/README.md +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/__init__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/dataset.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/datasets/exact.jsonl +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/datasets/test.jsonl +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/utils.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/tests/test_agents.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/__init__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/__init__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/__main__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/context.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/graph.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/prompts.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/state.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/utils.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/__init__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/__main__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/graph.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/prompts.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/state.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/__main__.py +1 -1
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/state.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/__init__.py +2 -2
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/__main__.py +1 -1
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/context.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/prompts.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/state.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/tools_all.txt +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/tools_important.txt +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/cli.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/__init__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/sandbox.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/test.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/utils.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/hil.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/__init__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/__main__.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/graph.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/prompts.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/state.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/shared/tool_node.py +0 -0
- {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/simple.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: universal-mcp-agents
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Add your description here
|
|
5
5
|
Project-URL: Homepage, https://github.com/universal-mcp/applications
|
|
6
6
|
Project-URL: Repository, https://github.com/universal-mcp/applications
|
|
@@ -11,7 +11,6 @@ Requires-Dist: langchain-anthropic>=0.3.19
|
|
|
11
11
|
Requires-Dist: langchain-google-genai>=2.1.10
|
|
12
12
|
Requires-Dist: langchain-openai>=0.3.32
|
|
13
13
|
Requires-Dist: langgraph>=0.6.6
|
|
14
|
-
Requires-Dist: universal-mcp-applications>=0.1.4
|
|
15
14
|
Requires-Dist: universal-mcp>=0.1.24rc17
|
|
16
15
|
Provides-Extra: dev
|
|
17
16
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from universal_mcp.agentr.registry import AgentrRegistry
|
|
2
|
+
from universal_mcp.agents.builder import BuilderAgent
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_tasks():
|
|
7
|
+
with open("src/evals/datasets/tasks.jsonl", "r") as f:
|
|
8
|
+
for line in f:
|
|
9
|
+
yield json.loads(line)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def main():
|
|
13
|
+
registry = AgentrRegistry()
|
|
14
|
+
builder = BuilderAgent(
|
|
15
|
+
name="Builder Agent",
|
|
16
|
+
instructions="You are a builder agent that creates other agents.",
|
|
17
|
+
model="gemini/gemini-1.5-pro",
|
|
18
|
+
registry=registry,
|
|
19
|
+
)
|
|
20
|
+
updated_tasks = []
|
|
21
|
+
tasks = load_tasks()
|
|
22
|
+
for task in tasks:
|
|
23
|
+
print(task["user_input"])
|
|
24
|
+
result = await builder.invoke(task["user_input"])
|
|
25
|
+
tools = result["tool_config"] or {}
|
|
26
|
+
updated_tasks.append({**task, "required_tools": tools})
|
|
27
|
+
with open("src/evals/datasets/tasks_with_tools.jsonl", "w") as f:
|
|
28
|
+
for task in updated_tasks:
|
|
29
|
+
f.write(json.dumps(task) + "\n")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
if __name__ == "__main__":
|
|
33
|
+
import asyncio
|
|
34
|
+
asyncio.run(main())
|
|
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
|
|
|
6
6
|
|
|
7
7
|
[project]
|
|
8
8
|
name = "universal-mcp-agents"
|
|
9
|
-
version = "0.1.5"
|
|
9
|
+
version = "0.1.6"
|
|
10
10
|
description = "Add your description here"
|
|
11
11
|
readme = "README.md"
|
|
12
12
|
authors = [
|
|
@@ -18,8 +18,7 @@ dependencies = [
|
|
|
18
18
|
"langchain-google-genai>=2.1.10",
|
|
19
19
|
"langchain-openai>=0.3.32",
|
|
20
20
|
"langgraph>=0.6.6",
|
|
21
|
-
"universal-mcp
|
|
22
|
-
"universal-mcp-applications>=0.1.4",
|
|
21
|
+
"universal-mcp>=0.1.24rc17",
|
|
23
22
|
]
|
|
24
23
|
|
|
25
24
|
[project.license]
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
{"user_input": "Send an email to manoj@agentr.dev from my Gmail account", "difficulty": 1}
|
|
2
|
-
{"user_input": "Show me events from today's Google Calendar.", "difficulty": 1}
|
|
3
|
-
{"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4}
|
|
4
|
-
{"user_input": "Summarize the key insights from all marketing emails received this week from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4}
|
|
5
|
-
{"user_input": "
|
|
6
|
-
{"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 5}
|
|
7
|
-
{"user_input": "Find the best restaurants in Goa using perplexity web search", "difficulty": 2}
|
|
8
|
-
{"user_input": "List the unread emails from the last 24 hours from my Gmail, sorted by sender.", "difficulty": 2}
|
|
9
|
-
{"user_input": "Tell me how many meetings I have tomorrow and when they start from my Google Calendar.", "difficulty": 1}
|
|
10
|
-
{"user_input": "Create a meeting with aditakarsh@example.com on the topic of the latest trends in AI at 8PM today using Google Calendar.", "difficulty": 2}
|
|
11
|
-
{"user_input": "What are the topics of my meetings today from Google Calendar and who are the attendees? Give a 1-line context for each attendee using LinkedIn or web search.", "difficulty": 4}
|
|
12
|
-
{"user_input": "Fetch my last inbox mail from Microsoft Outlook", "difficulty": 1}
|
|
13
|
-
{"user_input": "Fetch unsubscribe links from my Gmail inbox for promo emails I have received in the last 7 days", "difficulty": 3}
|
|
14
|
-
{"user_input": "Fetch all unread emails from Gmail and new tickets from ClickUp for me from last night", "difficulty": 4}
|
|
15
|
-
{"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4}
|
|
16
|
-
{"user_input": "Create a weekly expense report from my credit card transactions and categorize spending by type (food, transport, entertainment, etc.) in a Google Sheet", "difficulty": 3}
|
|
17
|
-
{"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4}
|
|
18
|
-
{"user_input": "Research the top 10 Y Combinator startups from the latest batch using web search and create a report on their industries and funding status in Google Docs", "difficulty": 5}
|
|
19
|
-
{"user_input": "Find and summarize the key takeaways from the latest earnings calls of FAANG companies using web search and create a report in Google Docs", "difficulty": 5}
|
|
20
|
-
{"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5}
|
|
21
|
-
{"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4}
|
|
22
|
-
{"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5}
|
|
1
|
+
{"user_input": "Send an email to manoj@agentr.dev from my Gmail account", "difficulty": 1, "required_tools": {"google_mail": ["send_email"]}}
|
|
2
|
+
{"user_input": "Show me events from today's Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["list_events"]}}
|
|
3
|
+
{"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4, "required_tools": {"github": ["get_pull_request"], "google_docs": ["get_document"]}}
|
|
4
|
+
{"user_input": "Summarize the key insights from all marketing emails received this week from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4, "required_tools": {"google_mail": ["create_filters: Set up new Gmail filter with criteria and automated actions"], "google_docs": [], "tavily": []}}
|
|
5
|
+
{"user_input": "Search for best cafes near IIT bombay using exa and make a google sheet out of it", "difficulty": 3, "required_tools": {"exa": ["search"], "google_sheet": ["create_spreadsheet"]}}
|
|
6
|
+
{"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 5, "required_tools": {"reddit": [], "google_docs": []}}
|
|
7
|
+
{"user_input": "Find the best restaurants in Goa using perplexity web search", "difficulty": 2, "required_tools": {"perplexity": []}}
|
|
8
|
+
{"user_input": "List the unread emails from the last 24 hours from my Gmail, sorted by sender.", "difficulty": 2, "required_tools": {"google_mail": ["list_messages"]}}
|
|
9
|
+
{"user_input": "Tell me how many meetings I have tomorrow and when they start from my Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["get_today_events"]}}
|
|
10
|
+
{"user_input": "Create a meeting with aditakarsh@example.com on the topic of the latest trends in AI at 8PM today using Google Calendar.", "difficulty": 2, "required_tools": {"google_calendar": ["add_an_event"]}}
|
|
11
|
+
{"user_input": "What are the topics of my meetings today from Google Calendar and who are the attendees? Give a 1-line context for each attendee using LinkedIn or web search.", "difficulty": 4, "required_tools": {"google_calendar": ["get_event: Retrieves detailed information about a specific Google Calendar event by its ID"], "linkedin": ["get_your_info"]}}
|
|
12
|
+
{"user_input": "Fetch my last inbox mail from Microsoft Outlook", "difficulty": 1, "required_tools": {"outlook": ["user_get_mail_folder"]}}
|
|
13
|
+
{"user_input": "Fetch unsubscribe links from my Gmail inbox for promo emails I have received in the last 7 days", "difficulty": 3, "required_tools": {"google_mail": []}}
|
|
14
|
+
{"user_input": "Fetch all unread emails from Gmail and new tickets from ClickUp for me from last night", "difficulty": 4, "required_tools": {"google_mail": [], "clickup": []}}
|
|
15
|
+
{"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4, "required_tools": {"serpapi": ["search"], "perplexity": [], "e2b": []}}
|
|
16
|
+
{"user_input": "Create a weekly expense report from my credit card transactions and categorize spending by type (food, transport, entertainment, etc.) in a Google Sheet", "difficulty": 3, "required_tools": {}}
|
|
17
|
+
{"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4, "required_tools": {"serpapi": ["search"], "google_sheet": []}}
|
|
18
|
+
{"user_input": "Research the top 10 Y Combinator startups from the latest batch using web search and create a report on their industries and funding status in Google Docs", "difficulty": 5, "required_tools": {"serpapi": [], "google_docs": ["create_document"]}}
|
|
19
|
+
{"user_input": "Find and summarize the key takeaways from the latest earnings calls of FAANG companies using web search and create a report in Google Docs", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_docs": []}}
|
|
20
|
+
{"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5, "required_tools": {"linkedin": [], "scraper": ["linkedin_list_all_posts"], "google_sheet": [], "openai": []}}
|
|
21
|
+
{"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4, "required_tools": {"twitter": [], "google_sheet": []}}
|
|
22
|
+
{"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_sheet": [], "google_calendar": []}}
|
|
@@ -3,6 +3,7 @@ from agentevals.trajectory.llm import (
|
|
|
3
3
|
TRAJECTORY_ACCURACY_PROMPT,
|
|
4
4
|
create_trajectory_llm_as_judge,
|
|
5
5
|
)
|
|
6
|
+
from google.ai.generativelanguage_v1beta import ToolConfig
|
|
6
7
|
from langsmith.evaluation import EvaluationResult, run_evaluator
|
|
7
8
|
from langsmith.schemas import Example, Run
|
|
8
9
|
from openevals.llm import create_llm_as_judge
|
|
@@ -51,3 +52,26 @@ trajectory_evaluator = create_trajectory_llm_as_judge(
|
|
|
51
52
|
prompt=TRAJECTORY_ACCURACY_PROMPT,
|
|
52
53
|
model="anthropic:claude-4-sonnet-20250514",
|
|
53
54
|
)
|
|
55
|
+
|
|
56
|
+
@run_evaluator
|
|
57
|
+
def tool_node_evaluator(run: Run, example: Example | None = None) -> EvaluationResult:
|
|
58
|
+
"""
|
|
59
|
+
A simple evaluator that checks if the agent used the required tools.
|
|
60
|
+
"""
|
|
61
|
+
try:
|
|
62
|
+
if example is None or example.outputs is None or "required_tools" not in example.outputs:
|
|
63
|
+
return EvaluationResult(key="tool_node", score=0, comment="No required tools provided. Example: " + str(example))
|
|
64
|
+
required_tools : ToolConfig = example.outputs["required_tools"]
|
|
65
|
+
agent_response_raw : ToolConfig = run.outputs.get("tool_config", {})
|
|
66
|
+
# Flatten the tool_configs to a single set of tool_ids
|
|
67
|
+
required_tool_ids = [f"{app_id}___{tool_id}" for app_id, tools in required_tools.items() for tool_id in tools]
|
|
68
|
+
agent_tool_ids = [f"{app_id}___{tool_id}" for app_id, tools in agent_response_raw.items() for tool_id in tools]
|
|
69
|
+
if set(required_tool_ids).issubset(set(agent_tool_ids)):
|
|
70
|
+
return EvaluationResult(key="tool_node", score=1, comment="Tool usage: " + str(required_tools))
|
|
71
|
+
else:
|
|
72
|
+
return EvaluationResult(key="tool_node", score=0, comment="Tool usage: " + str(required_tools))
|
|
73
|
+
except Exception as e:
|
|
74
|
+
print(f"Error evaluating tool usage: {str(e)}")
|
|
75
|
+
print(run.outputs)
|
|
76
|
+
print(example.outputs)
|
|
77
|
+
return EvaluationResult(key="tool_node", score=0, comment=f"Error evaluating tool usage: {str(e)}")
|
|
@@ -12,6 +12,7 @@ from evals.evaluators import (
|
|
|
12
12
|
correctness_evaluator,
|
|
13
13
|
exact_match_evaluator,
|
|
14
14
|
trajectory_evaluator,
|
|
15
|
+
tool_node_evaluator,
|
|
15
16
|
)
|
|
16
17
|
from universal_mcp.agents import get_agent
|
|
17
18
|
from universal_mcp.agents.base import BaseAgent
|
|
@@ -39,6 +40,7 @@ EVALUATORS: dict[str, Any] = {
|
|
|
39
40
|
"llm_as_judge": correctness_evaluator,
|
|
40
41
|
"exact_match": exact_match_evaluator,
|
|
41
42
|
"trajectory": trajectory_evaluator,
|
|
43
|
+
"tool_node": tool_node_evaluator,
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
|
|
@@ -55,13 +57,16 @@ def get_evaluator(evaluator_name: str) -> RunEvaluator:
|
|
|
55
57
|
|
|
56
58
|
|
|
57
59
|
|
|
58
|
-
async def agent_runner(agent: BaseAgent, inputs: dict):
|
|
60
|
+
async def agent_runner(agent: BaseAgent, inputs: dict) -> dict:
|
|
59
61
|
"""
|
|
60
62
|
Runs the agent and returns a dictionary with the final output.
|
|
61
63
|
"""
|
|
62
64
|
result = await agent.invoke(user_input=inputs["user_input"])
|
|
63
65
|
messages = messages_to_list(result["messages"])
|
|
64
|
-
|
|
66
|
+
return_result = {"output": messages}
|
|
67
|
+
if "tool_config" in result:
|
|
68
|
+
return_result["tool_config"] = result["tool_config"]
|
|
69
|
+
return return_result
|
|
65
70
|
|
|
66
71
|
async def main(agent_name: str, dataset_path: str, evaluator_name: str):
|
|
67
72
|
"""
|
|
@@ -93,9 +98,10 @@ async def main(agent_name: str, dataset_path: str, evaluator_name: str):
|
|
|
93
98
|
for example in dataset_examples:
|
|
94
99
|
client.create_example(
|
|
95
100
|
inputs={"user_input": example["user_input"]},
|
|
96
|
-
outputs={
|
|
97
|
-
|
|
98
|
-
|
|
101
|
+
outputs={
|
|
102
|
+
"expected_output": example.get("expected_output", ""),
|
|
103
|
+
"required_tools": example.get("required_tools", {})
|
|
104
|
+
},
|
|
99
105
|
dataset_id=dataset.id,
|
|
100
106
|
)
|
|
101
107
|
except Exception:
|
|
@@ -73,7 +73,7 @@ class BaseAgent:
|
|
|
73
73
|
# Ignore intermeddite finish messages
|
|
74
74
|
if "finish_reason" in event.response_metadata:
|
|
75
75
|
# Got LLM finish reason ignore it
|
|
76
|
-
logger.
|
|
76
|
+
logger.error(f"Finish event: {event}, reason: {event.response_metadata['finish_reason']}, Metadata: {metadata}")
|
|
77
77
|
pass
|
|
78
78
|
else:
|
|
79
79
|
logger.debug(f"Event: {event}, Metadata: {metadata}")
|
|
@@ -89,7 +89,6 @@ class BaseAgent:
|
|
|
89
89
|
await self.ainit()
|
|
90
90
|
with self.cli.display_agent_response_streaming(self.name) as stream_updater:
|
|
91
91
|
async for event in self.stream(thread_id, user_input):
|
|
92
|
-
|
|
93
92
|
if isinstance(event.content, list):
|
|
94
93
|
thinking_content = "".join([c.get("thinking", "") for c in event.content])
|
|
95
94
|
stream_updater.update(thinking_content, type_="thinking")
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from langgraph.checkpoint.base import BaseCheckpointSaver
|
|
2
|
-
from universal_mcp.logger import logger
|
|
3
|
-
from universal_mcp.tools.registry import ToolRegistry
|
|
4
2
|
|
|
5
3
|
from universal_mcp.agents.base import BaseAgent
|
|
6
4
|
from universal_mcp.agents.llm import load_chat_model
|
|
7
|
-
from universal_mcp.
|
|
5
|
+
from universal_mcp.logger import logger
|
|
6
|
+
from universal_mcp.tools.registry import ToolRegistry
|
|
7
|
+
from universal_mcp.types import ToolConfig, ToolFormat
|
|
8
8
|
|
|
9
9
|
from .graph import build_graph
|
|
10
10
|
from .prompts import SYSTEM_PROMPT
|
|
@@ -18,6 +18,7 @@ class BigToolAgent2(BaseAgent):
|
|
|
18
18
|
model: str,
|
|
19
19
|
registry: ToolRegistry,
|
|
20
20
|
memory: BaseCheckpointSaver | None = None,
|
|
21
|
+
tools: ToolConfig | None = None,
|
|
21
22
|
**kwargs,
|
|
22
23
|
):
|
|
23
24
|
super().__init__(name, instructions, model, memory, **kwargs)
|
|
@@ -25,7 +26,10 @@ class BigToolAgent2(BaseAgent):
|
|
|
25
26
|
self.registry = registry
|
|
26
27
|
self.llm = load_chat_model(self.model)
|
|
27
28
|
self.recursion_limit = kwargs.get("recursion_limit", 10)
|
|
28
|
-
self.
|
|
29
|
+
self.tools = tools or {}
|
|
30
|
+
if "ui" not in self.tools:
|
|
31
|
+
# self.tools["ui"] = ["create_bar_chart", "create_line_chart", "create_pie_chart", "create_table", "http_get", "http_post", "http_put", "http_delete", "http_patch", "read_file"]
|
|
32
|
+
self.tools["ui"] = ["create_table"]
|
|
29
33
|
|
|
30
34
|
logger.info(
|
|
31
35
|
f"BigToolAgent '{self.name}' initialized with model '{self.model}'."
|
|
@@ -41,11 +45,12 @@ class BigToolAgent2(BaseAgent):
|
|
|
41
45
|
"""Build the bigtool agent graph using the existing create_agent function."""
|
|
42
46
|
logger.info(f"Building graph for BigToolAgent '{self.name}'...")
|
|
43
47
|
try:
|
|
48
|
+
default_tools = await self.registry.export_tools(self.tools, ToolFormat.LANGCHAIN)
|
|
44
49
|
graph_builder = build_graph(
|
|
45
50
|
tool_registry=self.registry,
|
|
46
51
|
llm=self.llm,
|
|
47
52
|
system_prompt=self._build_system_message(),
|
|
48
|
-
|
|
53
|
+
default_tools=default_tools,
|
|
49
54
|
)
|
|
50
55
|
|
|
51
56
|
compiled_graph = graph_builder.compile(checkpointer=self.memory)
|
{universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/graph.py
RENAMED
|
@@ -4,18 +4,18 @@ from typing import Literal, cast
|
|
|
4
4
|
|
|
5
5
|
from langchain_core.language_models import BaseChatModel
|
|
6
6
|
from langchain_core.messages import AIMessage, ToolMessage
|
|
7
|
-
from langchain_core.tools import tool
|
|
7
|
+
from langchain_core.tools import BaseTool, tool
|
|
8
8
|
from langgraph.graph import StateGraph
|
|
9
9
|
from langgraph.types import Command
|
|
10
|
+
|
|
11
|
+
from universal_mcp.agents.bigtool2.state import State
|
|
10
12
|
from universal_mcp.logger import logger
|
|
11
13
|
from universal_mcp.tools.registry import ToolRegistry
|
|
12
14
|
from universal_mcp.types import ToolFormat
|
|
13
15
|
|
|
14
|
-
from universal_mcp.agents.bigtool2.state import State
|
|
15
|
-
|
|
16
16
|
|
|
17
17
|
def build_graph(
|
|
18
|
-
tool_registry: ToolRegistry, llm: BaseChatModel, system_prompt: str,
|
|
18
|
+
tool_registry: ToolRegistry, llm: BaseChatModel, system_prompt: str, default_tools: list[BaseTool]
|
|
19
19
|
):
|
|
20
20
|
@tool
|
|
21
21
|
async def search_tools(queries: list[str]) -> str:
|
|
@@ -36,17 +36,18 @@ def build_graph(
|
|
|
36
36
|
]
|
|
37
37
|
app_tools = {}
|
|
38
38
|
for task_query in queries:
|
|
39
|
-
|
|
39
|
+
apps_list = await tool_registry.search_apps(task_query, limit=5)
|
|
40
|
+
tools_list = []
|
|
41
|
+
for app in apps_list:
|
|
42
|
+
tools_list.extend(await tool_registry.search_tools(task_query, limit=5, app_id=app["id"]))
|
|
40
43
|
tool_candidates = [
|
|
41
44
|
f"{tool['id']}: {tool['description']}" for tool in tools_list
|
|
42
45
|
]
|
|
43
46
|
for tool in tool_candidates:
|
|
44
47
|
app = tool.split("__")[0]
|
|
45
48
|
if app not in app_tools:
|
|
46
|
-
if len(app_tools.keys()) >= 10:
|
|
47
|
-
break
|
|
48
49
|
app_tools[app] = []
|
|
49
|
-
if len(app_tools[app]) <
|
|
50
|
+
if len(app_tools[app]) < 5:
|
|
50
51
|
app_tools[app].append(tool)
|
|
51
52
|
for app in app_tools:
|
|
52
53
|
app_status = "connected" if app in connected_apps else "NOT connected"
|
|
@@ -64,9 +65,19 @@ def build_graph(
|
|
|
64
65
|
|
|
65
66
|
@tool
|
|
66
67
|
async def load_tools(tool_ids: list[str]) -> list[str]:
|
|
67
|
-
"""Load the tools for the given tool ids. Returns the tool ids."""
|
|
68
|
+
"""Load the tools for the given tool ids. Returns the tool ids after loading them. Note that tool ids are the complete tool ids, with both the app name and the tool name separated by double underscore (__). e.g. google_mail__send_email"""
|
|
68
69
|
return tool_ids
|
|
69
70
|
|
|
71
|
+
@tool
|
|
72
|
+
async def web_search(query: str) -> str:
|
|
73
|
+
"""Search the web for the given query. Returns the search results."""
|
|
74
|
+
tool = await tool_registry.export_tools(
|
|
75
|
+
["exa__search"], ToolFormat.LANGCHAIN
|
|
76
|
+
)
|
|
77
|
+
response = await tool_registry.call_tool("exa__search", {"query": query, "contents": {"summary": True}})
|
|
78
|
+
return response
|
|
79
|
+
|
|
80
|
+
|
|
70
81
|
async def call_model(
|
|
71
82
|
state: State,
|
|
72
83
|
) -> Command[Literal["select_tools", "call_tools"]]:
|
|
@@ -91,8 +102,17 @@ def build_graph(
|
|
|
91
102
|
|
|
92
103
|
model = llm
|
|
93
104
|
|
|
105
|
+
tools = [search_tools, load_tools, web_search, *default_tools, *selected_tools]
|
|
106
|
+
# Remove duplicates based on tool name
|
|
107
|
+
seen_names = set()
|
|
108
|
+
unique_tools = []
|
|
109
|
+
for tool in tools:
|
|
110
|
+
if tool.name not in seen_names:
|
|
111
|
+
seen_names.add(tool.name)
|
|
112
|
+
unique_tools.append(tool)
|
|
113
|
+
tools = unique_tools
|
|
94
114
|
model_with_tools = model.bind_tools(
|
|
95
|
-
|
|
115
|
+
tools,
|
|
96
116
|
tool_choice="auto",
|
|
97
117
|
)
|
|
98
118
|
response = cast(AIMessage, await model_with_tools.ainvoke(messages))
|
|
@@ -124,6 +144,23 @@ def build_graph(
|
|
|
124
144
|
},
|
|
125
145
|
)
|
|
126
146
|
|
|
147
|
+
elif tool_call["name"] == web_search.name:
|
|
148
|
+
logger.info(f"Tool '{tool_call['name']}' is a web search tool. Proceeding to call.")
|
|
149
|
+
web_search_result = await web_search.ainvoke(input=tool_call["args"])
|
|
150
|
+
tool_msg = ToolMessage(
|
|
151
|
+
f"Web search result: {web_search_result}", tool_call_id=tool_call["id"]
|
|
152
|
+
)
|
|
153
|
+
return Command(goto="call_model", update={"messages": [response, tool_msg]})
|
|
154
|
+
|
|
155
|
+
elif "ui_tools" in tool_call["name"]:
|
|
156
|
+
logger.info(f"Tool '{tool_call['name']}' is a UI tool. Proceeding to call.")
|
|
157
|
+
ui_tool_result = await ui_tools_dict[tool_call["name"]].ainvoke(input=tool_call["args"])
|
|
158
|
+
tool_msg = ToolMessage(
|
|
159
|
+
f"UI tool result: {ui_tool_result}", tool_call_id=tool_call["id"]
|
|
160
|
+
)
|
|
161
|
+
return Command(goto="call_model", update={"messages": [response, tool_msg]})
|
|
162
|
+
|
|
163
|
+
|
|
127
164
|
elif tool_call["name"] not in state["selected_tool_ids"]:
|
|
128
165
|
try:
|
|
129
166
|
await tool_registry.export_tools(
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Default prompts used by the agent."""
|
|
2
|
+
|
|
3
|
+
SYSTEM_PROMPT = """You are a helpful AI assistant.
|
|
4
|
+
|
|
5
|
+
**Core Directives:**
|
|
6
|
+
1. **Always Use Tools for Tasks:** For any user request that requires an action (e.g., sending an email, searching for information, creating an event, displaying a chart), you MUST use a tool. Do not refuse a task if a tool might exist for it.
|
|
7
|
+
|
|
8
|
+
2. Check if your existing tools or knowledge can handle the user's request. If they can, use them. If they cannot, you must call the `search_tools` function to find the right tools for the user's request.You must not use the same/similar query multiple times in the list. The list should have multiple queries only if the task has clearly different sub-tasks. If you do not find any specific relevant tools, use the pre-loaded generic tools.
|
|
9
|
+
|
|
10
|
+
3. **Load Tools:** After looking at the output of `search_tools`, you MUST call the `load_tools` function to load only the tools you want to use. Provide the full tool ids, not just the app names. Use your judgement to eliminate irrelevant apps that came up just because of semantic similarity. However, sometimes, multiple apps might be relevant for the same task. Prefer connected apps over unconnected apps while breaking a tie. If more than one relevant app (or none of the relevant apps) are connected, you must ask the user to choose the app. In case the user asks you to use an app that is not connected, call the apps tools normally. The tool will return a link for connecting that you should pass on to the user.
|
|
11
|
+
|
|
12
|
+
4. **Strictly Follow the Process:** Your only job in your first turn is to analyze the user's request and answer using existing tools/knowledge or `search_tools` with a concise query describing the core task. Do not engage in conversation, or extend the conversation beyond the user's request.
|
|
13
|
+
|
|
14
|
+
{instructions}
|
|
15
|
+
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from datetime import UTC, datetime
|
|
3
|
-
from typing import Literal, TypedDict, cast
|
|
3
|
+
from typing import Any, Literal, TypedDict, cast
|
|
4
4
|
|
|
5
5
|
from langchain_anthropic import ChatAnthropic
|
|
6
6
|
from langchain_core.language_models import BaseChatModel
|
|
@@ -11,12 +11,11 @@ from langgraph.runtime import Runtime
|
|
|
11
11
|
from langgraph.types import Command
|
|
12
12
|
|
|
13
13
|
from universal_mcp.agents.bigtoolcache.context import Context
|
|
14
|
+
from universal_mcp.agents.bigtoolcache.prompts import TOOLS_LIST
|
|
14
15
|
from universal_mcp.agents.bigtoolcache.state import State
|
|
15
16
|
from universal_mcp.logger import logger
|
|
16
17
|
from universal_mcp.tools.registry import ToolRegistry
|
|
17
18
|
from universal_mcp.types import ToolFormat
|
|
18
|
-
from universal_mcp.agents.bigtoolcache.prompts import TOOLS_LIST
|
|
19
|
-
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
class ToolSelectionOutput(TypedDict):
|
|
@@ -180,16 +179,15 @@ def build_graph(tool_registry: ToolRegistry, llm: BaseChatModel):
|
|
|
180
179
|
content=json.dumps(tool_result),
|
|
181
180
|
name=tool_id,
|
|
182
181
|
tool_call_id=tool_call["id"],
|
|
183
|
-
)
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
)
|
|
182
|
+
))
|
|
183
|
+
recent_tool_ids.append(tool_call["name"])
|
|
184
|
+
except Exception as e:
|
|
185
|
+
logger.error(f"Error executing tool '{tool_call['name']}': {e}")
|
|
186
|
+
outputs.append(
|
|
187
|
+
ToolMessage(
|
|
188
|
+
content=json.dumps("Error: " + str(e)),
|
|
189
|
+
name=tool_call["name"],
|
|
190
|
+
tool_call_id=tool_call["id"],
|
|
193
191
|
)
|
|
194
192
|
)
|
|
195
193
|
return Command(
|
{universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/builder.py
RENAMED
|
@@ -14,6 +14,7 @@ from universal_mcp.types import ToolConfig
|
|
|
14
14
|
from universal_mcp.agents.base import BaseAgent
|
|
15
15
|
from universal_mcp.agents.llm import load_chat_model
|
|
16
16
|
from universal_mcp.agents.shared.tool_node import build_tool_node_graph
|
|
17
|
+
from universal_mcp.agents.utils import messages_to_list
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class Agent(BaseModel):
|
|
@@ -179,9 +180,13 @@ async def main():
|
|
|
179
180
|
model="gemini/gemini-1.5-pro",
|
|
180
181
|
registry=registry,
|
|
181
182
|
)
|
|
182
|
-
await agent.invoke(
|
|
183
|
+
result = await agent.invoke(
|
|
183
184
|
"Send a daily email to manoj@agentr.dev with daily agenda of the day",
|
|
184
185
|
)
|
|
186
|
+
from rich import print
|
|
187
|
+
print(messages_to_list(result["messages"]))
|
|
188
|
+
print(result["generated_agent"])
|
|
189
|
+
print(result["tool_config"])
|
|
185
190
|
|
|
186
191
|
|
|
187
192
|
if __name__ == "__main__":
|
|
@@ -8,7 +8,7 @@ from langchain_openai import AzureChatOpenAI
|
|
|
8
8
|
|
|
9
9
|
@lru_cache(maxsize=8)
|
|
10
10
|
def load_chat_model(
|
|
11
|
-
fully_specified_name: str, temperature: float = 1.0, tags: list[str] | None = None, thinking: bool =
|
|
11
|
+
fully_specified_name: str, temperature: float = 1.0, tags: list[str] | None = None, thinking: bool = False
|
|
12
12
|
) -> BaseChatModel:
|
|
13
13
|
"""Load a chat model from a fully specified name.
|
|
14
14
|
Args:
|
|
@@ -7,7 +7,7 @@ from universal_mcp.types import ToolConfig, ToolFormat
|
|
|
7
7
|
|
|
8
8
|
from universal_mcp.agents.base import BaseAgent
|
|
9
9
|
from universal_mcp.agents.llm import load_chat_model
|
|
10
|
-
from universal_mcp.agents.utils import
|
|
10
|
+
from universal_mcp.agents.utils import messages_to_list
|
|
11
11
|
|
|
12
12
|
DEVELOPER_PROMPT = """You are {name}.
|
|
13
13
|
|
|
@@ -38,8 +38,9 @@ class ReactAgent(BaseAgent):
|
|
|
38
38
|
):
|
|
39
39
|
super().__init__(name, instructions, model, memory, **kwargs)
|
|
40
40
|
self.llm = load_chat_model(model)
|
|
41
|
-
self.tools = tools
|
|
42
|
-
self.
|
|
41
|
+
self.tools = tools or {}
|
|
42
|
+
if "ui" not in self.tools:
|
|
43
|
+
self.tools["ui"] = ["create_bar_chart", "create_line_chart", "create_pie_chart", "create_table", "http_get", "http_post", "http_put", "http_delete", "http_patch", "read_file"]
|
|
43
44
|
self.max_iterations = max_iterations
|
|
44
45
|
self.registry = registry
|
|
45
46
|
|
|
@@ -48,13 +49,11 @@ class ReactAgent(BaseAgent):
|
|
|
48
49
|
if self.tools:
|
|
49
50
|
if not self.registry:
|
|
50
51
|
raise ValueError("Tools are configured but no registry is provided")
|
|
51
|
-
|
|
52
52
|
tools = await self.registry.export_tools(self.tools, ToolFormat.LANGCHAIN)
|
|
53
53
|
logger.debug(tools)
|
|
54
54
|
else:
|
|
55
55
|
tools = []
|
|
56
56
|
|
|
57
|
-
tools.extend(self.ui_tools)
|
|
58
57
|
|
|
59
58
|
logger.debug(f"Initialized ReactAgent: name={self.name}, model={self.model}")
|
|
60
59
|
return create_react_agent(
|
|
@@ -11,7 +11,6 @@ from rich.table import Table
|
|
|
11
11
|
from universal_mcp.tools.manager import ToolManager
|
|
12
12
|
from universal_mcp.types import ToolFormat
|
|
13
13
|
|
|
14
|
-
from universal_mcp.agents.ui_tools import UIToolsApp
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
class RichCLI:
|
|
@@ -138,12 +137,3 @@ Available commands:
|
|
|
138
137
|
|
|
139
138
|
def messages_to_list(messages: list[BaseMessage]):
|
|
140
139
|
return [{"type": message.type, "content": message.content} for message in messages]
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
def initialize_ui_tools() -> list:
|
|
144
|
-
"""
|
|
145
|
-
Initialize and return UI tools in a langchain compatible format.
|
|
146
|
-
"""
|
|
147
|
-
tool_manager = ToolManager(default_format=ToolFormat.LANGCHAIN)
|
|
148
|
-
tool_manager.register_tools_from_app(UIToolsApp())
|
|
149
|
-
return tool_manager.list_tools()
|
|
@@ -28,12 +28,12 @@ class ColumnDefinition(TypedDict):
|
|
|
28
28
|
type: Literal["string", "number", "date", "boolean"] | None
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
class
|
|
31
|
+
class UiApp(BaseApplication):
|
|
32
32
|
"""An application for creating UI tools"""
|
|
33
33
|
|
|
34
|
-
def __init__(self):
|
|
34
|
+
def __init__(self, **kwargs):
|
|
35
35
|
"""Initialize the DefaultToolsApp"""
|
|
36
|
-
super().__init__(name="
|
|
36
|
+
super().__init__(name="ui")
|
|
37
37
|
self.markitdown = MarkItDown(enable_plugins=True)
|
|
38
38
|
|
|
39
39
|
def create_bar_chart(
|