universal-mcp-agents 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/PKG-INFO +1 -2
  2. universal_mcp_agents-0.1.6/builder_tools.py +34 -0
  3. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/bump_and_release.sh +1 -1
  4. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/pyproject.toml +2 -3
  5. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/datasets/tasks.jsonl +22 -22
  6. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/evaluators.py +24 -0
  7. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/run.py +11 -5
  8. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/base.py +1 -2
  9. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/__init__.py +10 -5
  10. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/agent.py +0 -1
  11. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/graph.py +47 -10
  12. universal_mcp_agents-0.1.6/src/universal_mcp/agents/bigtool2/prompts.py +15 -0
  13. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/agent.py +0 -1
  14. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/graph.py +11 -13
  15. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/builder.py +6 -1
  16. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/llm.py +1 -1
  17. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/react.py +4 -5
  18. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/utils.py +0 -10
  19. universal_mcp_agents-0.1.5/src/universal_mcp/agents/ui_tools.py → universal_mcp_agents-0.1.6/src/universal_mcp/applications/ui/app.py +3 -3
  20. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/uv.lock +60 -943
  21. universal_mcp_agents-0.1.5/src/universal_mcp/agents/autoagent/studio.py +0 -20
  22. universal_mcp_agents-0.1.5/src/universal_mcp/agents/bigtool2/prompts.py +0 -12
  23. universal_mcp_agents-0.1.5/src/universal_mcp/agents/tools.py +0 -40
  24. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/.gitignore +0 -0
  25. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/.pre-commit-config.yaml +0 -0
  26. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/GEMINI.md +0 -0
  27. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/PROMPTS.md +0 -0
  28. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/README.md +0 -0
  29. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/__init__.py +0 -0
  30. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/dataset.py +0 -0
  31. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/datasets/exact.jsonl +0 -0
  32. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/datasets/test.jsonl +0 -0
  33. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/evals/utils.py +0 -0
  34. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/tests/test_agents.py +0 -0
  35. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/__init__.py +0 -0
  36. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/__init__.py +0 -0
  37. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/__main__.py +0 -0
  38. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/context.py +0 -0
  39. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/graph.py +0 -0
  40. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/prompts.py +0 -0
  41. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/state.py +0 -0
  42. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/autoagent/utils.py +0 -0
  43. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/__init__.py +0 -0
  44. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/__main__.py +0 -0
  45. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/graph.py +0 -0
  46. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/prompts.py +0 -0
  47. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool/state.py +0 -0
  48. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/__main__.py +1 -1
  49. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtool2/state.py +0 -0
  50. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/__init__.py +2 -2
  51. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/__main__.py +1 -1
  52. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/context.py +0 -0
  53. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/prompts.py +0 -0
  54. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/state.py +0 -0
  55. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/tools_all.txt +0 -0
  56. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/bigtoolcache/tools_important.txt +0 -0
  57. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/cli.py +0 -0
  58. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/__init__.py +0 -0
  59. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/sandbox.py +0 -0
  60. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/test.py +0 -0
  61. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/codeact/utils.py +0 -0
  62. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/hil.py +0 -0
  63. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/__init__.py +0 -0
  64. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/__main__.py +0 -0
  65. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/graph.py +0 -0
  66. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/prompts.py +0 -0
  67. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/planner/state.py +0 -0
  68. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/shared/tool_node.py +0 -0
  69. {universal_mcp_agents-0.1.5 → universal_mcp_agents-0.1.6}/src/universal_mcp/agents/simple.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: universal-mcp-agents
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Add your description here
5
5
  Project-URL: Homepage, https://github.com/universal-mcp/applications
6
6
  Project-URL: Repository, https://github.com/universal-mcp/applications
@@ -11,7 +11,6 @@ Requires-Dist: langchain-anthropic>=0.3.19
11
11
  Requires-Dist: langchain-google-genai>=2.1.10
12
12
  Requires-Dist: langchain-openai>=0.3.32
13
13
  Requires-Dist: langgraph>=0.6.6
14
- Requires-Dist: universal-mcp-applications>=0.1.4
15
14
  Requires-Dist: universal-mcp>=0.1.24rc17
16
15
  Provides-Extra: dev
17
16
  Requires-Dist: pre-commit; extra == 'dev'
@@ -0,0 +1,34 @@
1
+ from universal_mcp.agentr.registry import AgentrRegistry
2
+ from universal_mcp.agents.builder import BuilderAgent
3
+ import json
4
+
5
+
6
+ def load_tasks():
7
+ with open("src/evals/datasets/tasks.jsonl", "r") as f:
8
+ for line in f:
9
+ yield json.loads(line)
10
+
11
+
12
+ async def main():
13
+ registry = AgentrRegistry()
14
+ builder = BuilderAgent(
15
+ name="Builder Agent",
16
+ instructions="You are a builder agent that creates other agents.",
17
+ model="gemini/gemini-1.5-pro",
18
+ registry=registry,
19
+ )
20
+ updated_tasks = []
21
+ tasks = load_tasks()
22
+ for task in tasks:
23
+ print(task["user_input"])
24
+ result = await builder.invoke(task["user_input"])
25
+ tools = result["tool_config"] or {}
26
+ updated_tasks.append({**task, "required_tools": tools})
27
+ with open("src/evals/datasets/tasks_with_tools.jsonl", "w") as f:
28
+ for task in updated_tasks:
29
+ f.write(json.dumps(task) + "\n")
30
+
31
+
32
+ if __name__ == "__main__":
33
+ import asyncio
34
+ asyncio.run(main())
@@ -9,7 +9,7 @@ uv sync --all-extras
9
9
 
10
10
  # Run tests with pytest
11
11
  echo "Running tests with pytest..."
12
- # uv run pytest # --cov=src --cov-report=term-missing
12
+ uv run pytest
13
13
 
14
14
  echo "Tests passed!"
15
15
 
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
6
6
 
7
7
  [project]
8
8
  name = "universal-mcp-agents"
9
- version = "0.1.5"
9
+ version = "0.1.6"
10
10
  description = "Add your description here"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -18,8 +18,7 @@ dependencies = [
18
18
  "langchain-google-genai>=2.1.10",
19
19
  "langchain-openai>=0.3.32",
20
20
  "langgraph>=0.6.6",
21
- "universal-mcp >= 0.1.24rc17",
22
- "universal-mcp-applications>=0.1.4",
21
+ "universal-mcp>=0.1.24rc17",
23
22
  ]
24
23
 
25
24
  [project.license]
@@ -1,22 +1,22 @@
1
- {"user_input": "Send an email to manoj@agentr.dev from my Gmail account", "difficulty": 1}
2
- {"user_input": "Show me events from today's Google Calendar.", "difficulty": 1}
3
- {"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4}
4
- {"user_input": "Summarize the key insights from all marketing emails received this week from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4}
5
- {"user_input": "Create a Google Sheet of the best cafes and restaurants near IIT Bombay", "difficulty": 3}
6
- {"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 5}
7
- {"user_input": "Find the best restaurants in Goa using perplexity web search", "difficulty": 2}
8
- {"user_input": "List the unread emails from the last 24 hours from my Gmail, sorted by sender.", "difficulty": 2}
9
- {"user_input": "Tell me how many meetings I have tomorrow and when they start from my Google Calendar.", "difficulty": 1}
10
- {"user_input": "Create a meeting with aditakarsh@example.com on the topic of the latest trends in AI at 8PM today using Google Calendar.", "difficulty": 2}
11
- {"user_input": "What are the topics of my meetings today from Google Calendar and who are the attendees? Give a 1-line context for each attendee using LinkedIn or web search.", "difficulty": 4}
12
- {"user_input": "Fetch my last inbox mail from Microsoft Outlook", "difficulty": 1}
13
- {"user_input": "Fetch unsubscribe links from my Gmail inbox for promo emails I have received in the last 7 days", "difficulty": 3}
14
- {"user_input": "Fetch all unread emails from Gmail and new tickets from ClickUp for me from last night", "difficulty": 4}
15
- {"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4}
16
- {"user_input": "Create a weekly expense report from my credit card transactions and categorize spending by type (food, transport, entertainment, etc.) in a Google Sheet", "difficulty": 3}
17
- {"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4}
18
- {"user_input": "Research the top 10 Y Combinator startups from the latest batch using web search and create a report on their industries and funding status in Google Docs", "difficulty": 5}
19
- {"user_input": "Find and summarize the key takeaways from the latest earnings calls of FAANG companies using web search and create a report in Google Docs", "difficulty": 5}
20
- {"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5}
21
- {"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4}
22
- {"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5}
1
+ {"user_input": "Send an email to manoj@agentr.dev from my Gmail account", "difficulty": 1, "required_tools": {"google_mail": ["send_email"]}}
2
+ {"user_input": "Show me events from today's Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["list_events"]}}
3
+ {"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4, "required_tools": {"github": ["get_pull_request"], "google_docs": ["get_document"]}}
4
+ {"user_input": "Summarize the key insights from all marketing emails received this week from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4, "required_tools": {"google_mail": ["create_filters: Set up new Gmail filter with criteria and automated actions"], "google_docs": [], "tavily": []}}
5
+ {"user_input": "Search for best cafes near IIT bombay using exa and make a google sheet out of it", "difficulty": 3, "required_tools": {"exa": ["search"], "google_sheet": ["create_spreadsheet"]}}
6
+ {"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 5, "required_tools": {"reddit": [], "google_docs": []}}
7
+ {"user_input": "Find the best restaurants in Goa using perplexity web search", "difficulty": 2, "required_tools": {"perplexity": []}}
8
+ {"user_input": "List the unread emails from the last 24 hours from my Gmail, sorted by sender.", "difficulty": 2, "required_tools": {"google_mail": ["list_messages"]}}
9
+ {"user_input": "Tell me how many meetings I have tomorrow and when they start from my Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["get_today_events"]}}
10
+ {"user_input": "Create a meeting with aditakarsh@example.com on the topic of the latest trends in AI at 8PM today using Google Calendar.", "difficulty": 2, "required_tools": {"google_calendar": ["add_an_event"]}}
11
+ {"user_input": "What are the topics of my meetings today from Google Calendar and who are the attendees? Give a 1-line context for each attendee using LinkedIn or web search.", "difficulty": 4, "required_tools": {"google_calendar": ["get_event: Retrieves detailed information about a specific Google Calendar event by its ID"], "linkedin": ["get_your_info"]}}
12
+ {"user_input": "Fetch my last inbox mail from Microsoft Outlook", "difficulty": 1, "required_tools": {"outlook": ["user_get_mail_folder"]}}
13
+ {"user_input": "Fetch unsubscribe links from my Gmail inbox for promo emails I have received in the last 7 days", "difficulty": 3, "required_tools": {"google_mail": []}}
14
+ {"user_input": "Fetch all unread emails from Gmail and new tickets from ClickUp for me from last night", "difficulty": 4, "required_tools": {"google_mail": [], "clickup": []}}
15
+ {"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4, "required_tools": {"serpapi": ["search"], "perplexity": [], "e2b": []}}
16
+ {"user_input": "Create a weekly expense report from my credit card transactions and categorize spending by type (food, transport, entertainment, etc.) in a Google Sheet", "difficulty": 3, "required_tools": {}}
17
+ {"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4, "required_tools": {"serpapi": ["search"], "google_sheet": []}}
18
+ {"user_input": "Research the top 10 Y Combinator startups from the latest batch using web search and create a report on their industries and funding status in Google Docs", "difficulty": 5, "required_tools": {"serpapi": [], "google_docs": ["create_document"]}}
19
+ {"user_input": "Find and summarize the key takeaways from the latest earnings calls of FAANG companies using web search and create a report in Google Docs", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_docs": []}}
20
+ {"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5, "required_tools": {"linkedin": [], "scraper": ["linkedin_list_all_posts"], "google_sheet": [], "openai": []}}
21
+ {"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4, "required_tools": {"twitter": [], "google_sheet": []}}
22
+ {"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_sheet": [], "google_calendar": []}}
@@ -3,6 +3,7 @@ from agentevals.trajectory.llm import (
3
3
  TRAJECTORY_ACCURACY_PROMPT,
4
4
  create_trajectory_llm_as_judge,
5
5
  )
6
+ from google.ai.generativelanguage_v1beta import ToolConfig
6
7
  from langsmith.evaluation import EvaluationResult, run_evaluator
7
8
  from langsmith.schemas import Example, Run
8
9
  from openevals.llm import create_llm_as_judge
@@ -51,3 +52,26 @@ trajectory_evaluator = create_trajectory_llm_as_judge(
51
52
  prompt=TRAJECTORY_ACCURACY_PROMPT,
52
53
  model="anthropic:claude-4-sonnet-20250514",
53
54
  )
55
+
56
+ @run_evaluator
57
+ def tool_node_evaluator(run: Run, example: Example | None = None) -> EvaluationResult:
58
+ """
59
+ A simple evaluator that checks if the agent used the required tools.
60
+ """
61
+ try:
62
+ if example is None or example.outputs is None or "required_tools" not in example.outputs:
63
+ return EvaluationResult(key="tool_node", score=0, comment="No required tools provided. Example: " + str(example))
64
+ required_tools : ToolConfig = example.outputs["required_tools"]
65
+ agent_response_raw : ToolConfig = run.outputs.get("tool_config", {})
66
+ # Flatten the tool_configs to a single set of tool_ids
67
+ required_tool_ids = [f"{app_id}___{tool_id}" for app_id, tools in required_tools.items() for tool_id in tools]
68
+ agent_tool_ids = [f"{app_id}___{tool_id}" for app_id, tools in agent_response_raw.items() for tool_id in tools]
69
+ if set(required_tool_ids).issubset(set(agent_tool_ids)):
70
+ return EvaluationResult(key="tool_node", score=1, comment="Tool usage: " + str(required_tools))
71
+ else:
72
+ return EvaluationResult(key="tool_node", score=0, comment="Tool usage: " + str(required_tools))
73
+ except Exception as e:
74
+ print(f"Error evaluating tool usage: {str(e)}")
75
+ print(run.outputs)
76
+ print(example.outputs)
77
+ return EvaluationResult(key="tool_node", score=0, comment=f"Error evaluating tool usage: {str(e)}")
@@ -12,6 +12,7 @@ from evals.evaluators import (
12
12
  correctness_evaluator,
13
13
  exact_match_evaluator,
14
14
  trajectory_evaluator,
15
+ tool_node_evaluator,
15
16
  )
16
17
  from universal_mcp.agents import get_agent
17
18
  from universal_mcp.agents.base import BaseAgent
@@ -39,6 +40,7 @@ EVALUATORS: dict[str, Any] = {
39
40
  "llm_as_judge": correctness_evaluator,
40
41
  "exact_match": exact_match_evaluator,
41
42
  "trajectory": trajectory_evaluator,
43
+ "tool_node": tool_node_evaluator,
42
44
  }
43
45
 
44
46
 
@@ -55,13 +57,16 @@ def get_evaluator(evaluator_name: str) -> RunEvaluator:
55
57
 
56
58
 
57
59
 
58
- async def agent_runner(agent: BaseAgent, inputs: dict):
60
+ async def agent_runner(agent: BaseAgent, inputs: dict) -> dict:
59
61
  """
60
62
  Runs the agent and returns a dictionary with the final output.
61
63
  """
62
64
  result = await agent.invoke(user_input=inputs["user_input"])
63
65
  messages = messages_to_list(result["messages"])
64
- return {"output": messages}
66
+ return_result = {"output": messages}
67
+ if "tool_config" in result:
68
+ return_result["tool_config"] = result["tool_config"]
69
+ return return_result
65
70
 
66
71
  async def main(agent_name: str, dataset_path: str, evaluator_name: str):
67
72
  """
@@ -93,9 +98,10 @@ async def main(agent_name: str, dataset_path: str, evaluator_name: str):
93
98
  for example in dataset_examples:
94
99
  client.create_example(
95
100
  inputs={"user_input": example["user_input"]},
96
- outputs={"expected_output": example.get("expected_output")}
97
- if "expected_output" in example
98
- else None,
101
+ outputs={
102
+ "expected_output": example.get("expected_output", ""),
103
+ "required_tools": example.get("required_tools", {})
104
+ },
99
105
  dataset_id=dataset.id,
100
106
  )
101
107
  except Exception:
@@ -73,7 +73,7 @@ class BaseAgent:
73
73
  # Ignore intermediate finish messages
74
74
  if "finish_reason" in event.response_metadata:
75
75
  # Got LLM finish reason ignore it
76
- logger.debug(f"Finish event: {event}, Metadata: {metadata}")
76
+ logger.error(f"Finish event: {event}, reason: {event.response_metadata['finish_reason']}, Metadata: {metadata}")
77
77
  pass
78
78
  else:
79
79
  logger.debug(f"Event: {event}, Metadata: {metadata}")
@@ -89,7 +89,6 @@ class BaseAgent:
89
89
  await self.ainit()
90
90
  with self.cli.display_agent_response_streaming(self.name) as stream_updater:
91
91
  async for event in self.stream(thread_id, user_input):
92
-
93
92
  if isinstance(event.content, list):
94
93
  thinking_content = "".join([c.get("thinking", "") for c in event.content])
95
94
  stream_updater.update(thinking_content, type_="thinking")
@@ -1,10 +1,10 @@
1
1
  from langgraph.checkpoint.base import BaseCheckpointSaver
2
- from universal_mcp.logger import logger
3
- from universal_mcp.tools.registry import ToolRegistry
4
2
 
5
3
  from universal_mcp.agents.base import BaseAgent
6
4
  from universal_mcp.agents.llm import load_chat_model
7
- from universal_mcp.agents.utils import initialize_ui_tools
5
+ from universal_mcp.logger import logger
6
+ from universal_mcp.tools.registry import ToolRegistry
7
+ from universal_mcp.types import ToolConfig, ToolFormat
8
8
 
9
9
  from .graph import build_graph
10
10
  from .prompts import SYSTEM_PROMPT
@@ -18,6 +18,7 @@ class BigToolAgent2(BaseAgent):
18
18
  model: str,
19
19
  registry: ToolRegistry,
20
20
  memory: BaseCheckpointSaver | None = None,
21
+ tools: ToolConfig | None = None,
21
22
  **kwargs,
22
23
  ):
23
24
  super().__init__(name, instructions, model, memory, **kwargs)
@@ -25,7 +26,10 @@ class BigToolAgent2(BaseAgent):
25
26
  self.registry = registry
26
27
  self.llm = load_chat_model(self.model)
27
28
  self.recursion_limit = kwargs.get("recursion_limit", 10)
28
- self.ui_tools = initialize_ui_tools()
29
+ self.tools = tools or {}
30
+ if "ui" not in self.tools:
31
+ # self.tools["ui"] = ["create_bar_chart", "create_line_chart", "create_pie_chart", "create_table", "http_get", "http_post", "http_put", "http_delete", "http_patch", "read_file"]
32
+ self.tools["ui"] = ["create_table"]
29
33
 
30
34
  logger.info(
31
35
  f"BigToolAgent '{self.name}' initialized with model '{self.model}'."
@@ -41,11 +45,12 @@ class BigToolAgent2(BaseAgent):
41
45
  """Build the bigtool agent graph using the existing create_agent function."""
42
46
  logger.info(f"Building graph for BigToolAgent '{self.name}'...")
43
47
  try:
48
+ default_tools = await self.registry.export_tools(self.tools, ToolFormat.LANGCHAIN)
44
49
  graph_builder = build_graph(
45
50
  tool_registry=self.registry,
46
51
  llm=self.llm,
47
52
  system_prompt=self._build_system_message(),
48
- ui_tools=self.ui_tools,
53
+ default_tools=default_tools,
49
54
  )
50
55
 
51
56
  compiled_graph = graph_builder.compile(checkpointer=self.memory)
@@ -1,5 +1,4 @@
1
1
  from universal_mcp.agentr.registry import AgentrRegistry
2
-
3
2
  from universal_mcp.agents.bigtool2 import BigToolAgent2
4
3
 
5
4
 
@@ -4,18 +4,18 @@ from typing import Literal, cast
4
4
 
5
5
  from langchain_core.language_models import BaseChatModel
6
6
  from langchain_core.messages import AIMessage, ToolMessage
7
- from langchain_core.tools import tool
7
+ from langchain_core.tools import BaseTool, tool
8
8
  from langgraph.graph import StateGraph
9
9
  from langgraph.types import Command
10
+
11
+ from universal_mcp.agents.bigtool2.state import State
10
12
  from universal_mcp.logger import logger
11
13
  from universal_mcp.tools.registry import ToolRegistry
12
14
  from universal_mcp.types import ToolFormat
13
15
 
14
- from universal_mcp.agents.bigtool2.state import State
15
-
16
16
 
17
17
  def build_graph(
18
- tool_registry: ToolRegistry, llm: BaseChatModel, system_prompt: str, ui_tools: list
18
+ tool_registry: ToolRegistry, llm: BaseChatModel, system_prompt: str, default_tools: list[BaseTool]
19
19
  ):
20
20
  @tool
21
21
  async def search_tools(queries: list[str]) -> str:
@@ -36,17 +36,18 @@ def build_graph(
36
36
  ]
37
37
  app_tools = {}
38
38
  for task_query in queries:
39
- tools_list = await tool_registry.search_tools(task_query, limit=40)
39
+ apps_list = await tool_registry.search_apps(task_query, limit=5)
40
+ tools_list = []
41
+ for app in apps_list:
42
+ tools_list.extend(await tool_registry.search_tools(task_query, limit=5, app_id=app["id"]))
40
43
  tool_candidates = [
41
44
  f"{tool['id']}: {tool['description']}" for tool in tools_list
42
45
  ]
43
46
  for tool in tool_candidates:
44
47
  app = tool.split("__")[0]
45
48
  if app not in app_tools:
46
- if len(app_tools.keys()) >= 10:
47
- break
48
49
  app_tools[app] = []
49
- if len(app_tools[app]) < 3:
50
+ if len(app_tools[app]) < 5:
50
51
  app_tools[app].append(tool)
51
52
  for app in app_tools:
52
53
  app_status = "connected" if app in connected_apps else "NOT connected"
@@ -64,9 +65,19 @@ def build_graph(
64
65
 
65
66
  @tool
66
67
  async def load_tools(tool_ids: list[str]) -> list[str]:
67
- """Load the tools for the given tool ids. Returns the tool ids."""
68
+ """Load the tools for the given tool ids. Returns the tool ids after loading them. Note that tool ids are the complete tool ids, with both the app name and the tool name separated by double underscore (__). e.g. google_mail__send_email"""
68
69
  return tool_ids
69
70
 
71
+ @tool
72
+ async def web_search(query: str) -> str:
73
+ """Search the web for the given query. Returns the search results."""
74
+ tool = await tool_registry.export_tools(
75
+ ["exa__search"], ToolFormat.LANGCHAIN
76
+ )
77
+ response = await tool_registry.call_tool("exa__search", {"query": query, "contents": {"summary": True}})
78
+ return response
79
+
80
+
70
81
  async def call_model(
71
82
  state: State,
72
83
  ) -> Command[Literal["select_tools", "call_tools"]]:
@@ -91,8 +102,17 @@ def build_graph(
91
102
 
92
103
  model = llm
93
104
 
105
+ tools = [search_tools, load_tools, web_search, *default_tools, *selected_tools]
106
+ # Remove duplicates based on tool name
107
+ seen_names = set()
108
+ unique_tools = []
109
+ for tool in tools:
110
+ if tool.name not in seen_names:
111
+ seen_names.add(tool.name)
112
+ unique_tools.append(tool)
113
+ tools = unique_tools
94
114
  model_with_tools = model.bind_tools(
95
- [search_tools, load_tools, *selected_tools, *ui_tools],
115
+ tools,
96
116
  tool_choice="auto",
97
117
  )
98
118
  response = cast(AIMessage, await model_with_tools.ainvoke(messages))
@@ -124,6 +144,23 @@ def build_graph(
124
144
  },
125
145
  )
126
146
 
147
+ elif tool_call["name"] == web_search.name:
148
+ logger.info(f"Tool '{tool_call['name']}' is a web search tool. Proceeding to call.")
149
+ web_search_result = await web_search.ainvoke(input=tool_call["args"])
150
+ tool_msg = ToolMessage(
151
+ f"Web search result: {web_search_result}", tool_call_id=tool_call["id"]
152
+ )
153
+ return Command(goto="call_model", update={"messages": [response, tool_msg]})
154
+
155
+ elif "ui_tools" in tool_call["name"]:
156
+ logger.info(f"Tool '{tool_call['name']}' is a UI tool. Proceeding to call.")
157
+ ui_tool_result = await ui_tools_dict[tool_call["name"]].ainvoke(input=tool_call["args"])
158
+ tool_msg = ToolMessage(
159
+ f"UI tool result: {ui_tool_result}", tool_call_id=tool_call["id"]
160
+ )
161
+ return Command(goto="call_model", update={"messages": [response, tool_msg]})
162
+
163
+
127
164
  elif tool_call["name"] not in state["selected_tool_ids"]:
128
165
  try:
129
166
  await tool_registry.export_tools(
@@ -0,0 +1,15 @@
1
+ """Default prompts used by the agent."""
2
+
3
+ SYSTEM_PROMPT = """You are a helpful AI assistant.
4
+
5
+ **Core Directives:**
6
+ 1. **Always Use Tools for Tasks:** For any user request that requires an action (e.g., sending an email, searching for information, creating an event, displaying a chart), you MUST use a tool. Do not refuse a task if a tool might exist for it.
7
+
8
+ 2. Check if your existing tools or knowledge can handle the user's request. If they can, use them. If they cannot, you must call the `search_tools` function to find the right tools for the user's request.You must not use the same/similar query multiple times in the list. The list should have multiple queries only if the task has clearly different sub-tasks. If you do not find any specific relevant tools, use the pre-loaded generic tools.
9
+
10
+ 3. **Load Tools:** After looking at the output of `search_tools`, you MUST call the `load_tools` function to load only the tools you want to use. Provide the full tool ids, not just the app names. Use your judgement to eliminate irrelevant apps that came up just because of semantic similarity. However, sometimes, multiple apps might be relevant for the same task. Prefer connected apps over unconnected apps while breaking a tie. If more than one relevant app (or none of the relevant apps) are connected, you must ask the user to choose the app. In case the user asks you to use an app that is not connected, call the apps tools normally. The tool will return a link for connecting that you should pass on to the user.
11
+
12
+ 4. **Strictly Follow the Process:** Your only job in your first turn is to analyze the user's request and answer using existing tools/knowledge or `search_tools` with a concise query describing the core task. Do not engage in conversation, or extend the conversation beyond the user's request.
13
+
14
+ {instructions}
15
+ """
@@ -1,5 +1,4 @@
1
1
  from universal_mcp.agentr.registry import AgentrRegistry
2
-
3
2
  from universal_mcp.agents.bigtoolcache import BigToolAgentCache
4
3
 
5
4
 
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  from datetime import UTC, datetime
3
- from typing import Literal, TypedDict, cast
3
+ from typing import Any, Literal, TypedDict, cast
4
4
 
5
5
  from langchain_anthropic import ChatAnthropic
6
6
  from langchain_core.language_models import BaseChatModel
@@ -11,12 +11,11 @@ from langgraph.runtime import Runtime
11
11
  from langgraph.types import Command
12
12
 
13
13
  from universal_mcp.agents.bigtoolcache.context import Context
14
+ from universal_mcp.agents.bigtoolcache.prompts import TOOLS_LIST
14
15
  from universal_mcp.agents.bigtoolcache.state import State
15
16
  from universal_mcp.logger import logger
16
17
  from universal_mcp.tools.registry import ToolRegistry
17
18
  from universal_mcp.types import ToolFormat
18
- from universal_mcp.agents.bigtoolcache.prompts import TOOLS_LIST
19
-
20
19
 
21
20
 
22
21
  class ToolSelectionOutput(TypedDict):
@@ -180,16 +179,15 @@ def build_graph(tool_registry: ToolRegistry, llm: BaseChatModel):
180
179
  content=json.dumps(tool_result),
181
180
  name=tool_id,
182
181
  tool_call_id=tool_call["id"],
183
- )
184
- recent_tool_ids.append(tool_call["name"])
185
- except Exception as e:
186
- logger.error(f"Error executing tool '{tool_call['name']}': {e}")
187
- outputs.append(
188
- ToolMessage(
189
- content=json.dumps("Error: " + str(e)),
190
- name=tool_call["name"],
191
- tool_call_id=tool_call["id"],
192
- )
182
+ ))
183
+ recent_tool_ids.append(tool_call["name"])
184
+ except Exception as e:
185
+ logger.error(f"Error executing tool '{tool_call['name']}': {e}")
186
+ outputs.append(
187
+ ToolMessage(
188
+ content=json.dumps("Error: " + str(e)),
189
+ name=tool_call["name"],
190
+ tool_call_id=tool_call["id"],
193
191
  )
194
192
  )
195
193
  return Command(
@@ -14,6 +14,7 @@ from universal_mcp.types import ToolConfig
14
14
  from universal_mcp.agents.base import BaseAgent
15
15
  from universal_mcp.agents.llm import load_chat_model
16
16
  from universal_mcp.agents.shared.tool_node import build_tool_node_graph
17
+ from universal_mcp.agents.utils import messages_to_list
17
18
 
18
19
 
19
20
  class Agent(BaseModel):
@@ -179,9 +180,13 @@ async def main():
179
180
  model="gemini/gemini-1.5-pro",
180
181
  registry=registry,
181
182
  )
182
- await agent.invoke(
183
+ result = await agent.invoke(
183
184
  "Send a daily email to manoj@agentr.dev with daily agenda of the day",
184
185
  )
186
+ from rich import print
187
+ print(messages_to_list(result["messages"]))
188
+ print(result["generated_agent"])
189
+ print(result["tool_config"])
185
190
 
186
191
 
187
192
  if __name__ == "__main__":
@@ -8,7 +8,7 @@ from langchain_openai import AzureChatOpenAI
8
8
 
9
9
  @lru_cache(maxsize=8)
10
10
  def load_chat_model(
11
- fully_specified_name: str, temperature: float = 1.0, tags: list[str] | None = None, thinking: bool = True
11
+ fully_specified_name: str, temperature: float = 1.0, tags: list[str] | None = None, thinking: bool = False
12
12
  ) -> BaseChatModel:
13
13
  """Load a chat model from a fully specified name.
14
14
  Args:
@@ -7,7 +7,7 @@ from universal_mcp.types import ToolConfig, ToolFormat
7
7
 
8
8
  from universal_mcp.agents.base import BaseAgent
9
9
  from universal_mcp.agents.llm import load_chat_model
10
- from universal_mcp.agents.utils import initialize_ui_tools, messages_to_list
10
+ from universal_mcp.agents.utils import messages_to_list
11
11
 
12
12
  DEVELOPER_PROMPT = """You are {name}.
13
13
 
@@ -38,8 +38,9 @@ class ReactAgent(BaseAgent):
38
38
  ):
39
39
  super().__init__(name, instructions, model, memory, **kwargs)
40
40
  self.llm = load_chat_model(model)
41
- self.tools = tools
42
- self.ui_tools = initialize_ui_tools()
41
+ self.tools = tools or {}
42
+ if "ui" not in self.tools:
43
+ self.tools["ui"] = ["create_bar_chart", "create_line_chart", "create_pie_chart", "create_table", "http_get", "http_post", "http_put", "http_delete", "http_patch", "read_file"]
43
44
  self.max_iterations = max_iterations
44
45
  self.registry = registry
45
46
 
@@ -48,13 +49,11 @@ class ReactAgent(BaseAgent):
48
49
  if self.tools:
49
50
  if not self.registry:
50
51
  raise ValueError("Tools are configured but no registry is provided")
51
-
52
52
  tools = await self.registry.export_tools(self.tools, ToolFormat.LANGCHAIN)
53
53
  logger.debug(tools)
54
54
  else:
55
55
  tools = []
56
56
 
57
- tools.extend(self.ui_tools)
58
57
 
59
58
  logger.debug(f"Initialized ReactAgent: name={self.name}, model={self.model}")
60
59
  return create_react_agent(
@@ -11,7 +11,6 @@ from rich.table import Table
11
11
  from universal_mcp.tools.manager import ToolManager
12
12
  from universal_mcp.types import ToolFormat
13
13
 
14
- from universal_mcp.agents.ui_tools import UIToolsApp
15
14
 
16
15
 
17
16
  class RichCLI:
@@ -138,12 +137,3 @@ Available commands:
138
137
 
139
138
  def messages_to_list(messages: list[BaseMessage]):
140
139
  return [{"type": message.type, "content": message.content} for message in messages]
141
-
142
-
143
- def initialize_ui_tools() -> list:
144
- """
145
- Initialize and return UI tools in a langchain compatible format.
146
- """
147
- tool_manager = ToolManager(default_format=ToolFormat.LANGCHAIN)
148
- tool_manager.register_tools_from_app(UIToolsApp())
149
- return tool_manager.list_tools()
@@ -28,12 +28,12 @@ class ColumnDefinition(TypedDict):
28
28
  type: Literal["string", "number", "date", "boolean"] | None
29
29
 
30
30
 
31
- class UIToolsApp(BaseApplication):
31
+ class UiApp(BaseApplication):
32
32
  """An application for creating UI tools"""
33
33
 
34
- def __init__(self):
34
+ def __init__(self, **kwargs):
35
35
  """Initialize the DefaultToolsApp"""
36
- super().__init__(name="ui_tools")
36
+ super().__init__(name="ui")
37
37
  self.markitdown = MarkItDown(enable_plugins=True)
38
38
 
39
39
  def create_bar_chart(