universal-mcp-agents 0.1.6__tar.gz → 0.1.8__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of universal-mcp-agents might be problematic. Click here for more details.

Files changed (68)
  1. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/PKG-INFO +3 -2
  2. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/pyproject.toml +3 -2
  3. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/datasets/tasks.jsonl +20 -20
  4. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/run.py +2 -1
  5. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/tests/test_agents.py +72 -101
  6. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool2/graph.py +51 -8
  7. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/builder.py +29 -8
  8. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/llm.py +1 -1
  9. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/planner/graph.py +1 -1
  10. universal_mcp_agents-0.1.8/src/universal_mcp/agents/shared/prompts.py +118 -0
  11. universal_mcp_agents-0.1.8/src/universal_mcp/agents/shared/tool_node.py +245 -0
  12. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/applications/ui/app.py +1 -1
  13. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/uv.lock +902 -3
  14. universal_mcp_agents-0.1.6/builder_tools.py +0 -34
  15. universal_mcp_agents-0.1.6/src/universal_mcp/agents/shared/tool_node.py +0 -251
  16. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/.gitignore +0 -0
  17. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/.pre-commit-config.yaml +0 -0
  18. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/GEMINI.md +0 -0
  19. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/PROMPTS.md +0 -0
  20. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/README.md +0 -0
  21. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/bump_and_release.sh +0 -0
  22. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/__init__.py +0 -0
  23. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/dataset.py +0 -0
  24. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/datasets/exact.jsonl +0 -0
  25. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/datasets/test.jsonl +0 -0
  26. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/evaluators.py +0 -0
  27. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/evals/utils.py +0 -0
  28. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/__init__.py +0 -0
  29. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/autoagent/__init__.py +0 -0
  30. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/autoagent/__main__.py +0 -0
  31. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/autoagent/context.py +0 -0
  32. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/autoagent/graph.py +0 -0
  33. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/autoagent/prompts.py +0 -0
  34. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/autoagent/state.py +0 -0
  35. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/autoagent/utils.py +0 -0
  36. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/base.py +0 -0
  37. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool/__init__.py +0 -0
  38. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool/__main__.py +0 -0
  39. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool/graph.py +0 -0
  40. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool/prompts.py +0 -0
  41. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool/state.py +0 -0
  42. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool2/__init__.py +0 -0
  43. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool2/__main__.py +0 -0
  44. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool2/agent.py +0 -0
  45. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool2/prompts.py +0 -0
  46. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtool2/state.py +0 -0
  47. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/__init__.py +0 -0
  48. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/__main__.py +0 -0
  49. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/agent.py +0 -0
  50. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/context.py +0 -0
  51. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/graph.py +0 -0
  52. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/prompts.py +0 -0
  53. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/state.py +0 -0
  54. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/tools_all.txt +0 -0
  55. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/bigtoolcache/tools_important.txt +0 -0
  56. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/cli.py +0 -0
  57. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/codeact/__init__.py +0 -0
  58. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/codeact/sandbox.py +0 -0
  59. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/codeact/test.py +0 -0
  60. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/codeact/utils.py +0 -0
  61. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/hil.py +0 -0
  62. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/planner/__init__.py +0 -0
  63. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/planner/__main__.py +0 -0
  64. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/planner/prompts.py +0 -0
  65. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/planner/state.py +0 -0
  66. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/react.py +0 -0
  67. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/simple.py +0 -0
  68. {universal_mcp_agents-0.1.6 → universal_mcp_agents-0.1.8}/src/universal_mcp/agents/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: universal-mcp-agents
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: Add your description here
5
5
  Project-URL: Homepage, https://github.com/universal-mcp/applications
6
6
  Project-URL: Repository, https://github.com/universal-mcp/applications
@@ -11,7 +11,8 @@ Requires-Dist: langchain-anthropic>=0.3.19
11
11
  Requires-Dist: langchain-google-genai>=2.1.10
12
12
  Requires-Dist: langchain-openai>=0.3.32
13
13
  Requires-Dist: langgraph>=0.6.6
14
- Requires-Dist: universal-mcp>=0.1.24rc17
14
+ Requires-Dist: universal-mcp-applications>=0.1.14
15
+ Requires-Dist: universal-mcp>=0.1.24rc21
15
16
  Provides-Extra: dev
16
17
  Requires-Dist: pre-commit; extra == 'dev'
17
18
  Requires-Dist: ruff; extra == 'dev'
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
6
6
 
7
7
  [project]
8
8
  name = "universal-mcp-agents"
9
- version = "0.1.6"
9
+ version = "0.1.8"
10
10
  description = "Add your description here"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -18,7 +18,8 @@ dependencies = [
18
18
  "langchain-google-genai>=2.1.10",
19
19
  "langchain-openai>=0.3.32",
20
20
  "langgraph>=0.6.6",
21
- "universal-mcp>=0.1.24rc17",
21
+ "universal-mcp>=0.1.24rc21",
22
+ "universal-mcp-applications>=0.1.14",
22
23
  ]
23
24
 
24
25
  [project.license]
@@ -1,22 +1,22 @@
1
1
  {"user_input": "Send an email to manoj@agentr.dev from my Gmail account", "difficulty": 1, "required_tools": {"google_mail": ["send_email"]}}
2
- {"user_input": "Show me events from today's Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["list_events"]}}
3
- {"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4, "required_tools": {"github": ["get_pull_request"], "google_docs": ["get_document"]}}
4
- {"user_input": "Summarize the key insights from all marketing emails received this week from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4, "required_tools": {"google_mail": ["create_filters: Set up new Gmail filter with criteria and automated actions"], "google_docs": [], "tavily": []}}
5
- {"user_input": "Search for best cafes near IIT bombay using exa and make a google sheet out of it", "difficulty": 3, "required_tools": {"exa": ["search"], "google_sheet": ["create_spreadsheet"]}}
6
- {"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 5, "required_tools": {"reddit": [], "google_docs": []}}
7
- {"user_input": "Find the best restaurants in Goa using perplexity web search", "difficulty": 2, "required_tools": {"perplexity": []}}
2
+ {"user_input": "Show me events from today's Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["get_upcoming_events"]}}
3
+ {"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4, "required_tools": {"github": ["list_pull_requests", "list_recent_commits"], "google_docs": ["create_document", "insert_text", "apply_text_style"]}}
4
+ {"user_input": "Summarize the key insights from all marketing emails received this week from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4, "required_tools": {"google_mail": ["list_messages"], "google_docs": ["create_document"]}}
5
+ {"user_input": "Search for best cafes near IIT bombay using exa and make a google sheet out of it", "difficulty": 3, "required_tools": {"exa": ["search_with_filters"], "google_sheet": ["create_spreadsheet", "write_values_to_sheet", "add_table"]}}
6
+ {"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 5, "required_tools": {"reddit": ["get_subreddit_posts", "get_subreddit_top_posts"], "google_docs": ["create_document", "insert_text", "apply_text_style"]}}
7
+ {"user_input": "Find the best restaurants in Goa using perplexity web search", "difficulty": 2, "required_tools": {"perplexity": ["answer_with_search"]}}
8
8
  {"user_input": "List the unread emails from the last 24 hours from my Gmail, sorted by sender.", "difficulty": 2, "required_tools": {"google_mail": ["list_messages"]}}
9
- {"user_input": "Tell me how many meetings I have tomorrow and when they start from my Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["get_today_events"]}}
10
- {"user_input": "Create a meeting with aditakarsh@example.com on the topic of the latest trends in AI at 8PM today using Google Calendar.", "difficulty": 2, "required_tools": {"google_calendar": ["add_an_event"]}}
11
- {"user_input": "What are the topics of my meetings today from Google Calendar and who are the attendees? Give a 1-line context for each attendee using LinkedIn or web search.", "difficulty": 4, "required_tools": {"google_calendar": ["get_event: Retrieves detailed information about a specific Google Calendar event by its ID"], "linkedin": ["get_your_info"]}}
12
- {"user_input": "Fetch my last inbox mail from Microsoft Outlook", "difficulty": 1, "required_tools": {"outlook": ["user_get_mail_folder"]}}
13
- {"user_input": "Fetch unsubscribe links from my Gmail inbox for promo emails I have received in the last 7 days", "difficulty": 3, "required_tools": {"google_mail": []}}
14
- {"user_input": "Fetch all unread emails from Gmail and new tickets from ClickUp for me from last night", "difficulty": 4, "required_tools": {"google_mail": [], "clickup": []}}
15
- {"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4, "required_tools": {"serpapi": ["search"], "perplexity": [], "e2b": []}}
16
- {"user_input": "Create a weekly expense report from my credit card transactions and categorize spending by type (food, transport, entertainment, etc.) in a Google Sheet", "difficulty": 3, "required_tools": {}}
17
- {"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4, "required_tools": {"serpapi": ["search"], "google_sheet": []}}
18
- {"user_input": "Research the top 10 Y Combinator startups from the latest batch using web search and create a report on their industries and funding status in Google Docs", "difficulty": 5, "required_tools": {"serpapi": [], "google_docs": ["create_document"]}}
19
- {"user_input": "Find and summarize the key takeaways from the latest earnings calls of FAANG companies using web search and create a report in Google Docs", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_docs": []}}
20
- {"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5, "required_tools": {"linkedin": [], "scraper": ["linkedin_list_all_posts"], "google_sheet": [], "openai": []}}
21
- {"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4, "required_tools": {"twitter": [], "google_sheet": []}}
22
- {"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5, "required_tools": {"serpapi": ["search"], "google_sheet": [], "google_calendar": []}}
9
+ {"user_input": "Tell me how many meetings I have tomorrow and when they start from my Google Calendar.", "difficulty": 1, "required_tools": {"google_calendar": ["get_upcoming_events", "list_events"]}}
10
+ {"user_input": "Create a meeting with aditakarsh@example.com on the topic of the latest trends in AI at 8PM today using Google Calendar.", "difficulty": 2, "required_tools": {"google_calendar": ["create_event", "create_event_from_text"]}}
11
+ {"user_input": "What are the topics of my meetings today from Google Calendar and who are the attendees? Give a 1-line context for each attendee using LinkedIn or web search.", "difficulty": 4, "required_tools": {"google_calendar": ["get_upcoming_events", "list_events"], "scraper": ["linkedin_retrieve_profile"]}}
12
+ {"user_input": "Fetch my last inbox mail from Microsoft Outlook", "difficulty": 1, "required_tools": {"outlook": ["list_user_messages"]}}
13
+ {"user_input": "Fetch unsubscribe links from my Gmail inbox for promo emails I have received in the last 7 days", "difficulty": 3, "required_tools": {"google_mail": ["list_messages"]}}
14
+ {"user_input": "Fetch all unread emails from Gmail and new tickets from ClickUp for me from last night", "difficulty": 4, "required_tools": {"google_mail": ["list_messages"], "clickup": ["tasks_get_list_tasks", "tasks_filter_team_tasks"]}}
15
+ {"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4, "required_tools": {"tavily": ["search_and_summarize"]}}
16
+ {"user_input": "Create a weekly expense report from my credit card transactions and categorize spending by type (food, transport, entertainment, etc.) in a Google Sheet", "difficulty": 3, "required_tools": {"google_sheet" : ["create_spreadsheet", "add_table"]}}
17
+ {"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4, "required_tools": {"tavily": ["search_and_summarize"], "google_sheet": ["create_spreadsheet", "add_table"]}}
18
+ {"user_input": "Research the top 10 Y Combinator startups from the latest batch using web search and create a report on their industries and funding status in Google Docs", "difficulty": 5, "required_tools": {"tavily": ["search_and_summarize"], "google_docs": ["create_document", "insert_text", "insert_table"]}}
19
+ {"user_input": "Find and summarize the key takeaways from the latest earnings calls of FAANG companies using web search and create a report in Google Docs", "difficulty": 5, "required_tools": {"tavily": ["search_and_summarize"], "google_docs": ["create_document", "insert_text", "insert_table"]}}
20
+ {"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5, "required_tools": {"scraper": ["linkedin_retrieve_profile", "linkedin_list_profile_posts"], "google_sheet": ["create_spreadsheet", "write_values_to_sheet"]}}
21
+ {"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4, "required_tools": {"twitter": ["get_user_mentions", "get_dm_events_by_conversation_id"], "google_sheet": ["create_spreadsheet", "write_values_to_sheet", "set_basic_filter"]}}
22
+ {"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5, "required_tools": {"tavily": ["search_and_summarize"], "google_sheet": ["get_values", "batch_get_values_by_range", "get_spreadsheet_metadata" , "create_spreadsheet", "add_sheet", "add_table"]}}
@@ -1,5 +1,6 @@
1
1
  import argparse
2
2
  import asyncio
3
+ from datetime import datetime
3
4
  from typing import Any
4
5
 
5
6
  from langsmith import Client, aevaluate
@@ -86,7 +87,7 @@ async def main(agent_name: str, dataset_path: str, evaluator_name: str):
86
87
 
87
88
  # 3. Upload dataset to LangSmith for the evaluation run
88
89
  client = Client()
89
- dataset_name = dataset_path.split("/")[-1].split(".")[0]
90
+ dataset_name = f"{dataset_path.split('/')[-1].split('.')[0]}"
90
91
  # dataset_name = f"{agent_name}-{evaluator_name}-eval-dataset"
91
92
  try:
92
93
  # If dataset with same name and examples exists, read it.
@@ -1,4 +1,4 @@
1
- from typing import Any
1
+ from typing import Any, Dict, List
2
2
 
3
3
  import pytest
4
4
  from langchain_core.messages import HumanMessage
@@ -20,8 +20,8 @@ class MockToolRegistry(ToolRegistry):
20
20
  """Initialize the MockToolRegistry."""
21
21
  self._apps = [
22
22
  {
23
- "id": "google-mail",
24
- "name": "google-mail",
23
+ "id": "google_mail",
24
+ "name": "google_mail",
25
25
  "description": "Send and manage emails.",
26
26
  },
27
27
  {
@@ -30,8 +30,8 @@ class MockToolRegistry(ToolRegistry):
30
30
  "description": "Team communication and messaging.",
31
31
  },
32
32
  {
33
- "id": "google-calendar",
34
- "name": "google-calendar",
33
+ "id": "google_calendar",
34
+ "name": "google_calendar",
35
35
  "description": "Schedule and manage calendar events.",
36
36
  },
37
37
  {
@@ -45,9 +45,9 @@ class MockToolRegistry(ToolRegistry):
45
45
  "description": "Code hosting, version control, and collaboration.",
46
46
  },
47
47
  ]
48
- self._connected_apps = ["google-mail", "google-calendar", "github"]
48
+ self._connected_apps = ["google_mail", "google_calendar", "github"]
49
49
  self._tools = {
50
- "google-mail": [
50
+ "google_mail": [
51
51
  {
52
52
  "id": "send_email",
53
53
  "name": "send_email",
@@ -76,7 +76,7 @@ class MockToolRegistry(ToolRegistry):
76
76
  "description": "Read messages from a channel.",
77
77
  },
78
78
  ],
79
- "google-calendar": [
79
+ "google_calendar": [
80
80
  {
81
81
  "id": "create_event",
82
82
  "name": "create_event",
@@ -112,7 +112,7 @@ class MockToolRegistry(ToolRegistry):
112
112
  ],
113
113
  }
114
114
  self._tool_mappings = {
115
- "google-mail": {
115
+ "google_mail": {
116
116
  "email": ["send_email", "read_email", "create_draft"],
117
117
  "send": ["send_email"],
118
118
  },
@@ -120,7 +120,7 @@ class MockToolRegistry(ToolRegistry):
120
120
  "message": ["send_message", "read_channel"],
121
121
  "team": ["send_message"],
122
122
  },
123
- "google-calendar": {
123
+ "google_calendar": {
124
124
  "meeting": ["create_event", "find_event"],
125
125
  "schedule": ["create_event"],
126
126
  },
@@ -146,14 +146,14 @@ class MockToolRegistry(ToolRegistry):
146
146
  query: str,
147
147
  limit: int = 10,
148
148
  ) -> list[dict[str, Any]]:
149
- """Search for apps by a query."""
150
- query = query.lower()
151
- results = [
152
- app
153
- for app in self._apps
154
- if query in app["name"].lower() or query in app["description"].lower()
155
- ]
156
- return results[:limit]
149
+ """
150
+ Search for apps by a query.
151
+ MODIFIED: This mock implementation now returns ALL available apps to ensure
152
+ the graph always has candidates to work with. This makes the test more
153
+ robust by focusing on the agent's selection logic rather than a brittle
154
+ mock search.
155
+ """
156
+ return self._apps[:limit]
157
157
 
158
158
  async def list_tools(
159
159
  self,
@@ -168,30 +168,18 @@ class MockToolRegistry(ToolRegistry):
168
168
  limit: int = 10,
169
169
  app_id: str | None = None,
170
170
  ) -> list[dict[str, Any]]:
171
- """Search for tools by a query."""
171
+ """
172
+ Search for tools by a query.
173
+ MODIFIED: This mock implementation now returns all available tools for the given app_id
174
+ to ensure robust testing of the tool selection logic, avoiding failures from a
175
+ brittle keyword search.
176
+ """
172
177
  if not app_id:
173
178
  return []
174
179
 
175
- tools_for_app = self._tool_mappings.get(app_id, {})
176
- found_tool_names = set()
177
- for keyword, tools in tools_for_app.items():
178
- if keyword in query.lower():
179
- for tool in tools:
180
- found_tool_names.add(tool)
181
-
180
+ # Return all tools for the given app, letting the LLM choose.
182
181
  all_app_tools = self._tools.get(app_id, [])
183
-
184
- results = [tool for tool in all_app_tools if tool["name"] in found_tool_names]
185
-
186
- if not results:
187
- results = [
188
- {
189
- "name": "general_purpose_tool",
190
- "description": "A general purpose tool.",
191
- }
192
- ]
193
-
194
- return results[:limit]
182
+ return all_app_tools[:limit]
195
183
 
196
184
  async def export_tools(
197
185
  self,
@@ -231,82 +219,68 @@ class TestToolFinderGraph:
231
219
  def registry(self):
232
220
  return MockToolRegistry()
233
221
 
234
- @pytest.mark.asyncio
235
- async def test_simple_case_connected_app(self, llm, registry):
236
- """Test Case 1: Simple case (Connected App)"""
237
- task = "Send an email to my manager about the project update."
238
- graph = build_tool_node_graph(llm, registry)
239
- final_state = await graph.ainvoke(
240
- {"task": task, "messages": [HumanMessage(content=task)]}
241
- )
242
- assert final_state["apps_required"] is True
243
- assert "google-mail" in final_state["relevant_apps"]
244
- assert "google-mail" in final_state["apps_with_tools"]
245
- assert "send_email" in final_state["apps_with_tools"]["google-mail"]
222
+ def _get_tool_config_from_plan(self, plan: List[Dict[str, Any]]) -> Dict[str, List[str]]:
223
+ """
224
+ Helper function to convert a consolidated execution plan to a tool_config dict.
225
+ MODIFIED: This now correctly handles the already-consolidated plan from the graph.
226
+ """
227
+ if not plan:
228
+ return {}
229
+
230
+ config = {
231
+ step["app_id"]: step["tool_ids"]
232
+ for step in plan if step.get("app_id") and step.get("tool_ids")
233
+ }
234
+ return config
246
235
 
247
236
  @pytest.mark.asyncio
248
- async def test_multiple_apps_found(self, llm, registry):
249
- """Test Case 2: Multiple apps found"""
250
- task = "Send a message to my team about the new design."
237
+ async def test_simple_case(self, llm, registry):
238
+ """Test Case 1: Simple task requiring a single app and tool."""
239
+ task = "Send an email to my manager about the project update."
251
240
  graph = build_tool_node_graph(llm, registry)
252
241
  final_state = await graph.ainvoke(
253
- {"task": task, "messages": [HumanMessage(content=task)]}
242
+ {"original_task": task, "messages": [HumanMessage(content=task)], "decomposition_attempts": 0}
254
243
  )
255
- assert final_state["apps_required"] is True
256
- assert "google-mail" in final_state["relevant_apps"]
257
- assert "slack" in final_state["relevant_apps"]
258
- assert "google-mail" in final_state["apps_with_tools"]
259
- assert "slack" in final_state["apps_with_tools"]
244
+
245
+ plan = final_state.get("execution_plan")
246
+
247
+ tool_config = self._get_tool_config_from_plan(plan)
248
+
249
+ # FIX: Assert against the correct, underscored app ID.
250
+ assert "google_mail" in tool_config
251
+ assert "send_email" in tool_config["google_mail"]
260
252
 
261
253
  @pytest.mark.asyncio
262
- async def test_no_relevant_app(self, llm, registry):
263
- """Test Case 3: No relevant app"""
264
- task = "Can you create a blog post on my wordpress site?"
254
+ async def test_multi_step_task(self, llm, registry):
255
+ """Test Case 2: A task requiring multiple tools from different apps."""
256
+ task = "Create a new issue for a bug in our github repository, and send a message on slack about the issue."
265
257
  graph = build_tool_node_graph(llm, registry)
266
258
  final_state = await graph.ainvoke(
267
- {"task": task, "messages": [HumanMessage(content=task)]}
259
+ {"original_task": task, "messages": [HumanMessage(content=task)], "decomposition_attempts": 0}
268
260
  )
269
- assert final_state["apps_required"] is True
270
- assert not final_state["relevant_apps"]
271
- assert not final_state["apps_with_tools"]
272
261
 
273
- @pytest.mark.asyncio
274
- async def test_multiple_tools_in_one_app(self, llm, registry):
275
- """Test Case 4: Multiple tools in one app"""
276
- task = "Create a new issue for a bug in our github repository, and send message on slack about the issue."
277
- graph = build_tool_node_graph(llm, registry)
278
- final_state = await graph.ainvoke(
279
- {"task": task, "messages": [HumanMessage(content=task)]}
280
- )
281
- assert final_state["apps_required"] is True
282
- assert "github" in final_state["relevant_apps"]
283
- assert "slack" in final_state["relevant_apps"]
284
- assert "github" in final_state["apps_with_tools"]
285
- assert "slack" in final_state["apps_with_tools"]
286
- assert "create_issue" in final_state["apps_with_tools"]["github"]
287
- assert "send_message" in final_state["apps_with_tools"]["slack"]
262
+ plan = final_state.get("execution_plan")
263
+ assert plan, "Execution plan should not be empty"
264
+
265
+ tool_config = self._get_tool_config_from_plan(plan)
288
266
 
289
- @pytest.mark.asyncio
290
- async def test_unavailable_app(self, llm, registry):
291
- """Test Case 5: Unavailable App"""
292
- task = "Create a new design file in Figma."
293
- graph = build_tool_node_graph(llm, registry)
294
- final_state = await graph.ainvoke(
295
- {"task": task, "messages": [HumanMessage(content=task)]}
296
- )
297
- assert final_state["apps_required"] is True
298
- assert not final_state["relevant_apps"]
299
- assert not final_state["apps_with_tools"]
267
+ assert "github" in tool_config
268
+ assert "create_issue" in tool_config["github"]
269
+ assert "slack" in tool_config
270
+ assert "send_message" in tool_config["slack"]
300
271
 
301
272
  @pytest.mark.asyncio
302
- async def test_no_app_needed(self, llm, registry):
303
- """Test Case 6: No App Needed"""
304
- task = "hello"
273
+ async def test_no_relevant_app(self, llm, registry):
274
+ """Test Case 3: A task for which no tools or apps are available."""
275
+ task = "Can you create a blog post on my wordpress site?"
305
276
  graph = build_tool_node_graph(llm, registry)
306
277
  final_state = await graph.ainvoke(
307
- {"task": task, "messages": [HumanMessage(content=task)]}
278
+ {"original_task": task, "messages": [HumanMessage(content=task)], "decomposition_attempts": 0}
308
279
  )
309
- assert final_state["apps_required"] is False
280
+ plan = final_state.get("execution_plan")
281
+ assert not plan
282
+ last_message = final_state.get("messages", [])[-1].content
283
+ assert "unable to create a complete plan" in last_message.lower()
310
284
 
311
285
 
312
286
  @pytest.mark.parametrize(
@@ -359,9 +333,6 @@ class TestAgents:
359
333
  else str(last_message)
360
334
  )
361
335
 
362
- # Print the response for manual verification and for the LLM judge
363
-
364
- # Assert that the response is not None or empty, as per the new requirement
365
336
  assert final_response is not None, "The final response should not be None."
366
337
  assert final_response != "", "The final response should not be an empty string."
367
338
 
@@ -397,4 +368,4 @@ class TestAgentBuilder:
397
368
 
398
369
  assert "tool_config" in result
399
370
  tool_config = result["tool_config"]
400
- assert "google-mail" in tool_config
371
+ assert "google_mail" in tool_config
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  from datetime import UTC, datetime
3
3
  from typing import Literal, cast
4
+ import asyncio
4
5
 
5
6
  from langchain_core.language_models import BaseChatModel
6
7
  from langchain_core.messages import AIMessage, ToolMessage
@@ -65,16 +66,58 @@ def build_graph(
65
66
 
66
67
  @tool
67
68
  async def load_tools(tool_ids: list[str]) -> list[str]:
68
- """Load the tools for the given tool ids. Returns the tool ids after loading them. Note that tool ids are the complete tool ids, with both the app name and the tool name separated by double underscore (__). e.g. google_mail__send_email"""
69
- return tool_ids
69
+ """
70
+ Load the tools for the given tool ids. Returns the valid tool ids after loading.
71
+ Tool ids are of form 'appid__toolid'. Example: 'google_mail__send_email'
72
+ """
73
+ correct, incorrect = [], []
74
+ app_tool_list: dict[str, list[str]] = {}
75
+
76
+ # Group tool_ids by app for fewer registry calls
77
+ app_to_tools: dict[str, list[str]] = {}
78
+ for tool_id in tool_ids:
79
+ if "__" not in tool_id:
80
+ incorrect.append(tool_id)
81
+ continue
82
+ app, tool = tool_id.split("__", 1)
83
+ app_to_tools.setdefault(app, []).append((tool_id, tool))
84
+
85
+ # Fetch all apps concurrently
86
+ async def fetch_tools(app: str):
87
+ try:
88
+ tools_dict = await tool_registry.list_tools(app)
89
+ return app, {tool_unit["name"] for tool_unit in tools_dict}
90
+ except Exception as e:
91
+ return app, None
92
+
93
+ results = await asyncio.gather(*(fetch_tools(app) for app in app_to_tools))
94
+
95
+ # Build map of available tools per app
96
+ for app, tools in results:
97
+ if tools is not None:
98
+ app_tool_list[app] = tools
99
+
100
+ # Validate tool_ids
101
+ for app, tool_entries in app_to_tools.items():
102
+ available = app_tool_list.get(app)
103
+ if available is None:
104
+ incorrect.extend(tool_id for tool_id, _ in tool_entries)
105
+ continue
106
+ for tool_id, tool in tool_entries:
107
+ if tool in available:
108
+ correct.append(tool_id)
109
+ else:
110
+ incorrect.append(tool_id)
111
+
112
+ return correct
70
113
 
71
114
  @tool
72
115
  async def web_search(query: str) -> str:
73
- """Search the web for the given query. Returns the search results."""
116
+ """Search the web for the given query. Returns the search results. Do not use for app-specific searches (for example, reddit or linkedin searches should be done using the app's tools)"""
74
117
  tool = await tool_registry.export_tools(
75
- ["exa__search"], ToolFormat.LANGCHAIN
118
+ ["exa__search_with_filters"], ToolFormat.LANGCHAIN
76
119
  )
77
- response = await tool_registry.call_tool("exa__search", {"query": query, "contents": {"summary": True}})
120
+ response = await tool_registry.call_tool("exa__search_with_filters", {"query": query, "contents": {"summary": True}})
78
121
  return response
79
122
 
80
123
 
@@ -131,10 +174,10 @@ def build_graph(
131
174
  return Command(goto="select_tools", update={"messages": [response]})
132
175
  elif tool_call["name"] == load_tools.name:
133
176
  logger.info("Model requested to load tools.")
177
+ selected_tool_ids = await load_tools.ainvoke(tool_call["args"])
134
178
  tool_msg = ToolMessage(
135
- "Loaded tools.", tool_call_id=tool_call["id"]
179
+ f"Loaded tools- {selected_tool_ids}", tool_call_id=tool_call["id"]
136
180
  )
137
- selected_tool_ids = tool_call["args"]["tool_ids"]
138
181
  logger.info(f"Loaded tools: {selected_tool_ids}")
139
182
  return Command(
140
183
  goto="call_model",
@@ -194,7 +237,7 @@ def build_graph(
194
237
  tool_call = state["messages"][-1].tool_calls[0]
195
238
  searched_tools = await search_tools.ainvoke(input=tool_call["args"])
196
239
  tool_msg = ToolMessage(
197
- f"Available tools: {searched_tools}", tool_call_id=tool_call["id"]
240
+ f"Available tool_ids: {searched_tools}. Call load_tools to select the required tools only.", tool_call_id=tool_call["id"]
198
241
  )
199
242
  return Command(goto="call_model", update={"messages": [tool_msg]})
200
243
  except Exception as e:
@@ -15,7 +15,7 @@ from universal_mcp.agents.base import BaseAgent
15
15
  from universal_mcp.agents.llm import load_chat_model
16
16
  from universal_mcp.agents.shared.tool_node import build_tool_node_graph
17
17
  from universal_mcp.agents.utils import messages_to_list
18
-
18
+ from collections import defaultdict
19
19
 
20
20
  class Agent(BaseModel):
21
21
  """Agent that can be created by the builder."""
@@ -146,16 +146,37 @@ class BuilderAgent(BaseAgent):
146
146
  ]
147
147
  }
148
148
  tool_finder_graph = build_tool_node_graph(self.llm, self.registry)
149
- tool_config = await tool_finder_graph.ainvoke(
150
- {"task": task, "messages": [HumanMessage(content=task)]}
151
- )
152
- tool_config = tool_config.get("apps_with_tools", {})
149
+
150
+ initial_state = {
151
+ "original_task": task,
152
+ "messages": [HumanMessage(content=task)],
153
+ "decomposition_attempts": 0,
154
+ }
155
+ final_state = await tool_finder_graph.ainvoke(initial_state)
156
+ execution_plan = final_state.get("execution_plan")
157
+ tool_config = {}
158
+ if execution_plan:
159
+ # Use defaultdict to easily group tools by app_id
160
+ apps_with_tools = defaultdict(list)
161
+ for step in execution_plan:
162
+ app_id = step.get("app_id")
163
+ tool_ids = step.get("tool_ids")
164
+ if app_id and tool_ids:
165
+ apps_with_tools[app_id].extend(tool_ids)
166
+
167
+ # Convert to a regular dict and remove any duplicate tool_ids for the same app
168
+ tool_config = {
169
+ app_id: list(set(tools)) for app_id, tools in apps_with_tools.items()
170
+ }
171
+ final_message = "I have selected the necessary tools for the agent. The agent is ready!"
172
+ else:
173
+ # Handle the case where the graph failed to create a plan
174
+ final_message = "I was unable to find the right tools for this task. Please try rephrasing your request."
175
+
153
176
  yield {
154
177
  "tool_config": tool_config,
155
178
  "messages": [
156
- AIMessage(
157
- content="I have selected the necessary tools for the agent. The agent is ready!"
158
- )
179
+ AIMessage(content=final_message)
159
180
  ],
160
181
  }
161
182
 
@@ -8,7 +8,7 @@ from langchain_openai import AzureChatOpenAI
8
8
 
9
9
  @lru_cache(maxsize=8)
10
10
  def load_chat_model(
11
- fully_specified_name: str, temperature: float = 1.0, tags: list[str] | None = None, thinking: bool = False
11
+ fully_specified_name: str, temperature: float = 1.0, tags: list[str] | None = None, thinking: bool = True
12
12
  ) -> BaseChatModel:
13
13
  """Load a chat model from a fully specified name.
14
14
  Args:
@@ -19,7 +19,7 @@ def build_graph(llm, registry, instructions, model, executor_agent_cls):
19
19
  logger.info(f"Running tool finder for task: {task}")
20
20
  tool_finder_graph = build_tool_node_graph(llm, registry)
21
21
  tool_finder_state = await tool_finder_graph.ainvoke(
22
- {"task": task, "messages": state["messages"]}
22
+ {"original_task": task, "messages": state["messages"]}
23
23
  )
24
24
 
25
25
  if not tool_finder_state.get("apps_required"):