universal-mcp-agents 0.1.13__tar.gz → 0.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of universal-mcp-agents may be problematic.
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/PKG-INFO +1 -1
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/pyproject.toml +1 -1
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/datasets/tasks.jsonl +1 -1
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/evaluators.py +4 -54
- universal_mcp_agents-0.1.14/src/evals/prompts.py +66 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/run.py +3 -2
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/tests/test_agents.py +2 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/base.py +2 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/bigtool/__init__.py +1 -1
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/bigtool/agent.py +2 -2
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/bigtool/graph.py +149 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/bigtool/prompts.py +2 -2
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/bigtool/tools.py +18 -4
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/builder/__main__.py +200 -0
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/builder/builder.py +214 -0
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/builder/helper.py +73 -0
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/builder/prompts.py +54 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/builder/state.py +1 -1
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/agent.py +5 -4
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/codeact0/langgraph_agent.py +17 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/llm_tool.py +1 -1
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/prompts.py +34 -23
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/11-github.yaml +6 -5
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/utils.py +42 -63
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/shared/__main__.py +43 -0
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/shared/prompts.py +83 -0
- universal_mcp_agents-0.1.14/src/universal_mcp/agents/shared/tool_node.py +206 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/utils.py +65 -0
- universal_mcp_agents-0.1.14/test.py +61 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/uv.lock +377 -307
- universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool/graph.py +0 -115
- universal_mcp_agents-0.1.13/src/universal_mcp/agents/builder/__main__.py +0 -125
- universal_mcp_agents-0.1.13/src/universal_mcp/agents/builder/builder.py +0 -225
- universal_mcp_agents-0.1.13/src/universal_mcp/agents/builder/prompts.py +0 -173
- universal_mcp_agents-0.1.13/src/universal_mcp/agents/shared/prompts.py +0 -132
- universal_mcp_agents-0.1.13/src/universal_mcp/agents/shared/tool_node.py +0 -227
- universal_mcp_agents-0.1.13/test.py +0 -49
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/.gitignore +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/.pre-commit-config.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/GEMINI.md +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/PROMPTS.md +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/README.md +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/bump_and_release.sh +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/__init__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/dataset.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/datasets/codeact.jsonl +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/datasets/exact.jsonl +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/evals/utils.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/__init__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/bigtool/__main__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/bigtool/context.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/bigtool/state.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/cli.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/__init__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/__main__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/agent.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/models.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/prompts.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/sandbox.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/state.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact/utils.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/__init__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/__main__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/config.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/sandbox.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/state.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/1-unsubscribe.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/10-reddit2.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/2-reddit.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/2.1-instructions.md +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/2.2-instructions.md +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/3-earnings.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/4-maps.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/5-gmailreply.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/6-contract.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/7-overnight.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/8-sheets_chart.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/codeact0/usecases/9-learning.yaml +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/hil.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/llm.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/planner/__init__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/planner/__main__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/planner/graph.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/planner/prompts.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/planner/state.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/react.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/agents/simple.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/applications/llm/__init__.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/applications/llm/app.py +0 -0
- {universal_mcp_agents-0.1.13 → universal_mcp_agents-0.1.14}/src/universal_mcp/applications/ui/app.py +2 -2
src/evals/datasets/tasks.jsonl
@@ -10,7 +10,7 @@
 {"user_input": "search reddit for posts on elon musk and then post a meme on him on linkedin", "difficulty": 3, "required_tools": {"reddit" : ["search_reddit"], "linkedin": ["create_post"]}}
 {"user_input": "Search for best cafes near IIT bombay using exa and make a google sheet out of it", "difficulty": 3, "required_tools": {"exa": ["search_with_filters"], "google_sheet": ["create_spreadsheet", "write_values_to_sheet", "add_table"]}}
 {"user_input": "Create a Google Doc summarizing the last 5 merged pull requests in my GitHub repo- universal-mcp/universal-mcp, including links and commit highlights.", "difficulty": 4, "required_tools": {"github": ["list_pull_requests", "list_recent_commits"], "google_docs": ["create_document", "insert_text", "apply_text_style"]}}
-{"user_input": "Summarize the key insights from all marketing emails received yesterday from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4, "required_tools": {"google_mail": ["list_messages"], "google_docs": ["create_document"]}}
+{"user_input": "Summarize the key insights from all marketing emails received yesterday from my Gmail and add a section in a Google Doc with action points.", "difficulty": 4, "required_tools": {"google_mail": ["list_messages"], "google_docs": ["create_document", "insert_text", "apply_text_style"]}}
 {"user_input": "Give me a report on the earnings of Oklo using web search, and projections for the company revenue, stock price", "difficulty": 4, "required_tools": {"tavily": ["search_and_summarize"]}}
 {"user_input": "Track the top posts in r/startups over the past 7 days using Reddit and create a trend report on what's being discussed most (e.g., hiring, funding, MVPs) in a Google Doc.", "difficulty": 4, "required_tools": {"reddit": ["get_subreddit_posts", "get_subreddit_top_posts"], "google_docs": ["create_document", "insert_text", "apply_text_style"]}}
 {"user_input": "Generate a comparison table of SaaS tools for project management using web search, including pricing, features, and user ratings in a Google Sheet", "difficulty": 4, "required_tools": {"tavily": ["search_and_summarize"], "google_sheet": ["create_spreadsheet", "add_table"]}}
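Each line of tasks.jsonl is a standalone JSON record with `user_input`, `difficulty`, and `required_tools` fields. A minimal sketch of how a harness might load and inspect these records (illustrative only, not the package's actual loader):

```python
import json
from pathlib import Path

# Illustrative loader for the eval dataset; path is relative to the repo root.
tasks = [
    json.loads(line)
    for line in Path("src/evals/datasets/tasks.jsonl").read_text().splitlines()
    if line.strip()
]
hard_tasks = [t for t in tasks if t["difficulty"] >= 4]
apps_needed = {app for t in hard_tasks for app in t["required_tools"]}
print(len(hard_tasks), "hard tasks touching apps:", apps_needed)
```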
src/evals/evaluators.py
@@ -7,6 +7,8 @@ from langsmith.evaluation import EvaluationResult, run_evaluator
 from langsmith.schemas import Example, Run
 from openevals.llm import create_llm_as_judge
 
+from evals.prompts import CODEACT_EVALUATOR_PROMPT, CORRECTNESS_PROMPT
+
 
 @run_evaluator
 def exact_match_evaluator(run: Run, example: Example | None = None) -> EvaluationResult:
@@ -38,58 +40,6 @@ def exact_match_evaluator(run: Run, example: Example | None = None) -> Evaluatio
     return EvaluationResult(key="exact_match", score=score, comment=comment)
 
 
-CORRECTNESS_PROMPT = """You are an expert data labeler evaluating model outputs for correctness. Your task is to assign a score based on the following rubric:
-
-<Rubric>
-A correct answer:
-- Provides accurate and complete information
-- Contains no factual errors
-- Addresses all parts of the question
-- Is logically consistent
-- Uses precise and accurate terminology
-
-When scoring, you should penalize:
-- Factual errors or inaccuracies
-- Incomplete or partial answers
-- Misleading or ambiguous statements
-- Incorrect terminology
-- Logical inconsistencies
-- Missing key information
-
-Ignore the following:
-- If the answer is not in the same language as the question.
-- use the specifically requested tool, as the tool name can be different
-- Do not penalize for incorrect third party data coming from the tool.
-</Rubric>
-
-<Instructions>
-- Carefully read the input and output
-- Check for factual accuracy and completeness
-- Focus on correctness of information rather than style or verbosity
-- If the user tool is not authorized, give a partial credit of `0.5`
-- Give partial credit if tools and called correctly, but the data is incorrect from tools.
-</Instructions>
-
-<Reminder>
-The goal is to evaluate factual correctness and completeness of the response.
-</Reminder>
-
-<input>
-{inputs}
-</input>
-
-<output>
-{outputs}
-</output>
-
-Use the reference outputs below to help you evaluate the correctness of the response:
-
-<reference_outputs>
-{reference_outputs}
-</reference_outputs>
-"""
-
-
 correctness_evaluator = create_llm_as_judge(
     prompt=CORRECTNESS_PROMPT,
     feedback_key="correctness",
@@ -103,8 +53,8 @@ trajectory_evaluator = create_trajectory_llm_as_judge(
 )
 
 
-codeact_evaluator = 
-    prompt=
+codeact_evaluator = create_llm_as_judge(
+    prompt=CODEACT_EVALUATOR_PROMPT,
     feedback_key="codeact_accuracy",
     model="anthropic:claude-4-sonnet-20250514",
 )
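For context, a minimal sketch of how such an `openevals` judge is built and invoked once the prompts live in `evals.prompts` (the sample task and transcript values below are invented):

```python
from openevals.llm import create_llm_as_judge

from evals.prompts import CODEACT_EVALUATOR_PROMPT

# Build a judge that scores whether agent-written code executed cleanly.
codeact_evaluator = create_llm_as_judge(
    prompt=CODEACT_EVALUATOR_PROMPT,
    feedback_key="codeact_accuracy",
    model="anthropic:claude-4-sonnet-20250514",
)

# Hypothetical invocation: the judge receives the run's inputs/outputs and
# returns a score plus a comment under the configured feedback key.
result = codeact_evaluator(
    inputs={"user_input": "sum 2 and 3"},        # made-up example task
    outputs={"output": "print(2 + 3)  # -> 5"},  # made-up agent transcript
)
```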
src/evals/prompts.py (new file)
@@ -0,0 +1,66 @@
+CORRECTNESS_PROMPT = """You are an expert data labeler evaluating model outputs for correctness. Your task is to assign a score based on the following rubric:
+
+<Rubric>
+A correct answer:
+- Provides accurate and complete information
+- Contains no factual errors
+- Addresses all parts of the question
+- Is logically consistent
+- Uses precise and accurate terminology
+
+When scoring, you should penalize:
+- Factual errors or inaccuracies
+- Incomplete or partial answers
+- Misleading or ambiguous statements
+- Incorrect terminology
+- Logical inconsistencies
+- Missing key information
+
+Ignore the following:
+- If the answer is not in the same language as the question.
+- use the specifically requested tool, as the tool name can be different
+- Do not penalize for incorrect third party data coming from the tool.
+</Rubric>
+
+<Instructions>
+- Carefully read the input and output
+- Check for factual accuracy and completeness
+- Focus on correctness of information rather than style or verbosity
+- If the user tool is not authorized, give a partial credit of `0.5`
+- Give partial credit if tools and called correctly, but the data is incorrect from tools.
+</Instructions>
+
+<Reminder>
+The goal is to evaluate factual correctness and completeness of the response.
+</Reminder>
+
+<input>
+{inputs}
+</input>
+
+<output>
+{outputs}
+</output>
+
+Use the reference outputs below to help you evaluate the correctness of the response:
+
+<reference_outputs>
+{reference_outputs}
+</reference_outputs>
+"""
+
+CODEACT_EVALUATOR_PROMPT = """
+You are a code execution evaluator. You will be given the entire run of an agent, starting with a human input task, the intermediate steps taken, and the final output of the agent given to the user. These steps will contain code written by the agent to solve the problem as well as its outputs. Your job is to check ONLY if the code executes correctly.
+Keep in mind that the agent has access to tools like- ai_classify, call_llm, creative_writer, data_extractor. These calls are to be treated as valid if they run without errors.
+These are the only criteria you should evaluate-
+
+<Rubric>
+- The code written by the agent in tool calls should be syntactically correct and use existing objects.
+- The code outputs should not have an error or empty/unexpected outputs
+</Rubric>
+If either of the above are not satisfied, you should give 0.
+
+<Reminder>
+You must not judge whether the code is helpful to the task or not, only if the code itself is correct or not.
+</Reminder>
+"""
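The `{inputs}`, `{outputs}`, and `{reference_outputs}` placeholders are filled in by the judge harness at evaluation time. A toy sketch of that substitution with invented values (`openevals` performs this internally; this only shows the template shape):

```python
# Invented sample values; real runs pass the serialized agent transcript.
rendered = CORRECTNESS_PROMPT.format(
    inputs='{"user_input": "sum 2 and 3"}',
    outputs='{"output": "5"}',
    reference_outputs='{"output": "5"}',
)
print(rendered.splitlines()[0])  # first line of the filled-in prompt
```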
src/evals/run.py
@@ -15,6 +15,7 @@ from evals.evaluators import (
     exact_match_evaluator,
     tool_node_evaluator,
     trajectory_evaluator,
+    codeact_evaluator,
 )
 from universal_mcp.agents import get_agent
 from universal_mcp.agents.utils import messages_to_list
@@ -62,12 +63,12 @@ async def agent_runner(agent_name: str, inputs: dict) -> dict:
     registry = AgentrRegistry(client=client) if agent_name != "simple" else None
     common_params = {
         "instructions": f"You are a helpful assistant. Keep your responses short and concise. Do not provide with any explanation. The current date and time is {current_date_time}",
-        "model": "
+        "model": "azure/gpt-4.1",
         "registry": registry,
         "tools": inputs.get("tools", {}),
     }
     agent = get_agent(agent_name)(name=agent_name, **common_params)
-    result = await agent.invoke(user_input=inputs["user_input"])
+    result = await agent.invoke(user_input=inputs["user_input"], thread_id="evals")
     messages = messages_to_list(result["messages"])
     return_result = {"output": messages}
     if "tool_config" in result:
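A hypothetical driver for the updated `agent_runner` (the agent name and task payload are made up; the payload mirrors the tasks.jsonl schema, and the import path is assumed from the diff's module layout):

```python
import asyncio

from evals.run import agent_runner  # assumed import path


async def main() -> None:
    inputs = {
        "user_input": "search reddit for posts on elon musk",
        "tools": {"reddit": ["search_reddit"]},
    }
    result = await agent_runner("bigtool", inputs)
    print(result["output"])  # the agent's message transcript


asyncio.run(main())
```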
src/tests/test_agents.py
@@ -145,6 +145,7 @@ class MockToolRegistry(ToolRegistry):
         self,
         query: str,
         limit: int = 10,
+        **kwargs: Any,
     ) -> list[dict[str, Any]]:
         """
         Search for apps by a query.
@@ -167,6 +168,7 @@ class MockToolRegistry(ToolRegistry):
         query: str,
         limit: int = 10,
         app_id: str | None = None,
+        **kwargs: Any,
     ) -> list[dict[str, Any]]:
         """
         Search for tools by a query.
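The `**kwargs: Any` additions keep the mock's search methods signature-compatible when callers pass keyword arguments the real registry accepts. A minimal sketch of the pattern (method bodies are placeholders, not the repo's actual mock):

```python
from typing import Any


class MockToolRegistry:
    # Accepting **kwargs lets tests survive new optional arguments
    # (e.g. filters or pagination flags) added to the real interface.
    async def search_apps(self, query: str, limit: int = 10, **kwargs: Any) -> list[dict[str, Any]]:
        return [{"id": "reddit", "name": "Reddit"}][:limit]  # canned fixture data

    async def search_tools(
        self, query: str, limit: int = 10, app_id: str | None = None, **kwargs: Any
    ) -> list[dict[str, Any]]:
        return [{"id": "reddit__search_reddit"}][:limit]  # canned fixture data
```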
src/universal_mcp/agents/bigtool/agent.py
@@ -1,9 +1,9 @@
 from universal_mcp.agentr.registry import AgentrRegistry
-from universal_mcp.agents.
+from universal_mcp.agents.bigtool import BigToolAgent
 
 
 async def agent():
-    agent_object = await 
+    agent_object = await BigToolAgent(
         registry=AgentrRegistry(),
     )._build_graph()
     return agent_object
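The rewritten factory awaits `BigToolAgent(...)._build_graph()` directly. A hypothetical way to obtain the graph object outside a server context:

```python
import asyncio

# agent() is the coroutine shown in the hunk above.
graph = asyncio.run(agent())  # the built bigtool graph, ready to serve
```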
src/universal_mcp/agents/bigtool/graph.py (new file)
@@ -0,0 +1,149 @@
+import json
+from typing import Literal, cast
+
+from dotenv import load_dotenv
+from langchain_anthropic import ChatAnthropic
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import AIMessage, SystemMessage, ToolMessage
+from langchain_core.tools import BaseTool
+from langgraph.graph import StateGraph
+from langgraph.types import Command, RetryPolicy
+from universal_mcp.tools.registry import ToolRegistry
+from universal_mcp.types import ToolFormat
+
+from .state import State
+from .tools import get_valid_tools
+from universal_mcp.agents.utils import filter_retry_on
+
+load_dotenv()
+
+
+def build_graph(
+    registry: ToolRegistry,
+    base_model: BaseChatModel,
+    system_prompt: str,
+    default_tools: list[BaseTool],
+    meta_tools: dict[str, BaseTool],
+):
+    """Build the LangGraph workflow"""
+
+    async def agent_node(state: State) -> Command[Literal["execute_tools"]]:
+        """Main agent reasoning node"""
+
+        # Combine meta tools with currently loaded tools
+        if len(state["selected_tool_ids"]) > 0:
+            try:
+                current_tools = await registry.export_tools(
+                    tools=state["selected_tool_ids"], format=ToolFormat.LANGCHAIN
+                )
+            except Exception as e:
+                raise Exception(f"Failed to export selected tools: {e}")
+        else:
+            current_tools = []
+        all_tools = (
+            [meta_tools["search_tools"], meta_tools["load_tools"], meta_tools.get("web_search")]
+            + default_tools
+            + current_tools
+        )
+
+        # Remove duplicates based on tool name
+        seen_names = set()
+        unique_tools = []
+        for tool in all_tools:
+            if tool.name not in seen_names:
+                seen_names.add(tool.name)
+                unique_tools.append(tool)
+
+        try:
+            if isinstance(base_model, ChatAnthropic):
+                model_with_tools = base_model.bind_tools(
+                    unique_tools,
+                    tool_choice="auto",
+                    parallel_tool_calls=False,
+                    cache_control={"type": "ephemeral", "ttl": "1h"},
+                )
+            else:
+                model_with_tools = base_model.bind_tools(
+                    unique_tools,
+                    tool_choice="auto",
+                    parallel_tool_calls=False,
+                )
+        except Exception as e:
+            raise Exception(f"Failed to bind tools to model: {e}")
+
+        # Get response from model
+        messages = [SystemMessage(content=system_prompt), *state["messages"]]
+
+        try:
+            response = cast(AIMessage, await model_with_tools.ainvoke(messages))
+        except Exception as e:
+            raise Exception(f"Model invocation failed: {e}")
+
+        if response.tool_calls:
+            return Command(goto="execute_tools", update={"messages": [response]})
+        else:
+            return Command(update={"messages": [response], "model_with_tools": model_with_tools})
+
+    async def execute_tools_node(state: State) -> Command[Literal["agent"]]:
+        """Execute tool calls"""
+        last_message = state["messages"][-1]
+        tool_calls = last_message.tool_calls if isinstance(last_message, AIMessage) else []
+
+        tool_messages = []
+        new_tool_ids = []
+        ask_user = False
+
+        for tool_call in tool_calls:
+            try:
+                if tool_call["name"] == "load_tools":  # Handle load_tools separately
+                    valid_tools, unconnected_links = await get_valid_tools(tool_ids=tool_call["args"]["tool_ids"], registry=registry)
+                    new_tool_ids.extend(valid_tools)
+                    # Create tool message response
+                    tool_result = f"Successfully loaded {len(valid_tools)} tools: {valid_tools}"
+                    if unconnected_links:
+                        ask_user = True
+                        ai_msg = f"Please login to the following app(s) using the following links and let me know in order to proceed:\n {'\n'.join(unconnected_links)} "
+
+                elif tool_call["name"] == "search_tools":
+                    tool_result = await meta_tools["search_tools"].ainvoke(tool_call["args"])
+                elif tool_call["name"] == "web_search":
+                    tool_result = await meta_tools["web_search"].ainvoke(tool_call["args"])
+                else:
+                    # Load tools first
+                    await registry.export_tools([tool_call["name"]], ToolFormat.LANGCHAIN)
+                    tool_result = await registry.call_tool(tool_call["name"], tool_call["args"])
+            except Exception as e:
+                tool_result = f"Error during {tool_call}: {e}"
+
+
+            tool_message = ToolMessage(
+                content=json.dumps(tool_result),
+                name=tool_call["name"],
+                tool_call_id=tool_call["id"],
+            )
+            tool_messages.append(tool_message)
+        if ask_user:
+            tool_messages.append(AIMessage(content=ai_msg))
+            return Command(update={"messages": tool_messages, "selected_tool_ids": new_tool_ids})
+
+        return Command(goto="agent", update={"messages": tool_messages, "selected_tool_ids": new_tool_ids})
+
+    # Define the graph
+    workflow = StateGraph(State)
+
+    # Add nodes
+    workflow.add_node(
+        "agent",
+        agent_node,
+        retry_policy=RetryPolicy(max_attempts=3, retry_on=filter_retry_on, initial_interval=2, backoff_factor=2),
+    )
+    workflow.add_node(
+        "execute_tools",
+        execute_tools_node,
+        retry_policy=RetryPolicy(max_attempts=3, retry_on=filter_retry_on, initial_interval=2, backoff_factor=2),
+    )
+
+    # Set entry point
+    workflow.set_entry_point("agent")
+
+    return workflow
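`build_graph` returns an uncompiled `StateGraph`, so callers still have to compile it. A minimal sketch of compiling and invoking it, assuming the caller (normally `BigToolAgent`) already has a registry, model, prompt, and tool set in scope:

```python
from langgraph.checkpoint.memory import MemorySaver

# Hypothetical wiring; in the package BigToolAgent performs these steps.
workflow = build_graph(registry, base_model, system_prompt, default_tools, meta_tools)
graph = workflow.compile(checkpointer=MemorySaver())

# Inside an async context:
state = await graph.ainvoke(
    {"messages": [("user", "find cafes near IIT Bombay")], "selected_tool_ids": []},
    config={"configurable": {"thread_id": "demo"}},
)
print(state["messages"][-1].content)
```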
src/universal_mcp/agents/bigtool/prompts.py
@@ -5,9 +5,9 @@ SYSTEM_PROMPT = """You are a helpful AI assistant, called {name}.
 **Core Directives:**
 1. **Always Use Tools for Tasks:** For any user request that requires an action (e.g., sending an email, searching for information, creating an event, displaying a chart), you MUST use a tool. Do not refuse a task if a tool might exist for it.
 
-2. Check if your existing tools or knowledge can handle the user's request. If they can, use them. If they cannot, you must call the `search_tools` function to find the right tools for the user's request.You must not use the same/similar query multiple times in the list. The list should have multiple queries only if the task has clearly different sub-tasks. If you do not find any specific relevant tools, use the pre-loaded generic tools.
+2. Check if your existing tools or knowledge can handle the user's request. If they can, use them. If they cannot, you must call the `search_tools` function to find the right tools for the user's request. You must not use the same/similar query multiple times in the list. The list should have multiple queries only if the task has clearly different sub-tasks. If you do not find any specific relevant tools, use the pre-loaded generic tools. Only use `search_tools` if your existing capabilities cannot handle the request.
 
-3. **Load Tools:** After looking at the output of `search_tools`, you MUST call the `load_tools` function to load only the tools you want to use. Provide the full tool ids, not just the app names. Use your judgement to eliminate irrelevant apps that came up just because of semantic similarity. However, sometimes, multiple apps might be relevant for the same task. Prefer connected apps over unconnected apps while breaking a tie. If more than one relevant app (or none of the relevant apps) are connected, you must ask the user to choose the app. In case the user asks you to use an app that is not connected, call the apps tools normally. The tool will return a link for connecting that you should pass on to the user.
+3. **Load Tools:** After looking at the output of `search_tools`, you MUST call the `load_tools` function to load only the tools you want to use. Provide the full tool ids, not just the app names. Use your judgement to eliminate irrelevant apps that came up just because of semantic similarity. However, sometimes, multiple apps might be relevant for the same task. Prefer connected apps over unconnected apps while breaking a tie. If more than one relevant app (or none of the relevant apps) are connected, you must ask the user to choose the app. In case the user asks you to use an app that is not connected, call the apps tools normally. The tool will return a link for connecting that you should pass on to the user. Only load tools if your existing capabilities cannot handle the request.
 
 4. **Strictly Follow the Process:** Your only job in your first turn is to analyze the user's request and answer using existing tools/knowledge or `search_tools` with a concise query describing the core task. Do not engage in conversation, or extend the conversation beyond the user's request.
 
src/universal_mcp/agents/bigtool/tools.py
@@ -35,7 +35,8 @@ def create_meta_tools(tool_registry: ToolRegistry) -> dict[str, Any]:
         for tool in tools_list:
             app = tool["id"].split("__")[0]
             if len(app_tools[app]) < 5:
-
+                cleaned_desc = tool['description'].split("Context:")[0].strip()
+                app_tools[app].append(f"{tool['id']}: {cleaned_desc}")
 
         # Build result string efficiently
         result_parts = []
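The new description cleanup keeps only the text before a literal `Context:` marker. A toy example of the transformation (the description string is invented):

```python
desc = "Search Reddit posts. Context: internal routing hints..."
cleaned = desc.split("Context:")[0].strip()
assert cleaned == "Search Reddit posts."
```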
@@ -98,8 +99,13 @@ def create_meta_tools(tool_registry: ToolRegistry) -> dict[str, Any]:
     return {"search_tools": search_tools, "load_tools": load_tools, "web_search": web_search}
 
 
-async def get_valid_tools(tool_ids: list[str], registry: ToolRegistry) -> list[str]:
+async def get_valid_tools(tool_ids: list[str], registry: ToolRegistry) -> tuple[list[str], list[str]]:
+    """For a given list of tool_ids, validates the tools and returns a list of links for the apps that have not been logged in"""
     correct, incorrect = [], []
+    connections = await registry.list_connected_apps()
+    connected_apps = {connection["app_id"] for connection in connections}
+    unconnected = set()
+    unconnected_links = []
     app_tool_list: dict[str, set[str]] = {}
 
     # Group tool_ids by app for fewer registry calls
@@ -127,15 +133,23 @@ async def get_valid_tools(tool_ids: list[str], registry: ToolRegistry) -> list[s
             app_tool_list[app] = tools
 
     # Validate tool_ids
-    for app, tool_entries in app_to_tools.items():
+    for app, tool_entries in app_to_tools.items():
         available = app_tool_list.get(app)
         if available is None:
             incorrect.extend(tool_id for tool_id, _ in tool_entries)
             continue
+        if app not in connected_apps and app not in unconnected:
+            unconnected.add(app)
+            text = registry.client.get_authorization_url(app)
+            start = text.find(":") + 1
+            end = text.find(".", start)
+            url = text[start:end].strip()
+            markdown_link = f"[{app}]({url})"
+            unconnected_links.append(markdown_link)
         for tool_id, tool_name in tool_entries:
            if tool_name in available:
                 correct.append(tool_id)
             else:
                 incorrect.append(tool_id)
 
-    return correct
+    return correct, unconnected_links
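A minimal usage sketch of the new return shape, mirroring how graph.py consumes the tuple (the registry object and tool ids are placeholders):

```python
# Inside an async context, with a ToolRegistry instance in scope:
valid_tools, unconnected_links = await get_valid_tools(
    tool_ids=["google_mail__list_messages", "reddit__search_reddit"],
    registry=registry,
)
if unconnected_links:
    # Surface markdown login links such as "[reddit](https://...)" to the user.
    print("Please connect:", ", ".join(unconnected_links))
```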