universal-mcp-agents 0.1.12__tar.gz → 0.1.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/.gitignore +1 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/PKG-INFO +1 -1
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/pyproject.toml +1 -1
- universal_mcp_agents-0.1.13/src/evals/datasets/codeact.jsonl +11 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/evaluators.py +7 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/run.py +2 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/agent.py +1 -1
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/sandbox.py +1 -5
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/shared/tool_node.py +1 -34
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/uv.lock +1 -1
- universal_mcp_agents-0.1.12/dataset_code.py +0 -83
- universal_mcp_agents-0.1.12/src/evals/datasets/test.jsonl +0 -1
- universal_mcp_agents-0.1.12/src/universal_mcp/agents/codeact0/langgraph_graph.py +0 -17
- universal_mcp_agents-0.1.12/src/universal_mcp/agents/codeact0/legacy_codeact.py +0 -104
- universal_mcp_agents-0.1.12/test_code.py +0 -78
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/.pre-commit-config.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/GEMINI.md +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/PROMPTS.md +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/README.md +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/bump_and_release.sh +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/__init__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/dataset.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/datasets/exact.jsonl +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/datasets/tasks.jsonl +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/utils.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/tests/test_agents.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/__init__.py +1 -1
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/base.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/__init__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/__main__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/agent.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/context.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/graph.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/prompts.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/state.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/bigtool/tools.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/builder/__main__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/builder/builder.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/builder/prompts.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/builder/state.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/cli.py +2 -2
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/__init__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/__main__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/models.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/prompts.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/state.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/utils.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/__init__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/__main__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/agent.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/config.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/llm_tool.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/prompts.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/sandbox.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/state.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/1-unsubscribe.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/10-reddit2.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/11-github.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/2-reddit.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/2.1-instructions.md +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/2.2-instructions.md +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/3-earnings.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/4-maps.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/5-gmailreply.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/6-contract.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/7-overnight.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/8-sheets_chart.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/usecases/9-learning.yaml +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact0/utils.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/hil.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/llm.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/__init__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/__main__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/graph.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/prompts.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/state.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/react.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/shared/prompts.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/simple.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/utils.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/applications/llm/__init__.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/applications/llm/app.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/applications/ui/app.py +0 -0
- {universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/test.py +0 -0
universal_mcp_agents-0.1.13/src/evals/datasets/codeact.jsonl
@@ -0,0 +1,11 @@
+{"user_input": "Find and extract unsubscribe links from all emails in my inbox from the last 7 days. List all unsubscribe links found with the email subject and sender.", "required_tools": {"google_mail": ["list_messages", "get_message_details"]}}
+{"user_input": "Process rows 2-5 from the Google Sheet (ID: 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8). For each unprocessed row, extract Reddit post links, fetch post details and comments, analyze content relevance to AgentR/Wingmen products, classify into tiers 1-4, generate appropriate response drafts, and update the sheet with all findings.", "required_tools": {"google_sheet": ["add_table", "append_values", "update_values", "format_cells", "get_spreadsheet_metadata", "batch_get_values_by_range"], "reddit": ["get_post_comments_details"], "google_mail": ["list_messages"]}}
+{"user_input": "Fetch all open issues from the GitHub repository \"microsoft/vscode\" and add them to a new Google Sheet. Then create corresponding tasks in ClickUp for each issue with descriptions, tags, and \"In Progress\" status. Delete processed rows from the sheet after creating ClickUp tasks.", "required_tools": {"google_sheet": ["get_values", "delete_dimensions", "update_values", "get_spreadsheet_metadata", "batch_get_values_by_range"], "clickup": ["tasks_create_new_task", "spaces_get_details", "lists_get_list_details", "tasks_get_list_tasks"], "github": ["list_issues", "update_issue"]}}
+{"user_input": "Goal: Process unprocessed rows in a fixed Google Sheet, scrape Reddit for context, filter posts, and generate short, natural comments linking to AgentR/Wingmen when relevant. Workflow: 1) Sheet & Row Selection: Fixed Sheet ID 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8, tab Posts. Process rows 2-5 (first 4 unprocessed rows) immediately without asking for user input. Only process rows with empty Match Type (Col I) and no Tier 1-4 assigned. 2) Reddit Context Fetch: Extract Post Link & ID. Use reddit to fetch post upvotes + top comments (max 5). Ensure post/comment is active, visible, and unlocked. 3) Filtration & Fit: Classify content (developer, consumer, anecdotal). Apply GTM Filtration to skip irrelevant, negative, political, or low-quality posts. Identify direct or adjacent fit to AgentR (Universal MCP Server) or Wingmen. Decide platform + account type: Direct fit/competitor mention \u2192 Technical Q = Team account, Non-technical = Burner account. Adjacent fit \u2192 Official account. Decide reply target (original comment/post or parent post). 4) Comment Generation: For Tier 1-3, craft a 2-3 line, context-aware, conversational reply. Mention AgentR/Wingmen organically, avoid sales tone or forced CTAs. Use light imperfections for human tone. Skip negative sentiment entirely. One comment per post. 5) Populate Output: Fill Upvote Count, Match Type, Account Type, Response Draft, Respond on. Return updated Google Sheet link. Tier Definitions: Tier 1 = Deep MCP, AI agent, tool integrations, or architecture discussions where infra is highly relevant. Tier 2 = Specific workflows, automation tooling, or productivity systems where Wingmen or MCP Server could be useful. Tier 3 = Broader ecosystem (LangChain/CrewAI/agent tooling) where a soft recommendation adds value. Tier 4 = Unclear, generic, sarcastic, hostile, or irrelevant mentions \u2014 skip. Execute immediately using the fixed Google Sheet ID: 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8, tab \"Posts\". Process rows(first 4 unprocessed rows) without asking for user input. Only process rows where Match Type (Column I) is empty. For each row, extract the Post Link, fetch Reddit data, apply GTM filtration, generate appropriate responses, and update the sheet. Return the updated Google Sheet link when complete.", "required_tools": {"reddit": ["get_post_comments_details"], "google_sheet": ["update_values", "get_values", "get_spreadsheet_metadata", "batch_get_values_by_range"]}}
+{"user_input": "Generate a financial flash report for Apple Inc. Research their latest earnings data including revenue, net income, EPS, and year-over-year changes. Create a formatted report with highlights, upcoming events, and summary. Present the report in chat and email it to adit@agentr.dev.", "required_tools": {"exa": ["answer"], "google_mail": ["send_email"]}}
+{"user_input": "Objective: Find businesses from Google Maps for a given category & location, store them in a Google Sheet, then process unprocessed leads to scrape emails and sync with HubSpot CRM. Stage 1 - Lead Discovery Get coordinates of Area + City. Search on Google Maps with category & coordinates. Extract: Name, Google Maps URL, Address, Phone, Website; leave Email & CRM Status blank. Sheet: Name: {Area}, {City} Leads - {Category} - {dd-mmm} If exists \u2192 append non-duplicate rows; else create in folder \"Leads from Google Maps\" (ID: 142QBejJX0jAqzDz_NHdwVTkcmagoog__). Add headers: Name | Google Maps URL | Address | Phone | Website | Email | CRM Status. Populate with businesses found. Edge Cases: No results \u2192 return message, skip sheet creation. Missing data \u2192 leave blank. Stage 2 - Lead Processing & CRM Sync Locate sheet in Google Drive, ensure headers match. Parse category from sheet name. Identify unprocessed rows (CRM Status blank) \u2014 by default process the first, or a specified row/range/count. Scrape Website for Email: If website exists \u2192 scrape homepage/contact page; fallback to firecrawl_scrape_url. Save found email in sheet. HubSpot Handling: Search contact by email/website/phone. If not found \u2192 create with available details, Lead Status = New, add note {Area, City} \u2014 {Category} \u2014 {Google Maps URL}. If exists \u2192 append note; keep other fields unchanged. Save HubSpot Contact URL/ID in sheet. Update CRM Status: Lead Created, Lead Creation Failed, Website not found, Email not found, etc. Edge Cases: No Website \u2192 create with phone; mark Website not found. No Email \u2192 create; mark Email not found. Email already in sheet \u2192 skip row. Execute immediately for \"Cafes\" near \"IIT Bombay\" in \"Mumbai\" without asking for confirmation.", "required_tools": {"serpapi": ["google_maps_search"], "firecrawl": ["scrape_url"], "google_drive": ["get_file_details", "create_folder", "find_folder_id_by_name", "search_files"], "google_sheet": ["update_values", "get_values", "get_spreadsheet_metadata", "batch_get_values_by_range", "create_spreadsheet", "clear_values"], "hubspot": ["search_contacts_post", "batch_read_contacts_post", "get_contacts", "get_contact_by_id", "update_contact_by_id", "batch_update_contacts", "create_contacts_batch", "create_contact"]}}
+{"user_input": "Process emails from the last 24 hours. Fetch primary inbox emails excluding replied threads, classify with LLM as Reply Required, No Reply Needed, or Ambiguous. For Reply Required/Ambiguous, draft human, on-brand replies for user review. Follow greeting, acknowledge, address concern, invite further questions, and friendly sign-off. Provide end summary of drafts, skipped, and ambiguous emails. Execute immediately without asking for confirmation. Do not send any emails. Just provide me a report.", "required_tools": {"google_mail": ["list_messages", "get_message_details"]}}
+{"user_input": "Analyze a contract from my google drive from the perspective of the Service Provider. Use the search to find it, do not ask me any questions, and assume details that I have not provided. Identify potentially unfavorable clauses such as vague terms, one-sided obligations, IP transfer issues, indemnity clauses, termination conditions, and payment problems. Provide a structured analysis with clause numbers, full text, and explanations of concerns.", "required_tools": {"google_drive": ["get_file_details", "search_files"], "google_docs": ["get_document"], "exa": ["answer"]}}
+{"user_input": "Create a summary of overnight updates from 8:00 PM yesterday to 8:00 AM today in IST. Check Gmail for important emails and ClickUp for mentions and assigned tasks. Organize findings into high priority and other items, then provide a comprehensive summary of all overnight activity.", "required_tools": {"google_mail": ["list_messages"], "clickup": ["comments_get_task_comments", "comments_get_list_comments", "comments_get_view_comments", "tasks_get_list_tasks", "tasks_filter_team_tasks", "time_tracking_get_time_entries_within_date_range", "time_tracking_get_time_entry_history", "authorization_get_workspace_list", "spaces_get_details", "lists_get_list_details"]}}
+{"user_input": "Analyze the data in Google Sheet (ID: 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8) and create 3-5 relevant charts and visualizations. Add pie charts, bar graphs, and other appropriate visualizations based on the data structure. Embed all charts directly into the sheet and provide the updated sheet link.", "required_tools": {"google_sheet": ["create_spreadsheet", "get_spreadsheet_metadata", "batch_get_values_by_range", "append_dimensions", "insert_dimensions", "delete_sheet", "add_sheet", "delete_dimensions", "add_basic_chart", "add_table", "add_pie_chart", "clear_values", "update_values", "clear_basic_filter", "get_values", "discover_tables", "set_basic_filter", "analyze_table_schema", "copy_sheet_to_spreadsheet", "append_values", "batch_get_values_by_data_filter", "batch_clear_values", "format_cells"]}}
+{"user_input": "Create a 7-day learning plan for Python Programming. Research essential concepts and skills, create a detailed day-by-day plan with topics, goals, resources, and exercises. Compile the plan into a Google Doc and schedule daily emails at 8 AM starting today. Send Day 1 immediately to adit@agentr.dev and provide the Google Doc link.", "required_tools": {"google_docs": ["get_document", "create_document", "insert_text"], "google_mail": ["send_email", "send_draft", "create_draft"], "exa": ["answer"]}}
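Each line of the new dataset is a standalone JSON object with a "user_input" task and a "required_tools" map. A minimal loader sketch (the path matches the file entry above; the helper name is hypothetical, not part of the package):

import json
from pathlib import Path

# Hypothetical loader for the JSONL added above; the field names
# ("user_input", "required_tools") come from the dataset lines themselves.
def load_codeact_cases(path: str = "src/evals/datasets/codeact.jsonl") -> list[dict]:
    cases = []
    for line in Path(path).read_text(encoding="utf-8").splitlines():
        if line.strip():
            cases.append(json.loads(line))
    return cases

if __name__ == "__main__":
    for case in load_codeact_cases():
        print(case["user_input"][:60], sorted(case["required_tools"]))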
{universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/evaluators.py
@@ -103,6 +103,13 @@ trajectory_evaluator = create_trajectory_llm_as_judge(
 )
 
 
+codeact_evaluator = create_trajectory_llm_as_judge(
+    prompt=TRAJECTORY_ACCURACY_PROMPT,
+    feedback_key="codeact_accuracy",
+    model="anthropic:claude-4-sonnet-20250514",
+)
+
+
 @run_evaluator
 def tool_node_evaluator(run: Run, example: Example | None = None) -> EvaluationResult:
     """
{universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/evals/run.py
@@ -25,6 +25,7 @@ EVALUATORS: dict[str, Any] = {
     "exact_match": exact_match_evaluator,
     "trajectory": trajectory_evaluator,
     "tool_node": tool_node_evaluator,
+    "codeact": codeact_evaluator,
 }
 
 
@@ -33,6 +34,7 @@ class EvaluatorName(str, Enum):
     exact_match = "exact_match"
     trajectory = "trajectory"
     tool_node = "tool_node"
+    codeact = "codeact"
 
 
 class Difficulty(str, Enum):
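The two run.py hunks follow one registration pattern: each evaluator is stored in EVALUATORS under a string key and exposed through the EvaluatorName enum with the same value. A toy, self-contained sketch of that dispatch (the stub evaluators are stand-ins, not the package's judges):

from enum import Enum
from typing import Any, Callable

class EvaluatorName(str, Enum):
    exact_match = "exact_match"
    trajectory = "trajectory"
    tool_node = "tool_node"
    codeact = "codeact"

def _stub(key: str) -> Callable[[dict], dict]:
    # Stand-in evaluator: returns a fixed feedback payload keyed by name.
    def evaluate(run: dict) -> dict:
        return {"key": key, "score": 1.0}
    return evaluate

EVALUATORS: dict[str, Any] = {name.value: _stub(name.value) for name in EvaluatorName}

# Selecting the new judge by enum value, mirroring the registry above.
print(EVALUATORS[EvaluatorName.codeact.value]({"outputs": []}))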
{universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/agent.py
@@ -48,7 +48,7 @@ class CodeActAgent(BaseAgent):
             memory=memory,
             **kwargs,
         )
-        self.model_instance = load_chat_model(model
+        self.model_instance = load_chat_model(model)
         self.tools_config = tools or {}
         self.registry = registry
         self.eval_fn = eval_unsafe
{universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/sandbox.py
@@ -51,17 +51,13 @@ SAFE_BUILTINS = {
 }
 
 
-# (The SAFE_BUILTINS definition remains the same)
-# ...
-
-
 async def eval_unsafe(
     code: str, _locals: dict[str, Callable], timeout: int = 10
 ) -> tuple[SandboxOutput, dict[str, Any]]:
     """Executes a string of Python code in a sandboxed environment."""
     original_keys = set(_locals.keys())
     execution_context = _locals.copy()
-    execution_context["__builtins__"] = SAFE_BUILTINS
+    execution_context["__builtins__"] = __builtins__  # TODO: Use SAFE_BUILTINS instead of __builtins__
 
     stdout_capture = io.StringIO()
     output = SandboxOutput(stdout="")
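The sandbox change swaps SAFE_BUILTINS for the full __builtins__, with a TODO to revert. A generic sketch of what the SAFE_BUILTINS approach does, independent of this package's eval_unsafe (the allow-list here is illustrative only):

import contextlib
import io

# Only names placed in SAFE_BUILTINS are visible to the executed code;
# anything else (open, __import__, ...) raises NameError.
SAFE_BUILTINS = {"print": print, "len": len, "range": range}

def run_restricted(code: str) -> str:
    env = {"__builtins__": SAFE_BUILTINS}
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        exec(code, env, env)
    return buf.getvalue()

print(run_restricted("print(len(range(5)))"))  # prints "5"
# run_restricted("open('x')")  # would raise NameError: name 'open' is not defined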
{universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/shared/tool_node.py
@@ -1,8 +1,7 @@
-import asyncio
 from typing import Annotated, TypedDict
 
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage, AnyMessage
+from langchain_core.messages import AIMessage, AnyMessage
 from langgraph.graph import END, StateGraph
 from langgraph.graph.message import add_messages
 from loguru import logger
@@ -226,35 +225,3 @@ def build_tool_node_graph(llm: BaseChatModel, registry: ToolRegistry) -> StateGr
     workflow.add_edge("handle_planning_failure", END)
 
     return workflow.compile()
-
-
-async def main():
-    """Main function to run the agent."""
-    from universal_mcp.agentr.registry import AgentrRegistry
-
-    from universal_mcp.agents.llm import load_chat_model
-
-    registry = AgentrRegistry()
-    llm = load_chat_model("anthropic/claude-4-sonnet-20250514")
-
-    graph = build_tool_node_graph(llm, registry)
-
-    task = "Find my latest order confirmation in Gmail, search for reviews of the main product on perplexity, and then send an email to ankit@agentr.dev telling about the reviews"
-
-    initial_state = {
-        "original_task": task,
-        "messages": [HumanMessage(content=task)],
-        "decomposition_attempts": 0,
-    }
-
-    final_state = await graph.ainvoke(initial_state)
-
-    if final_state.get("execution_plan"):
-        for step in final_state["execution_plan"]:
-            pass
-    else:
-        pass
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
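With the main() demo above removed, callers now drive the compiled graph themselves. A hedged driver sketch that reuses the state keys from the deleted code ("original_task", "messages", "decomposition_attempts"); whether those keys are still the graph's expected input is an assumption, and build_tool_node_graph, llm, and registry are assumed to come from the surrounding module:

import asyncio

from langchain_core.messages import HumanMessage

async def run_once(graph, task: str) -> dict:
    # State layout copied from the removed main(); treat as an assumption.
    initial_state = {
        "original_task": task,
        "messages": [HumanMessage(content=task)],
        "decomposition_attempts": 0,
    }
    return await graph.ainvoke(initial_state)

# Example usage (assumes llm and registry are already constructed):
# graph = build_tool_node_graph(llm, registry)
# final_state = asyncio.run(run_once(graph, "Summarize my unread Gmail messages"))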
universal_mcp_agents-0.1.12/dataset_code.py
@@ -1,83 +0,0 @@
-import asyncio
-
-from dotenv import load_dotenv
-from langsmith import Client
-
-from universal_mcp.agentr.registry import AgentrRegistry
-from universal_mcp.agents.codeact0 import CodeActAgent
-
-load_dotenv()
-
-client = Client()
-
-async def create_examples(user_input: str, tools_list: list[str]):
-    """Run the agent and create a LangSmith dataset example"""
-
-    # Create/get dataset
-    dataset_name = "codeagent-tests"
-    try:
-        dataset = client.create_dataset(
-            dataset_name,
-            description="Dataset for the codeagent"
-        )
-    except Exception:
-        dataset = client.read_dataset(dataset_name=dataset_name)
-
-
-    # Define the input
-    # user_input = "Send an email to Manoj from my google mail account, manoj@agentr.dev, with the subject 'Hello from auto agent' and the body 'testing'"
-
-    # Capture initial state
-    initial_state = {
-        "messages": [{"role": "user", "content": user_input}],
-        "tools": tools_list
-    }
-    #result = await agent.ainvoke(initial_state, context={"model": "anthropic/claude-4-sonnet-20250514", "system_time": system_time})
-
-    # Extract the final state from the result
-    # Note: Adjust these based on your actual result structure
-
-    # Create the dataset example with actual results
-    example = client.create_example(
-        inputs=initial_state,
-        outputs=None,
-        dataset_id=dataset.id
-    )
-
-    print(f"✅ Created dataset example with ID: {example.id}")
-    print(f"Dataset: {dataset_name}")
-    print(f"Input: {user_input}")
-
-    return example
-
-import yaml
-import os
-
-if __name__ == "__main__":
-    usecases_dir = os.path.join("src", "universal_mcp", "agents", "codeact0", "usecases")
-    async def main():
-        for name in sorted(os.listdir(usecases_dir)):
-            if not name.endswith(".yaml"):
-                continue
-            path = os.path.join(usecases_dir, name)
-            with open(path, encoding="utf-8") as f:
-                content = f.read()
-            data = yaml.safe_load(content) or {}
-            base_prompt = data.get("base_prompt")
-            tools = data.get("tools")
-            if not base_prompt:
-                continue
-            # Normalize tools to a flat list[str]
-            tools_list: list[str] = []
-            if isinstance(tools, list):
-                tools_list = tools
-            elif isinstance(tools, dict):
-                for v in tools.values():
-                    if isinstance(v, list):
-                        tools_list.extend(v)
-                    elif isinstance(v, str):
-                        tools_list.append(v)
-            print(f"Creating example for {name} with {len(tools_list)} tools…")
-            await create_examples(base_prompt, tools_list)
-
-    asyncio.run(main())
universal_mcp_agents-0.1.12/src/evals/datasets/test.jsonl
@@ -1 +0,0 @@
-{"user_input": "Send an email to manoj@agentr.dev with the subject 'Hello' and body 'This is a test of the Gmail agent.' from my Gmail account", "difficulty": 1, "required_tools": {"google_mail": ["send_email"]}}
universal_mcp_agents-0.1.12/src/universal_mcp/agents/codeact0/langgraph_graph.py
@@ -1,17 +0,0 @@
-from langgraph.checkpoint.memory import MemorySaver
-from universal_mcp.agentr.registry import AgentrRegistry
-
-from universal_mcp.agents.codeact0.agent import CodeActAgent
-
-
-async def agent():
-    memory = MemorySaver()
-    agent_object = await CodeActAgent(
-        name="CodeAct Agent",
-        instructions="Be very concise in your answers.",
-        model="anthropic:claude-4-sonnet-20250514",
-        tools={"google_mail": ["list_messages"]},
-        registry=AgentrRegistry(),
-        memory=memory,
-    )._build_graph()
-    return agent_object
universal_mcp_agents-0.1.12/src/universal_mcp/agents/codeact0/legacy_codeact.py
@@ -1,104 +0,0 @@
-import contextlib
-import inspect
-import io
-import queue
-import re
-import socket
-import threading
-import types
-from typing import Any
-
-import yaml
-from langchain.chat_models import init_chat_model
-from langchain_anthropic import ChatAnthropic
-from langchain_core.runnables import RunnableConfig
-from langchain_google_vertexai.model_garden import ChatAnthropicVertex
-from universal_mcp.agentr import Agentr
-from universal_mcp.types import ToolFormat
-
-from universal_mcp.agents.codeact0 import create_codeact
-from universal_mcp.agents.codeact0.config import ContextSchema
-from universal_mcp.agents.codeact0.utils import derive_context
-
-
-def eval(code: str, _locals: dict[str, Any], add_context: dict[str, Any]) -> tuple[str, dict[str, Any], dict[str, Any]]:
-    # print(_locals)
-    EXCLUDE_TYPES = (
-        types.ModuleType, # modules
-        type(re.match("", "")),
-        type(threading.Lock()), # instead of threading.Lock
-        type(threading.RLock()), # reentrant lock
-        threading.Event, # events
-        threading.Condition, # condition vars
-        threading.Semaphore, # semaphores
-        queue.Queue, # thread-safe queues
-        socket.socket, # network sockets
-        io.IOBase, # file handles (and StringIO/BytesIO)
-    )
-    try:
-        with contextlib.redirect_stdout(io.StringIO()) as f:
-            # Execute the code in the provided locals context
-            # Using exec to allow dynamic code execution
-            # This is a simplified version; in production, consider security implications
-            exec(code, _locals, _locals)
-        result = f.getvalue()
-        if not result:
-            result = "<code ran, no output printed to stdout>"
-    except Exception as e:
-        result = f"Error during execution: {repr(e)}"
-
-    # Return all variables in locals except __builtins__ and unpicklable objects (including tools)
-    all_vars = {}
-    for key, value in _locals.items():
-        if key == "__builtins__":
-            continue
-
-        # Skip coroutines, async generators, and coroutine functions
-        if inspect.iscoroutine(value) or inspect.iscoroutinefunction(value):
-            continue
-        if inspect.isasyncgen(value) or inspect.isasyncgenfunction(value):
-            continue
-
-        # Skip "obviously unpicklable" types
-        if isinstance(value, EXCLUDE_TYPES):
-            continue
-
-        # Keep if it's not a callable OR if it has no __name__ attribute
-        if not callable(value) or not hasattr(value, "__name__"):
-            all_vars[key] = value
-
-    new_add_context = derive_context(code, add_context)
-    return result, all_vars, new_add_context
-
-
-async def agent(config: RunnableConfig):
-    cfg = ContextSchema(**config.get("configurable", {}))
-
-    if cfg.json_prompt_name and cfg.json_prompt_name.strip():
-        with open(f"usecases/{cfg.json_prompt_name}.yaml", encoding="utf-8") as f:
-            content = f.read()
-        data = yaml.safe_load(content)
-        if cfg.base_prompt and cfg.base_prompt.strip():
-            pass
-        else:
-            cfg.base_prompt = data["base_prompt"]
-        cfg.tool_names = data["tools"]
-    agentr = Agentr()
-    agentr.load_tools(cfg.tool_names)
-    tools = [] # can add custom tools here like get_weather, get_simple_weather, etc.
-
-    tools_agentr = agentr.list_tools(format=ToolFormat.NATIVE)
-    tools.extend(tools_agentr)
-
-    if cfg.model_provider == "google_anthropic_vertex":
-        # For Google Anthropic Vertex, we need to use the specific model initialization due to location
-        model = ChatAnthropicVertex(model=cfg.model, temperature=0.2, location="asia-east1")
-    elif cfg.model == "claude-4-sonnet-20250514":
-        model = ChatAnthropic(
-            model=cfg.model, temperature=1, thinking={"type": "enabled", "budget_tokens": 2048}, max_tokens=4096
-        ) # pyright: ignore[reportCallIssue]
-    else:
-        model = init_chat_model(model=cfg.model, model_provider=cfg.model_provider, temperature=0.2)
-
-    code_act = create_codeact(model, cfg.base_prompt, tools, eval)
-    return code_act.compile()
universal_mcp_agents-0.1.12/test_code.py
@@ -1,78 +0,0 @@
-import asyncio
-
-from dotenv import load_dotenv
-from langsmith import Client, aevaluate
-
-from universal_mcp.agentr.registry import AgentrRegistry
-from universal_mcp.agents.codeact0 import CodeActAgent
-from universal_mcp.agents.codeact import CodeActAgent as MCodeActAgent
-
-load_dotenv()
-def convert_tools(tool_list: list[str]) -> dict[str, list[str]]:
-    result = {}
-    for tool in tool_list:
-        if "__" in tool:
-            prefix, suffix = tool.split("__", 1)
-            result.setdefault(prefix, []).append(suffix)
-    return result
-
-
-
-
-async def target_function1(inputs: dict):
-    with open('src/universal_mcp/agents/codeact0/llm_tool.py', 'r') as file:
-        llm_tool_code = file.read()
-
-    base_agent = CodeActAgent(
-        "CodeAct Agent",
-        instructions="Be very concise in your answers. DO NOT STOP or ASK the user any questions. Assume details if required.",
-        model="anthropic:claude-4-sonnet-20250514",
-        tools=convert_tools(inputs["tools"]),
-        registry=AgentrRegistry(),
-        initial_code = llm_tool_code
-    )
-    agent = await base_agent._build_graph()
-    result = await agent.ainvoke(inputs, config = {"recursion_limit": 100})
-    return result
-
-
-async def target_function2(inputs: dict):
-
-    base_agent = MCodeActAgent(
-        "CodeAct Agent",
-        instructions="Be very concise in your answers. DO NOT STOP or ASK the user any questions. Assume details if required.",
-        model="anthropic:claude-4-sonnet-20250514",
-        tools=convert_tools(inputs["tools"]),
-        registry=AgentrRegistry(),
-    )
-    agent = await base_agent._build_graph()
-    result = await agent.ainvoke(inputs, config = {"recursion_limit":100})
-    return result
-
-
-if __name__ == "__main__":
-    client = Client()
-    dataset_name = "codeagent-tests"
-    # asyncio.run(aevaluate(
-    #     target_function1,
-    #     data=client.list_examples(
-    #         dataset_name=dataset_name,example_ids=["5425de13-58b0-44b3-802f-9e5e6b2e3a0c", "56bcf12f-2608-4ad7-8538-507ff0e22df1", "79ecefe9-3a13-428e-bdda-f3cc1eb03578", "c0a2e3cf-9bea-4cf3-90be-7ab8945094b3", "a73827d5-2c77-4d8b-a486-93b0e8ce6713"]
-    #     ),
-    #     evaluators=[],
-    #     experiment_prefix ="test-1-errors"
-    # ))
-
-    asyncio.run(aevaluate(
-        target_function1,
-        data=client.list_examples(
-            dataset_name=dataset_name,
-            splits=["base"]
-        ),
-        evaluators=[],
-        experiment_prefix ="Code0-agent",
-        num_repetitions=1
-    ))
-
-    # 49a34291-5907-4ae0-a582-8fc1fb5149f3
-    # a15e0d66-e6f6-4ad6-8e31-bd1711138dc2 - 5 github examples to 2
-    # e8505035-7878-4c58-9938-fc7d80767047
{universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/__init__.py
RENAMED
@@ -3,8 +3,8 @@ from typing import Literal
 from universal_mcp.agents.base import BaseAgent
 from universal_mcp.agents.bigtool import BigToolAgent
 from universal_mcp.agents.builder.builder import BuilderAgent
-from universal_mcp.agents.codeact0 import CodeActAgent as CodeActRepl
 from universal_mcp.agents.codeact import CodeActAgent as CodeActScript
+from universal_mcp.agents.codeact0 import CodeActAgent as CodeActRepl
 from universal_mcp.agents.react import ReactAgent
 from universal_mcp.agents.simple import SimpleAgent
 
{universal_mcp_agents-0.1.12 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/cli.py
@@ -1,3 +1,5 @@
+import asyncio
+
 from langgraph.checkpoint.memory import MemorySaver
 from typer import Typer
 from universal_mcp.agentr.client import AgentrClient
@@ -6,8 +8,6 @@ from universal_mcp.logger import setup_logger
 
 from universal_mcp.agents import get_agent
 
-import asyncio
-
 app = Typer()
 
 