universal-mcp-agents 0.1.11__tar.gz → 0.1.13__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (93)
  1. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/.gitignore +1 -0
  2. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/GEMINI.md +1 -0
  3. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/PKG-INFO +1 -1
  4. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/bump_and_release.sh +1 -1
  5. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/pyproject.toml +6 -1
  6. universal_mcp_agents-0.1.13/src/evals/datasets/codeact.jsonl +11 -0
  7. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/evals/evaluators.py +7 -0
  8. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/evals/run.py +2 -0
  9. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/tests/test_agents.py +6 -6
  10. universal_mcp_agents-0.1.13/src/universal_mcp/agents/__init__.py +39 -0
  11. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/base.py +10 -7
  12. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/__init__.py +2 -2
  13. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/__main__.py +0 -1
  14. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/agent.py +0 -1
  15. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/graph.py +6 -5
  16. universal_mcp_agents-0.1.13/src/universal_mcp/agents/builder/__main__.py +125 -0
  17. universal_mcp_agents-0.1.13/src/universal_mcp/agents/builder/builder.py +225 -0
  18. universal_mcp_agents-0.1.13/src/universal_mcp/agents/builder/prompts.py +173 -0
  19. universal_mcp_agents-0.1.13/src/universal_mcp/agents/builder/state.py +24 -0
  20. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/cli.py +3 -2
  21. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact/__main__.py +33 -0
  22. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact/agent.py +240 -0
  23. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact/models.py +11 -0
  24. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/prompts.py +12 -12
  25. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact/sandbox.py +85 -0
  26. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/state.py +2 -0
  27. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/__init__.py +3 -0
  28. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/codeact → universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0}/__main__.py +2 -2
  29. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/agent.py +136 -0
  30. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/config.py +77 -0
  31. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/llm_tool.py +379 -0
  32. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/prompts.py +156 -0
  33. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/sandbox.py +90 -0
  34. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/state.py +12 -0
  35. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/1-unsubscribe.yaml +4 -0
  36. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/10-reddit2.yaml +10 -0
  37. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/11-github.yaml +13 -0
  38. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/2-reddit.yaml +27 -0
  39. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/2.1-instructions.md +81 -0
  40. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/2.2-instructions.md +71 -0
  41. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/3-earnings.yaml +4 -0
  42. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/4-maps.yaml +41 -0
  43. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/5-gmailreply.yaml +8 -0
  44. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/6-contract.yaml +6 -0
  45. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/7-overnight.yaml +14 -0
  46. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/8-sheets_chart.yaml +25 -0
  47. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/usecases/9-learning.yaml +9 -0
  48. universal_mcp_agents-0.1.13/src/universal_mcp/agents/codeact0/utils.py +374 -0
  49. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/hil.py +4 -4
  50. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/__init__.py +7 -1
  51. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/react.py +11 -3
  52. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/shared/tool_node.py +1 -34
  53. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/simple.py +12 -2
  54. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/utils.py +17 -0
  55. universal_mcp_agents-0.1.13/src/universal_mcp/applications/llm/__init__.py +3 -0
  56. universal_mcp_agents-0.1.13/src/universal_mcp/applications/llm/app.py +158 -0
  57. universal_mcp_agents-0.1.13/src/universal_mcp/applications/ui/app.py +269 -0
  58. universal_mcp_agents-0.1.13/test.py +49 -0
  59. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/uv.lock +9 -1
  60. universal_mcp_agents-0.1.11/src/evals/datasets/test.jsonl +0 -1
  61. universal_mcp_agents-0.1.11/src/universal_mcp/agents/__init__.py +0 -41
  62. universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtool2/__init__.py +0 -67
  63. universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtool2/__main__.py +0 -23
  64. universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtool2/agent.py +0 -13
  65. universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtool2/graph.py +0 -155
  66. universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtool2/meta_tools.py +0 -120
  67. universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtool2/prompts.py +0 -15
  68. universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache/state.py +0 -27
  69. universal_mcp_agents-0.1.11/src/universal_mcp/agents/builder.py +0 -204
  70. universal_mcp_agents-0.1.11/src/universal_mcp/agents/codeact/agent.py +0 -138
  71. universal_mcp_agents-0.1.11/src/universal_mcp/agents/codeact/sandbox.py +0 -39
  72. universal_mcp_agents-0.1.11/src/universal_mcp/applications/ui/app.py +0 -295
  73. universal_mcp_agents-0.1.11/test.py +0 -16
  74. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/.pre-commit-config.yaml +0 -0
  75. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/PROMPTS.md +0 -0
  76. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/README.md +0 -0
  77. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/evals/__init__.py +0 -0
  78. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/evals/dataset.py +0 -0
  79. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/evals/datasets/exact.jsonl +0 -0
  80. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/evals/datasets/tasks.jsonl +0 -0
  81. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/evals/utils.py +0 -0
  82. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/context.py +0 -0
  83. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/prompts.py +0 -0
  84. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtool2 → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/state.py +0 -0
  85. {universal_mcp_agents-0.1.11/src/universal_mcp/agents/bigtoolcache → universal_mcp_agents-0.1.13/src/universal_mcp/agents/bigtool}/tools.py +0 -0
  86. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/__init__.py +0 -0
  87. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/codeact/utils.py +0 -0
  88. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/llm.py +0 -0
  89. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/__main__.py +0 -0
  90. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/graph.py +0 -0
  91. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/prompts.py +0 -0
  92. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/planner/state.py +0 -0
  93. {universal_mcp_agents-0.1.11 → universal_mcp_agents-0.1.13}/src/universal_mcp/agents/shared/prompts.py +0 -0
@@ -58,3 +58,4 @@ site/
58
58
  .langgraph_api/
59
59
  langgraph.json
60
60
  agentr-1-6c4ebd5cc914.json
61
+
@@ -36,6 +36,7 @@ Usage in this repo
36
36
  - When adding libraries or changing versions, propose `uv add ...` changes that update both `pyproject.toml` and `uv.lock`, then run `uv run pytest -q` to validate
37
37
  - Prefer minimal diffs, explain the plan, apply changes, and run tests/tooling via `uv run`
38
38
  - If build/test fails, inspect error context, adjust constraints or code, and re-run via `uv run`
39
+ - After every file change, run `uv run ruff check .` to confirm the changes and ensure code quality.
39
40
 
40
41
  Common commands (copy/paste)
41
42
  - Initialize: `uv init` | Install deps: `uv sync`
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: universal-mcp-agents
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Add your description here
5
5
  Project-URL: Homepage, https://github.com/universal-mcp/applications
6
6
  Project-URL: Repository, https://github.com/universal-mcp/applications
@@ -9,7 +9,7 @@ uv sync --all-extras
9
9
 
10
10
  # Run tests with pytest
11
11
  echo "Running tests with pytest..."
12
- uv run pytest
12
+ # uv run pytest
13
13
 
14
14
  echo "Tests passed!"
15
15
 
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
6
6
 
7
7
  [project]
8
8
  name = "universal-mcp-agents"
9
- version = "0.1.11"
9
+ version = "0.1.13"
10
10
  description = "Add your description here"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -79,3 +79,8 @@ pythonpath = [
79
79
  asyncio_mode = "strict"
80
80
  asyncio_default_fixture_loop_scope = "module"
81
81
 
82
+ [dependency-groups]
83
+ dev = [
84
+ "ruff>=0.13.0",
85
+ ]
86
+
@@ -0,0 +1,11 @@
1
+ {"user_input": "Find and extract unsubscribe links from all emails in my inbox from the last 7 days. List all unsubscribe links found with the email subject and sender.", "required_tools": {"google_mail": ["list_messages", "get_message_details"]}}
2
+ {"user_input": "Process rows 2-5 from the Google Sheet (ID: 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8). For each unprocessed row, extract Reddit post links, fetch post details and comments, analyze content relevance to AgentR/Wingmen products, classify into tiers 1-4, generate appropriate response drafts, and update the sheet with all findings.", "required_tools": {"google_sheet": ["add_table", "append_values", "update_values", "format_cells", "get_spreadsheet_metadata", "batch_get_values_by_range"], "reddit": ["get_post_comments_details"], "google_mail": ["list_messages"]}}
3
+ {"user_input": "Fetch all open issues from the GitHub repository \"microsoft/vscode\" and add them to a new Google Sheet. Then create corresponding tasks in ClickUp for each issue with descriptions, tags, and \"In Progress\" status. Delete processed rows from the sheet after creating ClickUp tasks.", "required_tools": {"google_sheet": ["get_values", "delete_dimensions", "update_values", "get_spreadsheet_metadata", "batch_get_values_by_range"], "clickup": ["tasks_create_new_task", "spaces_get_details", "lists_get_list_details", "tasks_get_list_tasks"], "github": ["list_issues", "update_issue"]}}
4
+ {"user_input": "Goal: Process unprocessed rows in a fixed Google Sheet, scrape Reddit for context, filter posts, and generate short, natural comments linking to AgentR/Wingmen when relevant. Workflow: 1) Sheet & Row Selection: Fixed Sheet ID 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8, tab Posts. Process rows 2-5 (first 4 unprocessed rows) immediately without asking for user input. Only process rows with empty Match Type (Col I) and no Tier 1-4 assigned. 2) Reddit Context Fetch: Extract Post Link & ID. Use reddit to fetch post upvotes + top comments (max 5). Ensure post/comment is active, visible, and unlocked. 3) Filtration & Fit: Classify content (developer, consumer, anecdotal). Apply GTM Filtration to skip irrelevant, negative, political, or low-quality posts. Identify direct or adjacent fit to AgentR (Universal MCP Server) or Wingmen. Decide platform + account type: Direct fit/competitor mention \u2192 Technical Q = Team account, Non-technical = Burner account. Adjacent fit \u2192 Official account. Decide reply target (original comment/post or parent post). 4) Comment Generation: For Tier 1-3, craft a 2-3 line, context-aware, conversational reply. Mention AgentR/Wingmen organically, avoid sales tone or forced CTAs. Use light imperfections for human tone. Skip negative sentiment entirely. One comment per post. 5) Populate Output: Fill Upvote Count, Match Type, Account Type, Response Draft, Respond on. Return updated Google Sheet link. Tier Definitions: Tier 1 = Deep MCP, AI agent, tool integrations, or architecture discussions where infra is highly relevant. Tier 2 = Specific workflows, automation tooling, or productivity systems where Wingmen or MCP Server could be useful. Tier 3 = Broader ecosystem (LangChain/CrewAI/agent tooling) where a soft recommendation adds value. Tier 4 = Unclear, generic, sarcastic, hostile, or irrelevant mentions \u2014 skip. 
Execute immediately using the fixed Google Sheet ID: 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8, tab \"Posts\". Process rows(first 4 unprocessed rows) without asking for user input. Only process rows where Match Type (Column I) is empty. For each row, extract the Post Link, fetch Reddit data, apply GTM filtration, generate appropriate responses, and update the sheet. Return the updated Google Sheet link when complete.", "required_tools": {"reddit": ["get_post_comments_details"], "google_sheet": ["update_values", "get_values", "get_spreadsheet_metadata", "batch_get_values_by_range"]}}
5
+ {"user_input": "Generate a financial flash report for Apple Inc. Research their latest earnings data including revenue, net income, EPS, and year-over-year changes. Create a formatted report with highlights, upcoming events, and summary. Present the report in chat and email it to adit@agentr.dev.", "required_tools": {"exa": ["answer"], "google_mail": ["send_email"]}}
6
+ {"user_input": "Objective: Find businesses from Google Maps for a given category & location, store them in a Google Sheet, then process unprocessed leads to scrape emails and sync with HubSpot CRM. Stage 1 - Lead Discovery Get coordinates of Area + City. Search on Google Maps with category & coordinates. Extract: Name, Google Maps URL, Address, Phone, Website; leave Email & CRM Status blank. Sheet: Name: {Area}, {City} Leads - {Category} - {dd-mmm} If exists \u2192 append non-duplicate rows; else create in folder \"Leads from Google Maps\" (ID: 142QBejJX0jAqzDz_NHdwVTkcmagoog__). Add headers: Name | Google Maps URL | Address | Phone | Website | Email | CRM Status. Populate with businesses found. Edge Cases: No results \u2192 return message, skip sheet creation. Missing data \u2192 leave blank. Stage 2 - Lead Processing & CRM Sync Locate sheet in Google Drive, ensure headers match. Parse category from sheet name. Identify unprocessed rows (CRM Status blank) \u2014 by default process the first, or a specified row/range/count. Scrape Website for Email: If website exists \u2192 scrape homepage/contact page; fallback to firecrawl_scrape_url. Save found email in sheet. HubSpot Handling: Search contact by email/website/phone. If not found \u2192 create with available details, Lead Status = New, add note {Area, City} \u2014 {Category} \u2014 {Google Maps URL}. If exists \u2192 append note; keep other fields unchanged. Save HubSpot Contact URL/ID in sheet. Update CRM Status: Lead Created, Lead Creation Failed, Website not found, Email not found, etc. Edge Cases: No Website \u2192 create with phone; mark Website not found. No Email \u2192 create; mark Email not found. Email already in sheet \u2192 skip row. 
Execute immediately for \"Cafes\" near \"IIT Bombay\" in \"Mumbai\" without asking for confirmation.", "required_tools": {"serpapi": ["google_maps_search"], "firecrawl": ["scrape_url"], "google_drive": ["get_file_details", "create_folder", "find_folder_id_by_name", "search_files"], "google_sheet": ["update_values", "get_values", "get_spreadsheet_metadata", "batch_get_values_by_range", "create_spreadsheet", "clear_values"], "hubspot": ["search_contacts_post", "batch_read_contacts_post", "get_contacts", "get_contact_by_id", "update_contact_by_id", "batch_update_contacts", "create_contacts_batch", "create_contact"]}}
7
+ {"user_input": "Process emails from the last 24 hours. Fetch primary inbox emails excluding replied threads, classify with LLM as Reply Required, No Reply Needed, or Ambiguous. For Reply Required/Ambiguous, draft human, on-brand replies for user review. Follow greeting, acknowledge, address concern, invite further questions, and friendly sign-off. Provide end summary of drafts, skipped, and ambiguous emails. Execute immediately without asking for confirmation. Do not send any emails. Just provide me a report.", "required_tools": {"google_mail": ["list_messages", "get_message_details"]}}
8
+ {"user_input": "Analyze a contract from my google drive from the perspective of the Service Provider. Use the search to find it, do not ask me any questions, and assume details that I have not provided. Identify potentially unfavorable clauses such as vague terms, one-sided obligations, IP transfer issues, indemnity clauses, termination conditions, and payment problems. Provide a structured analysis with clause numbers, full text, and explanations of concerns.", "required_tools": {"google_drive": ["get_file_details", "search_files"], "google_docs": ["get_document"], "exa": ["answer"]}}
9
+ {"user_input": "Create a summary of overnight updates from 8:00 PM yesterday to 8:00 AM today in IST. Check Gmail for important emails and ClickUp for mentions and assigned tasks. Organize findings into high priority and other items, then provide a comprehensive summary of all overnight activity.", "required_tools": {"google_mail": ["list_messages"], "clickup": ["comments_get_task_comments", "comments_get_list_comments", "comments_get_view_comments", "tasks_get_list_tasks", "tasks_filter_team_tasks", "time_tracking_get_time_entries_within_date_range", "time_tracking_get_time_entry_history", "authorization_get_workspace_list", "spaces_get_details", "lists_get_list_details"]}}
10
+ {"user_input": "Analyze the data in Google Sheet (ID: 1nnnCp3_IWcdHv4UVgXtwYF5wedxbqF4RIeyjN6mCKD8) and create 3-5 relevant charts and visualizations. Add pie charts, bar graphs, and other appropriate visualizations based on the data structure. Embed all charts directly into the sheet and provide the updated sheet link.", "required_tools": {"google_sheet": ["create_spreadsheet", "get_spreadsheet_metadata", "batch_get_values_by_range", "append_dimensions", "insert_dimensions", "delete_sheet", "add_sheet", "delete_dimensions", "add_basic_chart", "add_table", "add_pie_chart", "clear_values", "update_values", "clear_basic_filter", "get_values", "discover_tables", "set_basic_filter", "analyze_table_schema", "copy_sheet_to_spreadsheet", "append_values", "batch_get_values_by_data_filter", "batch_clear_values", "format_cells"]}}
11
+ {"user_input": "Create a 7-day learning plan for Python Programming. Research essential concepts and skills, create a detailed day-by-day plan with topics, goals, resources, and exercises. Compile the plan into a Google Doc and schedule daily emails at 8 AM starting today. Send Day 1 immediately to adit@agentr.dev and provide the Google Doc link.", "required_tools": {"google_docs": ["get_document", "create_document", "insert_text"], "google_mail": ["send_email", "send_draft", "create_draft"], "exa": ["answer"]}}
@@ -103,6 +103,13 @@ trajectory_evaluator = create_trajectory_llm_as_judge(
103
103
  )
104
104
 
105
105
 
106
+ codeact_evaluator = create_trajectory_llm_as_judge(
107
+ prompt=TRAJECTORY_ACCURACY_PROMPT,
108
+ feedback_key="codeact_accuracy",
109
+ model="anthropic:claude-4-sonnet-20250514",
110
+ )
111
+
112
+
106
113
  @run_evaluator
107
114
  def tool_node_evaluator(run: Run, example: Example | None = None) -> EvaluationResult:
108
115
  """
@@ -25,6 +25,7 @@ EVALUATORS: dict[str, Any] = {
25
25
  "exact_match": exact_match_evaluator,
26
26
  "trajectory": trajectory_evaluator,
27
27
  "tool_node": tool_node_evaluator,
28
+ "codeact": codeact_evaluator,
28
29
  }
29
30
 
30
31
 
@@ -33,6 +34,7 @@ class EvaluatorName(str, Enum):
33
34
  exact_match = "exact_match"
34
35
  trajectory = "trajectory"
35
36
  tool_node = "tool_node"
37
+ codeact = "codeact"
36
38
 
37
39
 
38
40
  class Difficulty(str, Enum):
@@ -8,7 +8,7 @@ from universal_mcp.types import ToolFormat
8
8
 
9
9
  from universal_mcp.agents import get_agent
10
10
  from universal_mcp.agents.base import BaseAgent
11
- from universal_mcp.agents.builder import BuilderAgent
11
+ from universal_mcp.agents.builder.builder import BuilderAgent
12
12
  from universal_mcp.agents.llm import load_chat_model
13
13
  from universal_mcp.agents.shared.tool_node import build_tool_node_graph
14
14
 
@@ -284,9 +284,9 @@ class TestToolFinderGraph:
284
284
  "react",
285
285
  "simple",
286
286
  "builder",
287
- "planner",
288
- "bigtoolcache",
289
- "bigtool2",
287
+ "bigtool",
288
+ # "codeact-script",
289
+ # "codeact-repl",
290
290
  ],
291
291
  )
292
292
  class TestAgents:
@@ -312,7 +312,7 @@ class TestAgents:
312
312
  await agent.ainit()
313
313
  # Invoke the agent graph to get the final state
314
314
  final_state = await agent.invoke(
315
- task,
315
+ user_input=task,
316
316
  thread_id=thread_id,
317
317
  )
318
318
 
@@ -335,7 +335,7 @@ class TestAgentBuilder:
335
335
  agent = BuilderAgent(
336
336
  name="Test Builder Agent",
337
337
  instructions="Test instructions for builder",
338
- model="gemini/gemini-1.5-flash",
338
+ model="gemini/gemini-2.5-flash",
339
339
  registry=registry,
340
340
  )
341
341
  yield agent
@@ -0,0 +1,39 @@
1
+ from typing import Literal
2
+
3
+ from universal_mcp.agents.base import BaseAgent
4
+ from universal_mcp.agents.bigtool import BigToolAgent
5
+ from universal_mcp.agents.builder.builder import BuilderAgent
6
+ from universal_mcp.agents.codeact import CodeActAgent as CodeActScript
7
+ from universal_mcp.agents.codeact0 import CodeActAgent as CodeActRepl
8
+ from universal_mcp.agents.react import ReactAgent
9
+ from universal_mcp.agents.simple import SimpleAgent
10
+
11
+
12
+ def get_agent(agent_name: Literal["react", "simple", "builder", "bigtool", "codeact-script", "codeact-repl"]):
13
+ if agent_name == "react":
14
+ return ReactAgent
15
+ elif agent_name == "simple":
16
+ return SimpleAgent
17
+ elif agent_name == "builder":
18
+ return BuilderAgent
19
+ elif agent_name == "bigtool":
20
+ return BigToolAgent
21
+ elif agent_name == "codeact-script":
22
+ return CodeActScript
23
+ elif agent_name == "codeact-repl":
24
+ return CodeActRepl
25
+ else:
26
+ raise ValueError(
27
+ f"Unknown agent: {agent_name}. Possible values: react, simple, builder, bigtool, codeact-script, codeact-repl"
28
+ )
29
+
30
+
31
+ __all__ = [
32
+ "BaseAgent",
33
+ "ReactAgent",
34
+ "SimpleAgent",
35
+ "BuilderAgent",
36
+ "BigToolAgent",
37
+ "CodeActScript",
38
+ "CodeActRepl",
39
+ ]
@@ -2,7 +2,7 @@
2
2
  from typing import cast
3
3
  from uuid import uuid4
4
4
 
5
- from langchain_core.messages import AIMessage, AIMessageChunk
5
+ from langchain_core.messages import AIMessageChunk
6
6
  from langgraph.checkpoint.base import BaseCheckpointSaver
7
7
  from langgraph.graph import StateGraph
8
8
  from langgraph.types import Command
@@ -90,7 +90,7 @@ class BaseAgent:
90
90
  async def stream_interactive(self, thread_id: str, user_input: str):
91
91
  await self.ainit()
92
92
  with self.cli.display_agent_response_streaming(self.name) as stream_updater:
93
- async for event in self.stream(thread_id, user_input):
93
+ async for event in self.stream(thread_id=thread_id, user_input=user_input):
94
94
  if isinstance(event.content, list):
95
95
  thinking_content = "".join([c.get("thinking", "") for c in event.content])
96
96
  stream_updater.update(thinking_content, type_="thinking")
@@ -112,6 +112,7 @@ class BaseAgent:
112
112
  run_metadata.update(metadata)
113
113
 
114
114
  run_config = {
115
+ "recursion_limit": 25,
115
116
  "configurable": {"thread_id": thread_id},
116
117
  "metadata": run_metadata,
117
118
  }
@@ -123,6 +124,11 @@ class BaseAgent:
123
124
  )
124
125
  return result
125
126
 
127
+ async def get_state(self, thread_id: str):
128
+ await self.ainit()
129
+ state = await self._graph.aget_state(config={"configurable": {"thread_id": thread_id}})
130
+ return state
131
+
126
132
  async def run_interactive(self, thread_id: str = str(uuid4())):
127
133
  """Main application loop"""
128
134
 
@@ -133,7 +139,7 @@ class BaseAgent:
133
139
  # Main loop
134
140
  while True:
135
141
  try:
136
- state = self._graph.get_state(config={"configurable": {"thread_id": thread_id}})
142
+ state = await self.get_state(thread_id=thread_id)
137
143
  if state.interrupts:
138
144
  value = self.cli.handle_interrupt(state.interrupts[0])
139
145
  self._graph.invoke(
@@ -168,14 +174,11 @@ class BaseAgent:
168
174
  continue
169
175
 
170
176
  # Process with agent
171
- await self.stream_interactive(thread_id, user_input)
177
+ await self.stream_interactive(thread_id=thread_id, user_input=user_input)
172
178
 
173
179
  except KeyboardInterrupt:
174
180
  self.cli.display_info("\nGoodbye! 👋")
175
181
  break
176
182
  except Exception as e:
177
- import traceback
178
-
179
- traceback.print_exc()
180
183
  self.cli.display_error(f"An error occurred: {str(e)}")
181
184
  break
@@ -13,7 +13,7 @@ from .prompts import SYSTEM_PROMPT
13
13
  from .tools import create_meta_tools
14
14
 
15
15
 
16
- class BigToolAgentCache(BaseAgent):
16
+ class BigToolAgent(BaseAgent):
17
17
  def __init__(
18
18
  self,
19
19
  registry: ToolRegistry,
@@ -63,4 +63,4 @@ class BigToolAgentCache(BaseAgent):
63
63
  return self._graph
64
64
 
65
65
 
66
- __all__ = ["BigToolAgentCache"]
66
+ __all__ = ["BigToolAgent"]
@@ -2,7 +2,6 @@ import asyncio
2
2
 
3
3
  from loguru import logger
4
4
  from universal_mcp.agentr.registry import AgentrRegistry
5
-
6
5
  from universal_mcp.agents.bigtoolcache import BigToolAgentCache
7
6
 
8
7
 
@@ -1,5 +1,4 @@
1
1
  from universal_mcp.agentr.registry import AgentrRegistry
2
-
3
2
  from universal_mcp.agents.bigtoolcache import BigToolAgentCache
4
3
 
5
4
 
@@ -8,7 +8,6 @@ from langchain_core.messages import AIMessage, SystemMessage, ToolMessage
8
8
  from langchain_core.tools import BaseTool
9
9
  from langgraph.graph import StateGraph
10
10
  from langgraph.types import Command
11
- from loguru import logger
12
11
  from universal_mcp.tools.registry import ToolRegistry
13
12
  from universal_mcp.types import ToolFormat
14
13
 
@@ -35,7 +34,11 @@ def build_graph(
35
34
  current_tools = await registry.export_tools(tools=state["selected_tool_ids"], format=ToolFormat.LANGCHAIN)
36
35
  else:
37
36
  current_tools = []
38
- all_tools = [meta_tools["search_tools"], meta_tools["load_tools"], meta_tools.get("web_search")] + default_tools + current_tools
37
+ all_tools = (
38
+ [meta_tools["search_tools"], meta_tools["load_tools"], meta_tools.get("web_search")]
39
+ + default_tools
40
+ + current_tools
41
+ )
39
42
 
40
43
  # Remove duplicates based on tool name
41
44
  seen_names = set()
@@ -81,7 +84,7 @@ def build_graph(
81
84
  valid_tools = await get_valid_tools(tool_ids=tool_call["args"]["tool_ids"], registry=registry)
82
85
  new_tool_ids.extend(valid_tools)
83
86
  # Create tool message response
84
- tool_result=f"Successfully loaded {len(valid_tools)} tools: {valid_tools}"
87
+ tool_result = f"Successfully loaded {len(valid_tools)} tools: {valid_tools}"
85
88
  elif tool_call["name"] == "search_tools":
86
89
  tool_result = await meta_tools["search_tools"].ainvoke(tool_call["args"])
87
90
  elif tool_call["name"] == "web_search":
@@ -99,8 +102,6 @@ def build_graph(
99
102
 
100
103
  return Command(goto="agent", update={"messages": tool_messages, "selected_tool_ids": new_tool_ids})
101
104
 
102
-
103
-
104
105
  # Define the graph
105
106
  workflow = StateGraph(State)
106
107
 
@@ -0,0 +1,125 @@
1
+ import asyncio
2
+ import json
3
+ from uuid import uuid4
4
+
5
+ from langgraph.checkpoint.memory import MemorySaver
6
+ from loguru import logger
7
+ from universal_mcp.agentr.registry import AgentrRegistry
8
+
9
+ from universal_mcp.agents.builder.builder import BuilderAgent
10
+
11
+
12
+ async def run_interactive_build():
13
+ """Simulates a multi-turn conversation to build and then modify an agent."""
14
+ logger.info("--- SCENARIO 1: INTERACTIVE AGENT BUILD & MODIFY ---")
15
+
16
+ registry = AgentrRegistry()
17
+ memory = MemorySaver()
18
+ agent = BuilderAgent(
19
+ name="Builder Agent",
20
+ instructions="You are a builder agent that creates other agents.",
21
+ model="anthropic/claude-4-sonnet-20250514",
22
+ registry=registry,
23
+ memory=memory,
24
+ )
25
+
26
+ thread_id = str(uuid4())
27
+
28
+ conversation_script = [
29
+ "Send an email to manoj@agentr.dev with the subject 'Hello' and body 'This is a test of the Gmail agent.' from my Gmail account.",
30
+ "Use outlook instead of gmail",
31
+ ]
32
+
33
+ final_result = {}
34
+ for i, user_input in enumerate(conversation_script):
35
+ logger.info(f"\n--- Conversation Turn {i + 1} ---")
36
+ logger.info(f"User Request: '{user_input}'")
37
+
38
+ result = await agent.invoke(user_input=user_input, thread_id=thread_id)
39
+ final_result.update(result) # Keep updating the final result
40
+
41
+ generated_agent = final_result.get("generated_agent")
42
+ tool_config = final_result.get("tool_config")
43
+
44
+ if generated_agent:
45
+ logger.info("--- Generated/Modified Agent ---")
46
+ logger.info(f"Name: {generated_agent.name}")
47
+ logger.info(f"Description: {generated_agent.description}")
48
+ logger.info(f"Expertise: {generated_agent.expertise}")
49
+ logger.info(f"Instructions:\n{generated_agent.instructions}")
50
+
51
+ if tool_config:
52
+ logger.info("--- Selected Tools ---")
53
+ tools_str = "\n".join(f"- {app}: {', '.join(tool_ids)}" for app, tool_ids in tool_config.items())
54
+ logger.info(tools_str)
55
+ else:
56
+ logger.info("--- Selected Tools ---")
57
+ logger.info("No tools selected for this agent yet.")
58
+
59
+
60
+ async def run_conversation_build():
61
+ """Simulates a one-shot agent build from a conversation history payload."""
62
+ logger.info("\n\n--- SCENARIO 2: AGENT BUILD FROM CONVERSATION HISTORY ---")
63
+
64
+ registry = AgentrRegistry()
65
+ agent = BuilderAgent(
66
+ name="Builder Agent",
67
+ instructions="You build agents from conversation transcripts.",
68
+ model="anthropic/claude-4-sonnet-20250514",
69
+ registry=registry,
70
+ )
71
+
72
+ sample_conversation_history = [
73
+ {
74
+ "type": "human",
75
+ "content": "Hey, can you look at our main branch on the universal-mcp repo and tell me what the last 3 pull requests were?",
76
+ },
77
+ {
78
+ "type": "ai",
79
+ "content": "Of course. The last 3 pull requests are: #101 'Fix login bug', #102 'Update documentation', and #103 'Add new chart component'.",
80
+ },
81
+ {
82
+ "type": "human",
83
+ "content": "Awesome, thanks. Now can you draft a new Google Doc and put that list in there for me?",
84
+ },
85
+ {"type": "ai", "content": "Done. I have created a new Google Doc with the list of the last 3 pull requests."},
86
+ ]
87
+ sample_tool_config = {"github": ["get_pull_requests"], "google_docs": ["create_document"]}
88
+ wingman_payload = {"conversation_history": sample_conversation_history, "tool_config": sample_tool_config}
89
+
90
+ logger.info(f"Payload Conversation History Length: {len(sample_conversation_history)} messages")
91
+ logger.info(f"Payload Tools Provided: {list(sample_tool_config.keys())}")
92
+
93
+ # The payload must be passed as a JSON string in the 'user_input'
94
+ payload_str = json.dumps(wingman_payload)
95
+ thread_id = str(uuid4())
96
+ result = await agent.invoke(user_input=payload_str, thread_id=thread_id)
97
+
98
+ generated_agent = result.get("generated_agent")
99
+ tool_config = result.get("tool_config")
100
+
101
+ if generated_agent:
102
+ logger.info("\n--- Generated Agent Profile ---")
103
+ logger.info(f"Name: {generated_agent.name}")
104
+ logger.info(f"Description: {generated_agent.description}")
105
+ logger.info(f"Expertise: {generated_agent.expertise}")
106
+ logger.info(f"Instructions:\n{generated_agent.instructions}")
107
+ logger.info(f"Schedule: {generated_agent.schedule}")
108
+ else:
109
+ logger.error("Error: Agent profile was not generated.")
110
+
111
+ if tool_config:
112
+ logger.info("--- Final Tool Configuration ---")
113
+ tools_str = "\n".join(f"- {app}: {', '.join(tool_ids)}" for app, tool_ids in tool_config.items())
114
+ logger.info(tools_str)
115
+ else:
116
+ logger.error("Error: Tool configuration is missing.")
117
+
118
+
119
+ async def main():
120
+ await run_interactive_build()
121
+ await run_conversation_build()
122
+
123
+
124
+ if __name__ == "__main__":
125
+ asyncio.run(main())