universal-mcp-agents 0.1.8__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of universal-mcp-agents might be problematic. Click here for more details.

Files changed (72) hide show
  1. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/PKG-INFO +1 -1
  2. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/bump_and_release.sh +0 -1
  3. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/pyproject.toml +1 -1
  4. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/datasets/tasks.jsonl +1 -0
  5. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/__init__.py +4 -1
  6. universal_mcp_agents-0.1.9/src/universal_mcp/agents/codeact/__init__.py +3 -0
  7. universal_mcp_agents-0.1.9/src/universal_mcp/agents/codeact/__main__.py +25 -0
  8. universal_mcp_agents-0.1.9/src/universal_mcp/agents/codeact/agent.py +171 -0
  9. universal_mcp_agents-0.1.9/src/universal_mcp/agents/codeact/prompts.py +92 -0
  10. universal_mcp_agents-0.1.9/src/universal_mcp/agents/codeact/sandbox.py +48 -0
  11. universal_mcp_agents-0.1.9/src/universal_mcp/agents/codeact/state.py +12 -0
  12. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/shared/prompts.py +29 -15
  13. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/shared/tool_node.py +28 -13
  14. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/uv.lock +165 -164
  15. universal_mcp_agents-0.1.8/src/universal_mcp/agents/codeact/__init__.py +0 -255
  16. universal_mcp_agents-0.1.8/src/universal_mcp/agents/codeact/sandbox.py +0 -27
  17. universal_mcp_agents-0.1.8/src/universal_mcp/agents/codeact/test.py +0 -16
  18. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/.gitignore +0 -0
  19. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/.pre-commit-config.yaml +0 -0
  20. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/GEMINI.md +0 -0
  21. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/PROMPTS.md +0 -0
  22. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/README.md +0 -0
  23. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/__init__.py +0 -0
  24. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/dataset.py +0 -0
  25. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/datasets/exact.jsonl +0 -0
  26. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/datasets/test.jsonl +0 -0
  27. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/evaluators.py +0 -0
  28. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/run.py +0 -0
  29. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/evals/utils.py +0 -0
  30. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/tests/test_agents.py +0 -0
  31. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/autoagent/__init__.py +0 -0
  32. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/autoagent/__main__.py +0 -0
  33. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/autoagent/context.py +0 -0
  34. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/autoagent/graph.py +0 -0
  35. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/autoagent/prompts.py +0 -0
  36. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/autoagent/state.py +0 -0
  37. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/autoagent/utils.py +0 -0
  38. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/base.py +0 -0
  39. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool/__init__.py +0 -0
  40. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool/__main__.py +0 -0
  41. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool/graph.py +0 -0
  42. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool/prompts.py +0 -0
  43. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool/state.py +0 -0
  44. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool2/__init__.py +0 -0
  45. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool2/__main__.py +0 -0
  46. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool2/agent.py +0 -0
  47. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool2/graph.py +0 -0
  48. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool2/prompts.py +0 -0
  49. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtool2/state.py +0 -0
  50. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/__init__.py +0 -0
  51. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/__main__.py +0 -0
  52. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/agent.py +0 -0
  53. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/context.py +0 -0
  54. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/graph.py +0 -0
  55. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/prompts.py +0 -0
  56. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/state.py +0 -0
  57. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/tools_all.txt +0 -0
  58. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/bigtoolcache/tools_important.txt +0 -0
  59. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/builder.py +0 -0
  60. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/cli.py +0 -0
  61. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/codeact/utils.py +0 -0
  62. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/hil.py +0 -0
  63. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/llm.py +0 -0
  64. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/planner/__init__.py +0 -0
  65. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/planner/__main__.py +0 -0
  66. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/planner/graph.py +0 -0
  67. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/planner/prompts.py +0 -0
  68. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/planner/state.py +0 -0
  69. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/react.py +0 -0
  70. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/simple.py +0 -0
  71. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/agents/utils.py +0 -0
  72. {universal_mcp_agents-0.1.8 → universal_mcp_agents-0.1.9}/src/universal_mcp/applications/ui/app.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: universal-mcp-agents
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: Add your description here
5
5
  Project-URL: Homepage, https://github.com/universal-mcp/applications
6
6
  Project-URL: Repository, https://github.com/universal-mcp/applications
@@ -81,7 +81,6 @@ if [ "$1" = "release" ]; then
81
81
  rm -rf .pytest_cache
82
82
  rm -rf .ruff_cache
83
83
  rm -rf .mypy_cache
84
- rm -rf .venv
85
84
  rm -rf .cache
86
85
  rm -rf .DS_Store
87
86
  rm -rf .idea
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
6
6
 
7
7
  [project]
8
8
  name = "universal-mcp-agents"
9
- version = "0.1.8"
9
+ version = "0.1.9"
10
10
  description = "Add your description here"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -20,3 +20,4 @@
20
20
  {"user_input": "Draft personalized LinkedIn outreach messages for 10 potential collaborators in the fintech space based on their recent posts using LinkedIn data in a Google Sheet", "difficulty": 5, "required_tools": {"scraper": ["linkedin_retrieve_profile", "linkedin_list_profile_posts"], "google_sheet": ["create_spreadsheet", "write_values_to_sheet"]}}
21
21
  {"user_input": "Monitor my Twitter mentions and DMs from the past 48 hours and create a response priority list in Google Sheets", "difficulty": 4, "required_tools": {"twitter": ["get_user_mentions", "get_dm_events_by_conversation_id"], "google_sheet": ["create_spreadsheet", "write_values_to_sheet", "set_basic_filter"]}}
22
22
  {"user_input": "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets", "difficulty": 5, "required_tools": {"tavily": ["search_and_summarize"], "google_sheet": ["get_values", "batch_get_values_by_range", "get_spreadsheet_metadata" , "create_spreadsheet", "add_sheet", "add_table"]}}
23
+ {"user_input": "search reddit for posts on elon musk and then post a meme on him on linkedin", "difficulty": 3, "required_tools": {"reddit" : ["search_reddit"], "linkedin": ["create_post"]}}
@@ -6,6 +6,7 @@ from universal_mcp.agents.builder import BuilderAgent
6
6
  from universal_mcp.agents.planner import PlannerAgent
7
7
  from universal_mcp.agents.react import ReactAgent
8
8
  from universal_mcp.agents.simple import SimpleAgent
9
+ from universal_mcp.agents.codeact import CodeActAgent
9
10
 
10
11
 
11
12
  def get_agent(agent_name: str):
@@ -23,8 +24,10 @@ def get_agent(agent_name: str):
23
24
  return BigToolAgent
24
25
  elif agent_name == "bigtool2":
25
26
  return BigToolAgent2
27
+ elif agent_name == "codeact":
28
+ return CodeActAgent
26
29
  else:
27
- raise ValueError(f"Unknown agent: {agent_name}. Possible values: auto, react, simple, builder, planner, bigtool, bigtool2")
30
+ raise ValueError(f"Unknown agent: {agent_name}. Possible values: auto, react, simple, builder, planner, bigtool, bigtool2, codeact")
28
31
 
29
32
  __all__ = [
30
33
  "BaseAgent",
@@ -0,0 +1,3 @@
1
+ from .agent import CodeActAgent
2
+
3
+ __all__ = ["CodeActAgent"]
@@ -0,0 +1,25 @@
1
+ import asyncio
2
+
3
+ from universal_mcp.agentr.registry import AgentrRegistry
4
+ from universal_mcp.agents.codeact.agent import CodeActAgent
5
+ from universal_mcp.agents.utils import messages_to_list
6
+
7
+
8
async def main():
    """Run a one-off CodeAct demo: build the agent, send one request, print the messages.

    The agent is created with a single Gmail tool (``send_email``) resolved
    through ``AgentrRegistry`` and is asked to send a test email.
    """
    demo_agent = CodeActAgent(
        "CodeAct Agent",
        instructions="Be very concise in your answers.",
        model="azure/gpt-4o",
        tools={"google_mail": ["send_email"]},
        registry=AgentrRegistry(),
    )
    request = "Send an email to manoj@agentr.dev from my Gmail account with a subject 'testing codeact agent' and body 'This is a test of the codeact agent.'"
    result = await demo_agent.invoke(request)

    # Imported only once there is something to show; `rich` replaces the
    # builtin print for this call.
    from rich import print as rich_print

    conversation = messages_to_list(result["messages"])
    rich_print(conversation)


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,171 @@
1
+ import inspect
2
+ from typing import Callable, Union
3
+
4
+ from langchain_core.language_models import BaseChatModel
5
+ from langchain_core.tools import StructuredTool, tool as create_tool
6
+ from langgraph.checkpoint.base import BaseCheckpointSaver
7
+ from langgraph.graph import END, StateGraph
8
+ from loguru import logger
9
+ from universal_mcp.tools.registry import ToolRegistry
10
+ from universal_mcp.types import ToolConfig, ToolFormat
11
+
12
+ from universal_mcp.agents.base import BaseAgent
13
+ from universal_mcp.agents.llm import load_chat_model
14
+ from universal_mcp.agents.codeact.prompts import (
15
+ create_default_prompt,
16
+ make_safe_function_name,
17
+ REFLECTION_PROMPT,
18
+ RETRY_PROMPT,
19
+ )
20
+ from universal_mcp.agents.codeact.sandbox import eval_unsafe
21
+ from universal_mcp.agents.codeact.state import CodeActState
22
+ from universal_mcp.agents.codeact.utils import extract_and_combine_codeblocks
23
+
24
+
25
class CodeActAgent(BaseAgent):
    """Agent that loops between a chat model that writes Python and a sandbox that runs it.

    The graph has two nodes: ``call_model`` asks the model for a reply and
    extracts any fenced code from it, and ``sandbox`` executes that code with
    the configured tools in scope. The loop ends when a reply has no code.
    """

    def __init__(
        self,
        name: str,
        instructions: str,
        model: str,
        memory: BaseCheckpointSaver | None = None,
        tools: ToolConfig | None = None,
        registry: ToolRegistry | None = None,
        *,
        reflection_prompt: str = None,
        reflection_model: BaseChatModel = None,
        max_reflections: int = 3,
        **kwargs,
    ):
        """Store configuration; tools are resolved later in ``_build_graph``.

        Args:
            name: Agent name, forwarded to ``BaseAgent``.
            instructions: Base instructions, forwarded to ``BaseAgent``.
            model: Model identifier; also loaded via ``load_chat_model``.
            memory: Optional checkpointer, forwarded to ``BaseAgent``.
            tools: Tool configuration to export from ``registry``.
            registry: Registry used to export the configured tools.
            reflection_prompt: System prompt for the reflection pass. When it
                is falsy, reflection is disabled regardless of
                ``max_reflections``.
            reflection_model: Model used for reflection; defaults to the main
                model instance.
            max_reflections: Upper bound on reflection/regeneration rounds.
            **kwargs: Forwarded to ``BaseAgent``.
        """
        super().__init__(name, instructions, model, memory, **kwargs)
        self.model_instance = load_chat_model(model)
        self.registry = registry
        self.tools_config = tools or {}
        self.eval_fn = eval_unsafe
        self.reflection_prompt = reflection_prompt
        self.reflection_model = reflection_model or self.model_instance
        # Without a reflection prompt there is nothing to review against.
        self.max_reflections = max_reflections if reflection_prompt else 0
        self.tools_context = {}
        self.processed_tools: list[Union[StructuredTool, Callable]] = []

    async def _build_graph(self):
        """Export tools, extend the instructions with their stubs, and compile the graph.

        Raises:
            ValueError: If tools are configured but no registry was provided.
        """
        if self.tools_config:
            if not self.registry:
                raise ValueError("Tools are configured but no registry is provided")
            # Langchain tools are fine
            exported = await self.registry.export_tools(
                self.tools_config, ToolFormat.LANGCHAIN
            )
            self.processed_tools = [
                item if isinstance(item, StructuredTool) else create_tool(item)
                for item in exported
            ]

        # NOTE(review): the diff rendering dropped indentation; the prompt
        # rebuild and the tool registration below are assumed to run
        # unconditionally (outside the `if self.tools_config` branch) — confirm
        # against the packaged file.
        self.instructions = create_default_prompt(
            self.processed_tools, self.instructions
        )

        for processed in self.processed_tools:
            coroutine = getattr(processed, "coroutine", None)
            # Prefer the coroutine implementation when the tool has one.
            implementation = coroutine if coroutine is not None else processed.func
            self.tools_context[make_safe_function_name(processed.name)] = implementation

        graph = StateGraph(CodeActState)
        # NOTE(review): the lambda hides call_model's `config: dict` annotation
        # from the graph library; presumably this matters for how `config` is
        # passed in — keep the wrapper unless verified otherwise.
        graph.add_node("call_model", lambda state, config: self.call_model(state, config))
        graph.add_node("sandbox", self.sandbox)

        graph.set_entry_point("call_model")
        graph.add_conditional_edges(
            "call_model",
            self.should_run_sandbox,
            {
                "sandbox": "sandbox",
                END: END,
            },
        )
        graph.add_edge("sandbox", "call_model")
        return graph.compile(checkpointer=self.memory)

    def should_run_sandbox(self, state: CodeActState) -> str:
        """Route to the sandbox when the last model reply produced a script, else finish."""
        return "sandbox" if state.get("script") else END

    def call_model(self, state: CodeActState, config: dict) -> dict:
        """Ask the model for a reply and, optionally, refine its code through reflection.

        Returns a state update with the final response message and the code
        extracted from it (``None`` when the reply contains no code).
        """
        run_context = config.get("context", {})
        instructions = run_context.get("system_prompt", self.instructions)
        primary_model = self.model_instance
        reviewer_model = self.reflection_model
        history = state["messages"]

        response = primary_model.invoke(
            [{"role": "system", "content": instructions}] + history
        )
        code = extract_and_combine_codeblocks(response.content)

        if self.max_reflections > 0 and code:
            for _ in range(self.max_reflections):
                past_turns = "\n".join(
                    [
                        f'<message role="{("user" if m.type == "human" else "assistant")}">\n{m.content}\n</message>'
                        for m in history
                    ]
                )
                conversation_history = (
                    past_turns
                    + f'\n<message role="assistant">\n{response.content}\n</message>'
                )

                review = reviewer_model.invoke(
                    [
                        {"role": "system", "content": self.reflection_prompt},
                        {
                            "role": "user",
                            "content": REFLECTION_PROMPT.format(
                                conversation_history=conversation_history
                            ),
                        },
                    ]
                )

                # The reviewer signals "nothing to fix" with the token NONE.
                if "NONE" in review.content:
                    break

                retry_prompt = RETRY_PROMPT.format(reflection_result=review.content)
                response = primary_model.invoke(
                    [
                        {"role": "system", "content": instructions},
                        *history,
                        {"role": "assistant", "content": response.content},
                        {"role": "user", "content": retry_prompt},
                    ]
                )
                code = extract_and_combine_codeblocks(response.content)

                # A regenerated reply without code ends the review loop.
                if not code:
                    break

        return {"messages": [response], "script": code if code else None}

    async def sandbox(self, state: CodeActState) -> dict:
        """Execute the pending script and feed its output back as a user message.

        Variables reported by the evaluator are merged into the persisted
        ``context``; the tool callables themselves are only added to the
        namespace handed to the evaluator.
        """
        saved_vars = state.get("context", {})
        namespace = {**saved_vars, **self.tools_context}
        script = state["script"]

        if inspect.iscoroutinefunction(self.eval_fn):
            output, new_vars = await self.eval_fn(script, namespace)
        else:
            output, new_vars = self.eval_fn(script, namespace)

        return {
            "messages": [{"role": "user", "content": output}],
            "context": {**saved_vars, **new_vars},
        }
@@ -0,0 +1,92 @@
1
+ import inspect
2
+ import re
3
+ from typing import Optional, Sequence
4
+
5
+ from langchain_core.tools import StructuredTool, tool as create_tool
6
+
7
+
8
def make_safe_function_name(name: str) -> str:
    """Convert a tool name to a valid Python function name.

    Every character outside ``[a-zA-Z0-9_]`` becomes an underscore. A name
    that would start with a digit is prefixed with ``tool_``, an empty name
    becomes ``unnamed_tool``, and a reserved keyword (``class``, ``from``,
    ``None`` ...) gets a trailing underscore, since a keyword cannot be used
    as a function name.

    Args:
        name: The tool name as reported by the tool.

    Returns:
        An identifier usable in a ``def`` statement and as a variable name.
    """
    import keyword  # local import: only this helper needs it

    safe_name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    if not safe_name:
        return "unnamed_tool"
    if safe_name[0].isdigit():
        safe_name = f"tool_{safe_name}"
    # PEP 8 convention for avoiding a clash with a reserved word.
    if keyword.iskeyword(safe_name):
        safe_name = f"{safe_name}_"
    return safe_name
19
+
20
+
21
def create_default_prompt(
    tools: Sequence[StructuredTool],
    base_prompt: Optional[str] = None,
):
    """Build the CodeAct system prompt: optional preamble, task rules, tool stubs, coding strategy.

    Each tool is rendered as a ``def`` (or ``async def``) stub carrying the
    tool's signature and description, under the sanitized name returned by
    ``make_safe_function_name``.

    Args:
        tools: Tools to advertise; each is read for ``name``, ``description``
            and its ``coroutine``/``func`` callable.
        base_prompt: Optional text placed before the generated prompt.

    Returns:
        The assembled prompt string.
    """
    sections = []
    if base_prompt:
        sections.append(f"{base_prompt}\n\n")

    sections.append("""You will be given a task to perform. You should output either
- a Python code snippet that provides the solution to the task, or a step towards the solution. Any output you want to extract from the code should be printed to the console. Code should be output in a fenced code block.
- text to be shown directly to the user, if you want to ask for more information or provide the final answer.

In addition to the Python Standard Library, you can use the following functions:""")

    for tool in tools:
        # Prefer the coroutine implementation when the tool has one.
        coroutine = getattr(tool, "coroutine", None)
        implementation = coroutine if coroutine is not None else tool.func
        stub_name = make_safe_function_name(tool.name)
        async_prefix = "async " if inspect.iscoroutinefunction(implementation) else ""
        signature = str(inspect.signature(implementation))
        # NOTE(review): leading whitespace inside this template was lost in the
        # diff rendering; the 4-space stub-body indent is assumed — confirm
        # against the packaged file.
        sections.append(
            f"\n{async_prefix}def {stub_name}{signature}:\n"
            f'    """{tool.description}"""\n'
            "    ...\n"
        )

    sections.append("""

Variables defined at the top level of previous code snippets can be referenced in your code.

Always use print() statements to explore data structures and function outputs. Simply returning values will not display them back to you for inspection. For example, use print(result) instead of just 'result'.

As you don't know the output schema of the additional Python functions you have access to, start from exploring their contents before building a final solution.

IMPORTANT CODING STRATEGY:
1. Only write code up to the point where you make an API call/tool usage with an output
2. Print the type/shape and a sample entry of this output, and using that knowledge proceed to write the further code

This means:
- Write code that makes the API call or tool usage
- Print the result with type information: print(f"Type: {type(result)}")
- Print the shape/structure: print(f"Shape/Keys: {result.keys() if isinstance(result, dict) else len(result) if isinstance(result, (list, tuple)) else 'N/A'}")
- Print a sample entry: print(f"Sample: {result[0] if isinstance(result, (list, tuple)) and len(result) > 0 else result}")
- Then, based on this knowledge, write the code to process/use this data

Reminder: use Python code snippets to call tools""")
    return "".join(sections)
71
+
72
+
73
# User-message template for the reflection pass. `{conversation_history}` is
# filled with the tagged transcript; the quality rules themselves are supplied
# separately as the reflection system prompt. The reviewer must answer with the
# literal token NONE when it has nothing to report.
REFLECTION_PROMPT = """
Review the assistant's latest code as per the quality rules:

<conversation_history>
{conversation_history}
</conversation_history>

If you find ANY of these issues, describe the problem briefly and clearly.
If NO issues are found, respond with EXACTLY: "NONE"
"""

# User-message template asking the model to regenerate its reply.
# `{reflection_result}` is filled with the reviewer's feedback text.
RETRY_PROMPT = """
I need you to completely regenerate your previous response based on this feedback:

'''
{reflection_result}
'''

DO NOT reference the feedback directly. Instead, provide a completely new response that addresses the issues.
"""
@@ -0,0 +1,48 @@
1
+ import asyncio
2
+ import builtins
3
+ import contextlib
4
+ import io
5
+ from typing import Any
6
+
7
+
8
async def eval_unsafe(code: str, _locals: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Execute ``code`` in a worker thread and report its stdout and changed variables.

    SECURITY: this runs arbitrary code with ``exec`` and full builtins; it is
    not a sandbox. Only use it where executing the model's code is acceptable.

    Args:
        code: Python source to execute.
        _locals: Names made available to the code (tools and variables kept
            from earlier runs). The mapping itself is not mutated.

    Returns:
        A ``(report, changed_vars)`` tuple. ``report`` echoes the code and is
        followed by the captured stdout; if the code raised, the stdout
        printed before the failure is kept and the error is appended.
        ``changed_vars`` holds every name that is new or rebound compared to
        ``_locals``, including user variables that reuse a builtin's name.
    """
    result = f"Executing code...\n{code}\n\nOutput:\n"
    result += "=" * 50 + "\n"

    # One dict serves as globals so that functions defined by the code can
    # still see the provided tools when they are called later.
    builtin_namespace = builtins.__dict__
    execution_env = {**builtin_namespace, **_locals}

    def sync_eval_in_thread() -> str:
        """Run the code synchronously and return what it printed (plus any error)."""
        buffer = io.StringIO()
        try:
            # NOTE: redirect_stdout swaps sys.stdout for the whole process, so
            # prints from other threads during this call are captured as well.
            with contextlib.redirect_stdout(buffer):
                exec(code, execution_env)
        except Exception as e:
            # Keep what was printed before the failure; it is often the best
            # clue for fixing the script.
            printed = buffer.getvalue()
            if printed and not printed.endswith("\n"):
                printed += "\n"
            return f"{printed}Error during execution: {repr(e)}"
        return buffer.getvalue() or "<code ran, no output printed to stdout>"

    # Run the synchronous exec in a separate thread to avoid blocking the event loop.
    output = await asyncio.to_thread(sync_eval_in_thread)
    result += output

    changed_vars = {}
    for key, value in execution_env.items():
        # exec() inserts this entry into its globals dict; it is not user data.
        if key == "__builtins__":
            continue
        if key in _locals:
            # Provided name: report it only when the code rebound it.
            if _locals[key] is not value:
                changed_vars[key] = value
        elif key not in builtin_namespace or builtin_namespace[key] is not value:
            # New name, or a builtin name the code rebound to its own value.
            changed_vars[key] = value

    return result, changed_vars
@@ -0,0 +1,12 @@
1
+ from typing import Any, Optional
2
+
3
+ from langgraph.graph import MessagesState
4
+
5
+
6
class CodeActState(MessagesState):
    """State for CodeAct agent."""

    # The Python code script to be executed.
    script: Optional[str]
    # Dictionary containing the execution context with available tools and variables.
    context: dict[str, Any]
@@ -6,7 +6,7 @@ You are an expert planner. Your goal is to consolidate a complex user request in
6
6
  2. **Focus on Data Handoffs:** A good decomposition often involves one sub-task to *retrieve* information and a subsequent sub-task to *use* that information.
7
7
  3. **Assume Internal Capabilities:** Do NOT create sub-tasks for abstract cognitive work like 'summarize' or 'analyze'.
8
8
  4. **Simplify Single Actions:** If the user's task is already a single, simple action, the output should be a single sub-task that concisely describes that action. Do not make it identical to the user's input.
9
-
9
+ 5. **General purpose sub tasks:** You also need to realise that these subtasks are going to be used to search for tools and apps. And the names and description of these tools and apps are going to be general in nature so the sub tasks should not be too specific. The task which you will get may be specific in nature but the sub taks must be general.
10
10
  **--- EXAMPLES ---**
11
11
 
12
12
  **EXAMPLE 1:**
@@ -30,25 +30,37 @@ You are an expert planner. Your goal is to consolidate a complex user request in
30
30
 
31
31
 
32
32
  APP_SEARCH_QUERY_PROMPT = """
33
- You are an expert at extracting the name of an application or a category of application from a sub-task description. Your goal is to generate a query for an app search engine.
33
+ You are an expert at selecting an application to perform a specific sub-task. Your goal is to generate a concise query for an app search engine.
34
34
 
35
- **INSTRUCTIONS:**
36
- 1. Read the sub-task carefully.
37
- 2. If an application is explicitly named (e.g., "Perplexity", "Gmail", "GitHub"), your query should be ONLY that name.
38
- 3. If no specific application is named, generate a query for the *category* of application (e.g., "web search", "email client", "document editor").
39
- 4. The query should be concise.
35
+ Analyze the current sub-task in the context of the original user goal and the ENTIRE PLAN so far.
40
36
 
41
- **EXAMPLES:**
42
- - **Sub-task:** "Perform a web search using Perplexity to find the best restaurants in Goa."
43
- - **Query:** "Perplexity"
37
+ **CORE INSTRUCTION:** If any application already used in the plan is capable of performing the current sub-task, your query MUST BE the name of that application to ensure continuity and efficiency. Otherwise, generate a concise query for the category of application needed.
44
38
 
45
- - **Sub-task:** "Fetch all marketing emails received from Gmail in the last 7 days."
46
- - **Query:** "Gmail"
39
+ **--- EXAMPLES ---**
47
40
 
48
- - **Sub-task:** "Find the latest news about artificial intelligence."
49
- - **Query:** "web search"
41
+ **EXAMPLE 1: Reusing an app from two steps ago**
42
+ - **Original User Task:** "Find my latest order confirmation in Gmail, search for reviews of the main product on perplexity, and then send an email to ankit@agentr.dev telling about the reviews"
43
+ - **Plan So Far:**
44
+ - The sub-task 'Find order confirmation in Gmail' was assigned to app 'google_mail'.
45
+ - The sub-task 'Search for product reviews on perplexity' was assigned to app 'perplexity'.
46
+ - **Current Sub-task:** "send an email to ankit@agentr.dev"
47
+ - **CORRECT QUERY:** "google_mail"
50
48
 
51
- **SUB-TASK:**
49
+ **EXAMPLE 2: First Step (No previous context)**
50
+ - **Original User Task:** "Find the best restaurants in Goa."
51
+ - **Plan So Far:** None. This is the first step.
52
+ - **Current Sub-task:** "Perform a web search to find the best restaurants in Goa."
53
+ - **CORRECT QUERY:** "web search"
54
+
55
+ **--- YOUR TASK ---**
56
+
57
+ **Original User Task:**
58
+ "{original_task}"
59
+
60
+ **Plan So Far:**
61
+ {plan_context}
62
+
63
+ **Current Sub-task:**
52
64
  "{sub_task}"
53
65
 
54
66
  **YOUR CONCISE APP SEARCH QUERY:**
@@ -62,6 +74,7 @@ You are an expert at summarizing the core *action* of a sub-task into a concise
62
74
  1. Focus only on the verb or action being performed in the sub-task.
63
75
  2. Include key entities related to the action.
64
76
  3. Do NOT include the names of applications (e.g., "Perplexity", "Gmail").
77
+ 4. You also need to realise that this query is going to be used to search for tools in a particular app. And the names and description of these tools are going to be general in nature so the query should not be too specific. The sub task which you will get may be specific in nature but the query must be general.
65
78
 
66
79
  **EXAMPLES:**
67
80
  - **Sub-task:** "Perform a web search using Perplexity to find the best restaurants in Goa."
@@ -107,6 +120,7 @@ You are an AI assistant that selects the most appropriate tool(s) from a list to
107
120
  or names. It is always good to have more tools than having insufficent tools.
108
121
  4. If no tool is a good fit, return an empty list.
109
122
  5. Only return the tool IDs.
123
+ 6. You should understand that the sub task maybe specific in nature but the tools are made to be general purpose and therefore the tool_candidates you will get will be very general purpose but that should not stop you from selecting the tools as these tools will be given to a very smart agent who will be able to use these tools for the specific sub-taks
110
124
 
111
125
  **SUB-TASK:**
112
126
  "{sub_task}"
@@ -85,38 +85,53 @@ def build_tool_node_graph(llm: BaseChatModel, registry: ToolRegistry) -> StateGr
85
85
  }
86
86
 
87
87
  async def _resolve_sub_tasks(state: AgentState) -> AgentState:
88
- """Iterates through sub-tasks, finding apps and tools for each using a two-query approach."""
88
+ """Iterates through sub-tasks, providing full plan context to the app selection prompt."""
89
89
  sub_tasks = state["sub_tasks"]
90
+ original_task = state["original_task"]
90
91
  current_plan = []
91
-
92
+
92
93
  for i, sub_task in enumerate(sub_tasks):
93
94
  task_desc = sub_task["task"]
94
95
  logger.info(f"Resolving sub-task: '{task_desc}'")
95
96
 
96
- # 1. Generate App-specific query to find the right application
97
- app_query_prompt = APP_SEARCH_QUERY_PROMPT.format(sub_task=task_desc)
97
+ # 1. Build the FULL context string from the entire plan so far
98
+ if not current_plan:
99
+ plan_context_str = "None. This is the first step."
100
+ else:
101
+ context_lines = [
102
+ f"- The sub-task '{step['task']}' was assigned to app '{step['app_id']}'."
103
+ for step in current_plan
104
+ ]
105
+ plan_context_str = "\n".join(context_lines)
106
+
107
+ # 2. Generate the App-specific query using the NEW full-context prompt
108
+ app_query_prompt = APP_SEARCH_QUERY_PROMPT.format(
109
+ original_task=original_task,
110
+ plan_context=plan_context_str,
111
+ sub_task=task_desc
112
+ )
98
113
  app_query_response = await llm.with_structured_output(SearchQuery).ainvoke(app_query_prompt)
99
114
  app_search_query = app_query_response.query
100
- logger.info(f"Generated app search query: '{app_search_query}'")
115
+ logger.info(f"Generated context-aware app search query: '{app_search_query}'")
101
116
 
102
- # 2. Search for candidate apps using the app-centric query
117
+ # 3. Search for candidate apps (the rest of the logic is the same)
103
118
  candidate_apps = await registry.search_apps(query=app_search_query, limit=5)
104
119
  if not candidate_apps:
105
120
  logger.error(f"No apps found for query '{app_search_query}' from sub-task: '{task_desc}'")
106
121
  return {"failed_sub_task_info": task_desc, "sub_tasks": []}
107
122
 
108
- # 3. Generate Action-specific query for finding the tool
123
+ # 4. Generate Action-specific query for finding the tool
109
124
  tool_query_prompt = TOOL_SEARCH_QUERY_PROMPT.format(sub_task=task_desc)
110
125
  tool_query_response = await llm.with_structured_output(SearchQuery).ainvoke(tool_query_prompt)
111
126
  tool_search_query = tool_query_response.query
112
127
  logger.info(f"Generated tool search query: '{tool_search_query}'")
113
128
 
114
- # 4. Find a suitable tool within the candidate apps using the action-centric query
129
+ # 5. Find a suitable tool within the candidate apps
115
130
  tool_found = False
116
131
  for app in candidate_apps:
117
132
  app_id = app["id"]
118
133
  logger.info(f"Searching for tools in app '{app_id}' with query '{tool_search_query}'...")
119
-
134
+
120
135
  found_tools = await registry.search_tools(query=tool_search_query, app_id=app_id, limit=5)
121
136
  if not found_tools:
122
137
  continue
@@ -124,7 +139,7 @@ def build_tool_node_graph(llm: BaseChatModel, registry: ToolRegistry) -> StateGr
124
139
  tool_candidates_str = "\n - ".join([f"{tool['name']}: {tool['description']}" for tool in found_tools])
125
140
  selection_prompt = TOOL_SELECTION_PROMPT.format(sub_task=task_desc, tool_candidates=tool_candidates_str)
126
141
  selection_response = await llm.with_structured_output(ToolSelection).ainvoke(selection_prompt)
127
-
142
+
128
143
  if selection_response.tool_ids:
129
144
  logger.success(f"Found and selected tool(s) {selection_response.tool_ids} in app '{app_id}'.")
130
145
  sub_task.update({
@@ -136,13 +151,13 @@ def build_tool_node_graph(llm: BaseChatModel, registry: ToolRegistry) -> StateGr
136
151
  current_plan.append(sub_task)
137
152
  tool_found = True
138
153
  break
139
-
154
+
140
155
  if not tool_found:
141
156
  logger.error(f"Could not find any suitable tool for sub-task: '{task_desc}'")
142
157
  return {"failed_sub_task_info": task_desc, "sub_tasks": []}
143
158
 
144
159
  return {"execution_plan": current_plan, "sub_tasks": []}
145
-
160
+
146
161
  def _handle_planning_failure(state: AgentState) -> AgentState:
147
162
  """Handles the case where all decomposition attempts have failed."""
148
163
  logger.error("Maximum decomposition attempts reached. Planning failed.")
@@ -219,7 +234,7 @@ async def main():
219
234
 
220
235
  graph = build_tool_node_graph(llm, registry)
221
236
 
222
- task = "Create a content calendar for next month with trending AI/ML topics using web search and optimal posting times based on my audience analytics in Google Sheets"
237
+ task = "Find my latest order confirmation in Gmail, search for reviews of the main product on perplexity, and then send an email to ankit@agentr.dev telling about the reviews"
223
238
 
224
239
  initial_state = {
225
240
  "original_task": task,