swarms 7.7.1__py3-none-any.whl → 7.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarms/structs/__init__.py +1 -4
- swarms/structs/agent.py +10 -3
- swarms/structs/conversation.py +18 -8
- swarms/structs/swarm_router.py +1 -1
- swarms/telemetry/main.py +12 -2
- swarms/utils/formatter.py +15 -1
- {swarms-7.7.1.dist-info → swarms-7.7.2.dist-info}/METADATA +1 -1
- {swarms-7.7.1.dist-info → swarms-7.7.2.dist-info}/RECORD +12 -17
- swarms/structs/async_workflow.py +0 -818
- swarms/structs/octotools.py +0 -844
- swarms/structs/pulsar_swarm.py +0 -469
- swarms/structs/swarm_load_balancer.py +0 -344
- swarms/structs/talk_hier.py +0 -729
- /swarms/structs/{multi_agent_orchestrator.py → multi_agent_router.py} +0 -0
- {swarms-7.7.1.dist-info → swarms-7.7.2.dist-info}/LICENSE +0 -0
- {swarms-7.7.1.dist-info → swarms-7.7.2.dist-info}/WHEEL +0 -0
- {swarms-7.7.1.dist-info → swarms-7.7.2.dist-info}/entry_points.txt +0 -0
swarms/structs/octotools.py
DELETED
@@ -1,844 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
OctoToolsSwarm: A multi-agent system for complex reasoning.
|
3
|
-
Implements the OctoTools framework using swarms.
|
4
|
-
"""
|
5
|
-
|
6
|
-
import json
|
7
|
-
import logging
|
8
|
-
import re
|
9
|
-
from dataclasses import dataclass
|
10
|
-
from enum import Enum
|
11
|
-
from pathlib import Path
|
12
|
-
from typing import Any, Callable, Dict, List, Optional
|
13
|
-
import math # Import the math module
|
14
|
-
|
15
|
-
from dotenv import load_dotenv
|
16
|
-
from swarms import Agent
|
17
|
-
from swarms.structs.conversation import Conversation
|
18
|
-
|
19
|
-
# from exa_search import exa_search as web_search_execute
|
20
|
-
|
21
|
-
|
22
|
-
# Load environment variables
|
23
|
-
load_dotenv()
|
24
|
-
|
25
|
-
# Setup logging
|
26
|
-
logging.basicConfig(level=logging.INFO)
|
27
|
-
logger = logging.getLogger(__name__)
|
28
|
-
|
29
|
-
|
30
|
-
class ToolType(Enum):
    """Enumerates the categories of tools the swarm can work with."""

    IMAGE_CAPTIONER = "image_captioner"
    OBJECT_DETECTOR = "object_detector"
    WEB_SEARCH = "web_search"
    PYTHON_CALCULATOR = "python_calculator"
    # Extend this enum when further tool categories are introduced.
|
38
|
-
|
39
|
-
|
40
|
-
@dataclass
class Tool:
    """
    Thin wrapper around an externally implemented tool.

    Attributes:
        name: Unique name identifying the tool.
        description: Human-readable summary of what the tool does.
        metadata: Free-form dictionary describing the tool.
        execute_func: Callable invoked with keyword arguments to run the tool.
    """

    name: str
    description: str
    metadata: Dict[str, Any]
    execute_func: Callable

    def execute(self, **kwargs):
        """
        Run the wrapped callable with ``kwargs``.

        Returns:
            Whatever ``execute_func`` returns. If it raises, the failure is
            logged and ``{"error": <message>}`` is returned instead.
        """
        try:
            outcome = self.execute_func(**kwargs)
        except Exception as e:
            # Best-effort contract: failures are reported, never raised.
            logger.error(
                f"Error executing tool {self.name}: {str(e)}"
            )
            return {"error": str(e)}
        return outcome
|
66
|
-
|
67
|
-
|
68
|
-
class AgentRole(Enum):
    """Names the three agent roles used by the OctoTools workflow."""

    PLANNER = "planner"
    VERIFIER = "verifier"
    SUMMARIZER = "summarizer"
|
74
|
-
|
75
|
-
|
76
|
-
class OctoToolsSwarm:
    """
    Multi-agent implementation of the OctoTools framework.

    Attributes:
        model_name: Name of the LLM model used by every agent.
        max_iterations: Upper bound on action-execution iterations per run.
        base_path: Directory in which agent states are saved.
        tools: The available Tool objects.
    """
|
86
|
-
|
87
|
-
def __init__(
    self,
    tools: List[Tool],
    model_name: str = "gemini/gemini-2.0-flash",
    max_iterations: int = 10,
    base_path: Optional[str] = None,
):
    """
    Initialize the OctoToolsSwarm system.

    Args:
        tools: Tool objects to expose; they are indexed by ``tool.name``.
        model_name: Name of the LLM model passed to every agent.
        max_iterations: Maximum number of action-execution iterations.
        base_path: Directory for saved agent states. Falls back to
            ``./octotools_states`` when empty or ``None``.
    """
    self.model_name = model_name
    self.max_iterations = max_iterations
    self.base_path = (
        Path(base_path) if base_path else Path("./octotools_states")
    )
    # parents=True: a nested base_path (e.g. "runs/today/states") must not
    # fail just because its intermediate directories are missing.
    self.base_path.mkdir(parents=True, exist_ok=True)

    # Index tools by name for direct lookup during execution.
    self.tools = {tool.name: tool for tool in tools}

    # The agents' prompts are built from self.tools, so create them after it.
    self._init_agents()

    # Conversation tracker plus the trajectory of recorded steps.
    self.conversation = Conversation()
    self.memory = []
|
113
|
-
|
114
|
-
def _init_agents(self) -> None:
    """Create the planner, verifier and summarizer agents."""
    # (attribute, agent name, prompt factory, max_loops) for each role.
    specs = (
        ("planner", "OctoTools-Planner", self._get_planner_prompt, 3),
        ("verifier", "OctoTools-Verifier", self._get_verifier_prompt, 1),
        (
            "summarizer",
            "OctoTools-Summarizer",
            self._get_summarizer_prompt,
            1,
        ),
    )
    for attr, agent_name, prompt_factory, loops in specs:
        created = Agent(
            agent_name=agent_name,
            system_prompt=prompt_factory(),
            model_name=self.model_name,
            max_loops=loops,
            # State file is named after the role: planner.json, ...
            saved_state_path=str(self.base_path / f"{attr}.json"),
            verbose=True,
        )
        setattr(self, attr, created)
|
145
|
-
|
146
|
-
def _get_planner_prompt(self) -> str:
|
147
|
-
"""Get the prompt for the planner agent (Improved with few-shot examples)."""
|
148
|
-
tool_descriptions = "\n".join(
|
149
|
-
[
|
150
|
-
f"- {tool_name}: {self.tools[tool_name].description}"
|
151
|
-
for tool_name in self.tools
|
152
|
-
]
|
153
|
-
)
|
154
|
-
return f"""You are the Planner in the OctoTools framework. Your role is to analyze the user's query,
|
155
|
-
identify required skills, suggest relevant tools, and plan the steps to solve the problem.
|
156
|
-
|
157
|
-
1. **Analyze the user's query:** Understand the requirements and identify the necessary skills and potentially relevant tools.
|
158
|
-
2. **Perform high-level planning:** Create a rough outline of how tools might be used to solve the problem.
|
159
|
-
3. **Perform low-level planning (action prediction):** At each step, select the best tool to use and formulate a specific sub-goal for that tool, considering the current context.
|
160
|
-
|
161
|
-
Available Tools:
|
162
|
-
{tool_descriptions}
|
163
|
-
|
164
|
-
Output your response in JSON format. Here are examples for different stages:
|
165
|
-
|
166
|
-
**Query Analysis (High-Level Planning):**
|
167
|
-
Example Input:
|
168
|
-
Query: "What is the capital of France?"
|
169
|
-
|
170
|
-
Example Output:
|
171
|
-
```json
|
172
|
-
{{
|
173
|
-
"summary": "The user is asking for the capital of France.",
|
174
|
-
"required_skills": ["knowledge retrieval"],
|
175
|
-
"relevant_tools": ["Web_Search_Tool"]
|
176
|
-
}}
|
177
|
-
```
|
178
|
-
|
179
|
-
**Action Prediction (Low-Level Planning):**
|
180
|
-
Example Input:
|
181
|
-
Context: {{ "query": "What is the capital of France?", "available_tools": ["Web_Search_Tool"] }}
|
182
|
-
|
183
|
-
Example Output:
|
184
|
-
```json
|
185
|
-
{{
|
186
|
-
"justification": "The Web_Search_Tool can be used to directly find the capital of France.",
|
187
|
-
"context": {{}},
|
188
|
-
"sub_goal": "Search the web for 'capital of France'.",
|
189
|
-
"tool_name": "Web_Search_Tool"
|
190
|
-
}}
|
191
|
-
```
|
192
|
-
Another Example:
|
193
|
-
Context: {{"query": "How many objects are in the image?", "available_tools": ["Image_Captioner_Tool", "Object_Detector_Tool"], "image": "objects.png"}}
|
194
|
-
|
195
|
-
Example Output:
|
196
|
-
```json
|
197
|
-
{{
|
198
|
-
"justification": "First, get a general description of the image to understand the context.",
|
199
|
-
"context": {{ "image": "objects.png" }},
|
200
|
-
"sub_goal": "Generate a description of the image.",
|
201
|
-
"tool_name": "Image_Captioner_Tool"
|
202
|
-
}}
|
203
|
-
```
|
204
|
-
|
205
|
-
Example for Finding Square Root:
|
206
|
-
Context: {{"query": "What is the square root of the number of objects in the image?", "available_tools": ["Object_Detector_Tool", "Python_Calculator_Tool"], "image": "objects.png", "Object_Detector_Tool_result": ["object1", "object2", "object3", "object4"]}}
|
207
|
-
|
208
|
-
Example Output:
|
209
|
-
```json
|
210
|
-
{{
|
211
|
-
"justification": "We have detected 4 objects in the image. Now we need to find the square root of 4.",
|
212
|
-
"context": {{}},
|
213
|
-
"sub_goal": "Calculate the square root of 4",
|
214
|
-
"tool_name": "Python_Calculator_Tool"
|
215
|
-
}}
|
216
|
-
```
|
217
|
-
|
218
|
-
Your output MUST be a single, valid JSON object with the following keys:
|
219
|
-
- justification (string): Your reasoning.
|
220
|
-
- context (dict): A dictionary containing relevant information.
|
221
|
-
- sub_goal (string): The specific instruction for the tool.
|
222
|
-
- tool_name (string): The EXACT name of the tool to use.
|
223
|
-
|
224
|
-
Do NOT include any text outside of the JSON object.
|
225
|
-
"""
|
226
|
-
|
227
|
-
def _get_verifier_prompt(self) -> str:
|
228
|
-
"""Get the prompt for the verifier agent (Improved with few-shot examples)."""
|
229
|
-
return """You are the Context Verifier in the OctoTools framework. Your role is to analyze the current context
|
230
|
-
and memory to determine if the problem is solved, if there are any inconsistencies, or if further steps are needed.
|
231
|
-
|
232
|
-
Output your response in JSON format:
|
233
|
-
|
234
|
-
Expected output structure:
|
235
|
-
```json
|
236
|
-
{
|
237
|
-
"completeness": "Indicate whether the query is fully, partially, or not answered.",
|
238
|
-
"inconsistencies": "List any inconsistencies found in the context or memory.",
|
239
|
-
"verification_needs": "List any information that needs further verification.",
|
240
|
-
"ambiguities": "List any ambiguities found in the context or memory.",
|
241
|
-
"stop_signal": true/false
|
242
|
-
}
|
243
|
-
```
|
244
|
-
|
245
|
-
Example Input:
|
246
|
-
Context: { "last_result": { "result": "Caption: The image shows a cat." } }
|
247
|
-
Memory: [ { "component": "Action Predictor", "result": { "tool_name": "Image_Captioner_Tool" } } ]
|
248
|
-
|
249
|
-
Example Output:
|
250
|
-
```json
|
251
|
-
{
|
252
|
-
"completeness": "partial",
|
253
|
-
"inconsistencies": [],
|
254
|
-
"verification_needs": ["Object detection to confirm the presence of a cat."],
|
255
|
-
"ambiguities": [],
|
256
|
-
"stop_signal": false
|
257
|
-
}
|
258
|
-
```
|
259
|
-
|
260
|
-
Another Example:
|
261
|
-
Context: { "last_result": { "result": ["Detected object: cat"] } }
|
262
|
-
Memory: [ { "component": "Action Predictor", "result": { "tool_name": "Object_Detector_Tool" } } ]
|
263
|
-
|
264
|
-
Example Output:
|
265
|
-
```json
|
266
|
-
{
|
267
|
-
"completeness": "yes",
|
268
|
-
"inconsistencies": [],
|
269
|
-
"verification_needs": [],
|
270
|
-
"ambiguities": [],
|
271
|
-
"stop_signal": true
|
272
|
-
}
|
273
|
-
```
|
274
|
-
|
275
|
-
Square Root Example:
|
276
|
-
Context: {
|
277
|
-
"query": "What is the square root of the number of objects in the image?",
|
278
|
-
"image": "example.png",
|
279
|
-
"Object_Detector_Tool_result": ["object1", "object2", "object3", "object4"],
|
280
|
-
"Python_Calculator_Tool_result": "Result of 4**0.5 is 2.0"
|
281
|
-
}
|
282
|
-
Memory: [
|
283
|
-
{ "component": "Action Predictor", "result": { "tool_name": "Object_Detector_Tool" } },
|
284
|
-
{ "component": "Action Predictor", "result": { "tool_name": "Python_Calculator_Tool" } }
|
285
|
-
]
|
286
|
-
|
287
|
-
Example Output:
|
288
|
-
```json
|
289
|
-
{
|
290
|
-
"completeness": "yes",
|
291
|
-
"inconsistencies": [],
|
292
|
-
"verification_needs": [],
|
293
|
-
"ambiguities": [],
|
294
|
-
"stop_signal": true
|
295
|
-
}
|
296
|
-
```
|
297
|
-
"""
|
298
|
-
|
299
|
-
def _get_summarizer_prompt(self) -> str:
|
300
|
-
"""Get the prompt for the summarizer agent (Improved with few-shot examples)."""
|
301
|
-
return """You are the Solution Summarizer in the OctoTools framework. Your role is to synthesize the final
|
302
|
-
answer to the user's query based on the complete trajectory of actions and results.
|
303
|
-
|
304
|
-
Output your response in JSON format:
|
305
|
-
|
306
|
-
Expected output structure:
|
307
|
-
```json
|
308
|
-
{
|
309
|
-
"final_answer": "Provide a clear and concise answer to the original query."
|
310
|
-
}
|
311
|
-
```
|
312
|
-
Example Input:
|
313
|
-
Memory: [
|
314
|
-
{"component": "Query Analyzer", "result": {"summary": "Find the capital of France."}},
|
315
|
-
{"component": "Action Predictor", "result": {"tool_name": "Web_Search_Tool"}},
|
316
|
-
{"component": "Tool Execution", "result": {"result": "The capital of France is Paris."}}
|
317
|
-
]
|
318
|
-
|
319
|
-
Example Output:
|
320
|
-
```json
|
321
|
-
{
|
322
|
-
"final_answer": "The capital of France is Paris."
|
323
|
-
}
|
324
|
-
```
|
325
|
-
|
326
|
-
Square Root Example:
|
327
|
-
Memory: [
|
328
|
-
{"component": "Query Analyzer", "result": {"summary": "Find the square root of the number of objects in the image."}},
|
329
|
-
{"component": "Action Predictor", "result": {"tool_name": "Object_Detector_Tool", "sub_goal": "Detect objects in the image"}},
|
330
|
-
{"component": "Tool Execution", "result": {"result": ["object1", "object2", "object3", "object4"]}},
|
331
|
-
{"component": "Action Predictor", "result": {"tool_name": "Python_Calculator_Tool", "sub_goal": "Calculate the square root of 4"}},
|
332
|
-
{"component": "Tool Execution", "result": {"result": "Result of 4**0.5 is 2.0"}}
|
333
|
-
]
|
334
|
-
|
335
|
-
Example Output:
|
336
|
-
```json
|
337
|
-
{
|
338
|
-
"final_answer": "The square root of the number of objects in the image is 2.0. There are 4 objects in the image, and the square root of 4 is 2.0."
|
339
|
-
}
|
340
|
-
```
|
341
|
-
"""
|
342
|
-
|
343
|
-
def _safely_parse_json(self, json_str: str) -> Dict[str, Any]:
|
344
|
-
"""Safely parse JSON, handling errors and using recursive descent."""
|
345
|
-
try:
|
346
|
-
return json.loads(json_str)
|
347
|
-
except json.JSONDecodeError:
|
348
|
-
logger.warning(
|
349
|
-
f"JSONDecodeError: Attempting to extract JSON from: {json_str}"
|
350
|
-
)
|
351
|
-
try:
|
352
|
-
# More robust JSON extraction with recursive descent
|
353
|
-
def extract_json(s):
|
354
|
-
stack = []
|
355
|
-
start = -1
|
356
|
-
for i, c in enumerate(s):
|
357
|
-
if c == "{":
|
358
|
-
if not stack:
|
359
|
-
start = i
|
360
|
-
stack.append(c)
|
361
|
-
elif c == "}":
|
362
|
-
if stack:
|
363
|
-
stack.pop()
|
364
|
-
if not stack and start != -1:
|
365
|
-
return s[start : i + 1]
|
366
|
-
return None
|
367
|
-
|
368
|
-
extracted_json = extract_json(json_str)
|
369
|
-
if extracted_json:
|
370
|
-
logger.info(f"Extracted JSON: {extracted_json}")
|
371
|
-
return json.loads(extracted_json)
|
372
|
-
else:
|
373
|
-
logger.error(
|
374
|
-
"Failed to extract JSON using recursive descent."
|
375
|
-
)
|
376
|
-
return {
|
377
|
-
"error": "Failed to parse JSON",
|
378
|
-
"content": json_str,
|
379
|
-
}
|
380
|
-
except Exception as e:
|
381
|
-
logger.exception(f"Error during JSON extraction: {e}")
|
382
|
-
return {
|
383
|
-
"error": "Failed to parse JSON",
|
384
|
-
"content": json_str,
|
385
|
-
}
|
386
|
-
|
387
|
-
def _execute_tool(
|
388
|
-
self, tool_name: str, context: Dict[str, Any]
|
389
|
-
) -> Dict[str, Any]:
|
390
|
-
"""Executes a tool based on its name and provided context."""
|
391
|
-
if tool_name not in self.tools:
|
392
|
-
return {"error": f"Tool '{tool_name}' not found."}
|
393
|
-
|
394
|
-
tool = self.tools[tool_name]
|
395
|
-
try:
|
396
|
-
# For Python Calculator tool, handle object counts from Object Detector
|
397
|
-
if tool_name == "Python_Calculator_Tool":
|
398
|
-
# Check for object detector results
|
399
|
-
object_detector_result = context.get(
|
400
|
-
"Object_Detector_Tool_result"
|
401
|
-
)
|
402
|
-
if object_detector_result and isinstance(
|
403
|
-
object_detector_result, list
|
404
|
-
):
|
405
|
-
# Calculate the number of objects
|
406
|
-
num_objects = len(object_detector_result)
|
407
|
-
# If sub_goal doesn't already contain an expression, create one
|
408
|
-
if (
|
409
|
-
"sub_goal" in context
|
410
|
-
and "Calculate the square root"
|
411
|
-
in context["sub_goal"]
|
412
|
-
):
|
413
|
-
context["expression"] = f"{num_objects}**0.5"
|
414
|
-
elif "expression" not in context:
|
415
|
-
# Default to square root if no expression is specified
|
416
|
-
context["expression"] = f"{num_objects}**0.5"
|
417
|
-
|
418
|
-
# Filter context: only pass expected inputs to the tool
|
419
|
-
valid_inputs = {
|
420
|
-
k: v
|
421
|
-
for k, v in context.items()
|
422
|
-
if k in tool.metadata.get("input_types", {})
|
423
|
-
}
|
424
|
-
result = tool.execute(**valid_inputs)
|
425
|
-
return {"result": result}
|
426
|
-
except Exception as e:
|
427
|
-
logger.exception(f"Error executing tool {tool_name}: {e}")
|
428
|
-
return {"error": str(e)}
|
429
|
-
|
430
|
-
def _run_agent(
    self, agent: Agent, input_prompt: str
) -> Dict[str, Any]:
    """
    Run ``agent`` on ``input_prompt`` and parse its reply as JSON.

    The agent's system prompt is prepended to the input before the call.
    Any exception is logged and reported as ``{"error": ...}``.
    """
    try:
        # System prompt and task text, separated by a blank line.
        prompt = "{}\n\n{}".format(agent.system_prompt, input_prompt)
        raw_reply = agent.run(prompt)
        logger.info(f"DEBUG: Raw agent response: {raw_reply}")
        # The reply is used as returned; no history stripping is applied.
        return self._safely_parse_json(raw_reply)
    except Exception as e:
        logger.exception(
            f"Error running agent {agent.agent_name}: {e}"
        )
        failure = {
            "error": f"Agent {agent.agent_name} failed: {str(e)}"
        }
        return failure
|
460
|
-
|
461
|
-
def run(
    self, query: str, image: Optional[str] = None
) -> Dict[str, Any]:
    """
    Execute the task through the multi-agent workflow.

    Steps: (1) the planner analyzes the query; (2) up to
    ``self.max_iterations`` rounds of action prediction, tool execution and
    verification; (3) the summarizer produces the final answer.

    Args:
        query: The user's question.
        image: Optional image reference added to the context as ``"image"``.

    Returns:
        On success ``{"final_answer", "trajectory", "conversation"}``; on
        failure ``{"error", "trajectory", "conversation"}``.
    """
    logger.info(f"Starting task: {query}")

    def fail(message: str) -> Dict[str, Any]:
        # Single place that builds the error payload returned to callers.
        return {
            "error": message,
            "trajectory": self.memory,
            "conversation": self.conversation.return_history_as_string(),
        }

    def record(step: int, component: str, result: Any) -> None:
        self.memory.append(
            {"step": step, "component": component, "result": result}
        )

    def dump(payload: Any) -> str:
        # Tool results are arbitrary Python objects. default=str keeps a
        # non-JSON-serialisable result from aborting the whole run while
        # building a prompt.
        return json.dumps(payload, default=str)

    # NOTE(review): self.memory is not cleared here, so a second run() on
    # the same instance continues the previous trajectory. Confirm this
    # is intended.
    try:
        # Step 1: Query Analysis (High-Level Planning)
        planner_input = (
            f"Analyze the following query and determine the necessary skills and"
            f" relevant tools: {query}"
        )
        query_analysis = self._run_agent(self.planner, planner_input)
        if isinstance(query_analysis, dict) and "error" in query_analysis:
            return fail(
                f"Planner query analysis failed: {query_analysis['error']}"
            )

        record(0, "Query Analyzer", query_analysis)
        self.conversation.add(
            role=self.planner.agent_name,
            content=dump(query_analysis),
        )

        # Initialize context with the query and image (if provided)
        context: Dict[str, Any] = {"query": query}
        if image:
            context["image"] = image

        # Tools suggested by the planner. LLM output is untrusted, so
        # anything that is not a list of names falls back to every tool.
        relevant = (
            query_analysis.get("relevant_tools")
            if isinstance(query_analysis, dict)
            else None
        )
        if isinstance(relevant, str):
            relevant = [relevant]
        if isinstance(relevant, (list, tuple)):
            context["available_tools"] = [str(t) for t in relevant]
        else:
            context["available_tools"] = list(self.tools.keys())

        step_count = 1

        # Step 2: Iterative Action-Execution Loop
        while step_count <= self.max_iterations:
            logger.info(
                f"Starting iteration {step_count} of {self.max_iterations}"
            )

            # Step 2a: Action Prediction (Low-Level Planning)
            action_planner_input = (
                f"Current Context: {dump(context)}\nAvailable Tools:"
                f" {', '.join(context['available_tools'])}\nPlan the"
                " next step."
            )
            action = self._run_agent(self.planner, action_planner_input)
            if isinstance(action, dict) and "error" in action:
                logger.error(
                    f"Error in action prediction: {action['error']}"
                )
                return fail(
                    f"Planner action prediction failed: {action['error']}"
                )
            record(step_count, "Action Predictor", action)
            self.conversation.add(
                role=self.planner.agent_name,
                content=dump(action),
            )

            # Input Validation for Action (Relaxed)
            if (
                not isinstance(action, dict)
                or "tool_name" not in action
                or "sub_goal" not in action
            ):
                error_msg = (
                    "Action prediction did not return required fields (tool_name,"
                    " sub_goal) or was not a dictionary."
                )
                logger.error(error_msg)
                record(step_count, "Error", error_msg)
                break

            # Step 2b: Execute Tool
            extra_context = action.get("context")
            if not isinstance(extra_context, dict):
                # A missing or malformed "context" adds nothing.
                extra_context = {}
            tool_execution_context = {
                **context,
                **extra_context,
                "sub_goal": action["sub_goal"],
            }
            tool_result = self._execute_tool(
                action["tool_name"], tool_execution_context
            )
            record(step_count, "Tool Execution", tool_result)

            # Step 2c: Context Update - Store result with a descriptive key
            if "result" in tool_result:
                context[f"{action['tool_name']}_result"] = tool_result[
                    "result"
                ]
            if "error" in tool_result:
                context[f"{action['tool_name']}_error"] = tool_result[
                    "error"
                ]

            # Step 2d: Context Verification
            verifier_input = (
                f"Current Context: {dump(context)}\nMemory:"
                f" {dump(self.memory)}\nQuery: {query}"
            )
            verification = self._run_agent(self.verifier, verifier_input)
            if isinstance(verification, dict) and "error" in verification:
                return fail(f"Verifier failed: {verification['error']}")

            record(step_count, "Context Verifier", verification)
            self.conversation.add(
                role=self.verifier.agent_name,
                content=dump(verification),
            )

            # Check for stop signal from Verifier
            if (
                isinstance(verification, dict)
                and verification.get("stop_signal") is True
            ):
                logger.info(
                    "Received stop signal from verifier. Stopping iterations."
                )
                break

            # Safety mechanism - stop when the same tool keeps being chosen.
            same_tool_count = sum(
                1
                for m in self.memory
                if m.get("component") == "Action Predictor"
                and isinstance(m.get("result"), dict)
                and m["result"].get("tool_name") == action.get("tool_name")
            )
            if same_tool_count > 3:
                logger.warning(
                    f"Tool {action.get('tool_name')} used more than 3 times. Forcing stop."
                )
                break

            step_count += 1

        # Step 3: Solution Summarization
        summarizer_input = f"Complete Trajectory: {dump(self.memory)}\nOriginal Query: {query}"
        summarization = self._run_agent(self.summarizer, summarizer_input)
        if isinstance(summarization, dict) and "error" in summarization:
            return fail(f"Summarizer failed: {summarization['error']}")
        self.conversation.add(
            role=self.summarizer.agent_name,
            content=dump(summarization),
        )

        final_answer = (
            summarization.get("final_answer", "No answer found.")
            if isinstance(summarization, dict)
            else "No answer found."
        )
        return {
            "final_answer": final_answer,
            "trajectory": self.memory,
            "conversation": self.conversation.return_history_as_string(),
        }

    except Exception as e:
        logger.exception(f"Unexpected error in run method: {e}")
        return fail(str(e))
|
686
|
-
|
687
|
-
def save_state(self) -> None:
    """Ask each agent to save its state; failures are logged, not raised."""
    members = (self.planner, self.verifier, self.summarizer)
    for member in members:
        try:
            member.save_state()
        except Exception as exc:
            logger.error(
                f"Error saving state for {member.agent_name}: {str(exc)}"
            )
|
696
|
-
|
697
|
-
def load_state(self) -> None:
    """Ask each agent to load its saved state; failures are logged, not raised."""
    members = (self.planner, self.verifier, self.summarizer)
    for member in members:
        try:
            member.load_state()
        except Exception as exc:
            logger.error(
                f"Error loading state for {member.agent_name}: {str(exc)}"
            )
|
706
|
-
|
707
|
-
|
708
|
-
# --- Example Usage ---
|
709
|
-
|
710
|
-
|
711
|
-
# Define dummy tool functions (replace with actual implementations)
|
712
|
-
def image_captioner_execute(
    image: str, prompt: str = "Describe the image", **kwargs
) -> str:
    """Placeholder captioner: print the call and return a canned caption for ``image``."""
    call_summary = f"image_captioner_execute called with image: {image}, prompt: {prompt}"
    print(call_summary)
    caption = f"Caption for {image}: A descriptive caption (dummy)."
    return caption
|
720
|
-
|
721
|
-
|
722
|
-
def object_detector_execute(
    image: str, labels: Optional[List[str]] = None, **kwargs
) -> List[str]:
    """
    Placeholder object detector.

    Args:
        image: Image reference; only echoed in the printed trace.
        labels: Labels to "detect". ``None`` or an empty list selects the
            four default objects.
        **kwargs: Ignored.

    Returns:
        ``["object1", ..., "object4"]`` when no labels are given, otherwise
        one ``"Detected <label>"`` entry per label.
    """
    # None replaces the former mutable ``[]`` default, which was a single
    # list object shared by every call.
    if labels is None:
        labels = []
    print(
        f"object_detector_execute called with image: {image}, labels: {labels}"
    )
    if not labels:
        # Return default objects if no labels
        return ["object1", "object2", "object3", "object4"]
    return [f"Detected {label}" for label in labels]
|
737
|
-
|
738
|
-
|
739
|
-
def web_search_execute(query: str, **kwargs) -> str:
    """Placeholder web search: print the call and return a canned result line."""
    trace = f"web_search_execute called with query: {query}"
    print(trace)
    answer = f"Search results for '{query}'..."
    return answer
|
743
|
-
|
744
|
-
|
745
|
-
def python_calculator_execute(expression: str, **kwargs) -> str:
    """
    Evaluate a basic arithmetic expression and describe the result.

    The expression may contain only digits, ``+ - * / ( ) .`` and
    whitespace. It is parsed with ``ast`` and evaluated by walking the
    tree, not with ``eval``: the text originates from an LLM, and the
    character filter alone still admits inputs such as ``9**9**9**9`` that
    would exhaust CPU and memory. Integer powers whose result would exceed
    roughly 100,000 bits are rejected.

    Args:
        expression: Arithmetic expression text.
        **kwargs: Ignored.

    Returns:
        ``"Result of <expression> is <value>"`` on success,
        ``"Error: Invalid expression for calculator."`` when the text has
        disallowed characters, or ``"Error: <message>"`` on any other
        failure.
    """
    import ast
    import operator

    print(f"python_calculator_execute called with: {expression}")

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    max_result_bits = 100_000

    def evaluate(node):
        # Evaluate only numeric literals and the whitelisted operators.
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and type(node.value) in (
            int,
            float,
        ):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and left.bit_length() * right > max_result_bits
            ):
                raise ValueError("Exponent too large for calculator.")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("Unsupported expression for calculator.")

    try:
        if not re.match(r"^[0-9+\-*/().\s]+$", expression):
            return "Error: Invalid expression for calculator."
        # strip(): eval() tolerated leading blanks, ast.parse does not.
        tree = ast.parse(
            expression.strip(), filename="<string>", mode="eval"
        )
        result = evaluate(tree)
        return f"Result of {expression} is {result}"
    except Exception as e:
        return f"Error: {e}"
|
759
|
-
|
760
|
-
|
761
|
-
# Create utility function to get default tools
|
762
|
-
def get_default_tools() -> List[Tool]:
    """
    Build the stock tool set for OctoToolsSwarm.

    Returns:
        The image captioner, object detector, web search and calculator
        tools, in that order.
    """
    captioner_meta = {
        "input_types": {"image": "str", "prompt": "str"},
        "output_type": "str",
        "limitations": "May struggle with complex scenes or ambiguous objects.",
        "best_practices": "Use with clear, well-lit images. Provide specific prompts for better results.",
    }
    detector_meta = {
        "input_types": {"image": "str", "labels": "list"},
        "output_type": "list",
        "limitations": "Accuracy depends on the quality of the image and the clarity of the objects.",
        "best_practices": "Provide a list of specific object labels to detect. Use high-resolution images.",
    }
    search_meta = {
        "input_types": {"query": "str"},
        "output_type": "str",
        "limitations": "May not find specific or niche information.",
        "best_practices": "Use specific and descriptive keywords for better results.",
    }
    calculator_meta = {
        "input_types": {"expression": "str"},
        "output_type": "str",
        "limitations": "Cannot handle complex mathematical functions or libraries.",
        "best_practices": "Use for basic arithmetic and simple calculations.",
    }

    return [
        Tool(
            name="Image_Captioner_Tool",
            description="Generates a caption for an image.",
            metadata=captioner_meta,
            execute_func=image_captioner_execute,
        ),
        Tool(
            name="Object_Detector_Tool",
            description="Detects objects in an image.",
            metadata=detector_meta,
            execute_func=object_detector_execute,
        ),
        Tool(
            name="Web_Search_Tool",
            description="Performs a web search.",
            metadata=search_meta,
            execute_func=web_search_execute,
        ),
        Tool(
            name="Python_Calculator_Tool",
            description="Evaluates a Python expression.",
            metadata=calculator_meta,
            execute_func=python_calculator_execute,
        ),
    ]
|
813
|
-
|
814
|
-
|
815
|
-
# Only execute the example when this script is run directly
|
816
|
-
# if __name__ == "__main__":
|
817
|
-
# print("Running OctoToolsSwarm example...")
|
818
|
-
|
819
|
-
# # Create an OctoToolsSwarm agent with default tools
|
820
|
-
# tools = get_default_tools()
|
821
|
-
# agent = OctoToolsSwarm(tools=tools)
|
822
|
-
|
823
|
-
# # Example query
|
824
|
-
# query = "What is the square root of the number of objects in this image?"
|
825
|
-
|
826
|
-
# # Create a dummy image file for testing if it doesn't exist
|
827
|
-
# image_path = "example.png"
|
828
|
-
# if not os.path.exists(image_path):
|
829
|
-
# with open(image_path, "w") as f:
|
830
|
-
# f.write("Dummy image content")
|
831
|
-
# print(f"Created dummy image file: {image_path}")
|
832
|
-
|
833
|
-
# # Run the agent
|
834
|
-
# result = agent.run(query, image=image_path)
|
835
|
-
|
836
|
-
# # Display results
|
837
|
-
# print("\n=== FINAL ANSWER ===")
|
838
|
-
# print(result["final_answer"])
|
839
|
-
|
840
|
-
# print("\n=== TRAJECTORY SUMMARY ===")
|
841
|
-
# for step in result["trajectory"]:
|
842
|
-
# print(f"Step {step.get('step', 'N/A')}: {step.get('component', 'Unknown')}")
|
843
|
-
|
844
|
-
# print("\nOctoToolsSwarm example completed.")
|