swarms 7.7.0__py3-none-any.whl → 7.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,844 +0,0 @@
1
- """
2
- OctoToolsSwarm: A multi-agent system for complex reasoning.
3
- Implements the OctoTools framework using swarms.
4
- """
5
-
6
- import json
7
- import logging
8
- import re
9
- from dataclasses import dataclass
10
- from enum import Enum
11
- from pathlib import Path
12
- from typing import Any, Callable, Dict, List, Optional
13
- import math # Import the math module
14
-
15
- from dotenv import load_dotenv
16
- from swarms import Agent
17
- from swarms.structs.conversation import Conversation
18
-
19
- # from exa_search import exa_search as web_search_execute
20
-
21
-
22
- # Load environment variables
23
- load_dotenv()
24
-
25
- # Setup logging
26
- logging.basicConfig(level=logging.INFO)
27
- logger = logging.getLogger(__name__)
28
-
29
-
30
class ToolType(Enum):
    """Enumerates the categories of tools the framework knows about.

    Each member's value is the lowercase string identifier of the category.
    """

    # Vision tools
    IMAGE_CAPTIONER = "image_captioner"
    OBJECT_DETECTOR = "object_detector"

    # Retrieval and computation tools
    WEB_SEARCH = "web_search"
    PYTHON_CALCULATOR = "python_calculator"
    # Extend with further tool categories here as they are introduced.
38
-
39
-
40
- @dataclass
41
- class Tool:
42
- """
43
- Represents an external tool.
44
-
45
- Attributes:
46
- name: Unique name of the tool.
47
- description: Description of the tool's function.
48
- metadata: Dictionary containing tool metadata.
49
- execute_func: Callable function that executes the tool's logic.
50
- """
51
-
52
- name: str
53
- description: str
54
- metadata: Dict[str, Any]
55
- execute_func: Callable
56
-
57
- def execute(self, **kwargs):
58
- """Executes the tool's logic, handling potential errors."""
59
- try:
60
- return self.execute_func(**kwargs)
61
- except Exception as e:
62
- logger.error(
63
- f"Error executing tool {self.name}: {str(e)}"
64
- )
65
- return {"error": str(e)}
66
-
67
-
68
class AgentRole(Enum):
    """Names the three agent roles that make up an OctoTools swarm."""

    # Analyses the query and predicts the next tool action.
    PLANNER = "planner"
    # Judges whether the gathered context answers the query.
    VERIFIER = "verifier"
    # Turns the recorded trajectory into the final answer.
    SUMMARIZER = "summarizer"
74
-
75
-
76
class OctoToolsSwarm:
    """
    A multi-agent system implementing the OctoTools framework.

    A planner agent analyses the query and predicts one tool action per
    iteration, the chosen tool is executed, a verifier agent decides whether
    to stop, and a summarizer agent produces the final answer.

    Attributes:
        model_name: Name of the LLM model to use.
        max_iterations: Maximum number of action-execution iterations.
        base_path: Path for saving agent states.
        tools: Mapping of tool name to Tool object.
        conversation: Conversation tracker fed with each agent's JSON output.
        memory: List of trajectory entries, each a dict with the keys
            "step", "component" and "result".
    """

    def __init__(
        self,
        tools: "List[Tool]",
        model_name: str = "gemini/gemini-2.0-flash",
        max_iterations: int = 10,
        base_path: Optional[str] = None,
    ):
        """Initialize the OctoToolsSwarm system.

        Args:
            tools: Tools made available to the planner; indexed by ``name``.
            model_name: Model identifier handed to every agent.
            max_iterations: Upper bound on action-execution iterations.
            base_path: Directory for agent state files; defaults to
                ``./octotools_states``.
        """
        self.model_name = model_name
        self.max_iterations = max_iterations
        self.base_path = (
            Path(base_path) if base_path else Path("./octotools_states")
        )
        # parents=True so a nested base_path can be created on first use.
        self.base_path.mkdir(parents=True, exist_ok=True)
        self.tools = {tool.name: tool for tool in tools}

        # Agents need self.tools (the planner prompt lists them).
        self._init_agents()

        # Conversation tracker and trajectory memory.
        self.conversation = Conversation()
        self.memory = []

    def _make_agent(
        self,
        agent_name: str,
        system_prompt: str,
        state_file: str,
        max_loops: int,
    ):
        """Build one swarms Agent whose state file lives under base_path."""
        return Agent(
            agent_name=agent_name,
            system_prompt=system_prompt,
            model_name=self.model_name,
            max_loops=max_loops,
            saved_state_path=str(self.base_path / state_file),
            verbose=True,
        )

    def _init_agents(self) -> None:
        """Initialize the planner, verifier and summarizer agents."""
        self.planner = self._make_agent(
            "OctoTools-Planner",
            self._get_planner_prompt(),
            "planner.json",
            max_loops=3,
        )
        self.verifier = self._make_agent(
            "OctoTools-Verifier",
            self._get_verifier_prompt(),
            "verifier.json",
            max_loops=1,
        )
        self.summarizer = self._make_agent(
            "OctoTools-Summarizer",
            self._get_summarizer_prompt(),
            "summarizer.json",
            max_loops=1,
        )

    def _get_planner_prompt(self) -> str:
        """Return the planner system prompt, listing every registered tool."""
        tool_descriptions = "\n".join(
            f"- {name}: {tool.description}"
            for name, tool in self.tools.items()
        )
        return f"""You are the Planner in the OctoTools framework. Your role is to analyze the user's query,
identify required skills, suggest relevant tools, and plan the steps to solve the problem.

1. **Analyze the user's query:** Understand the requirements and identify the necessary skills and potentially relevant tools.
2. **Perform high-level planning:** Create a rough outline of how tools might be used to solve the problem.
3. **Perform low-level planning (action prediction):** At each step, select the best tool to use and formulate a specific sub-goal for that tool, considering the current context.

Available Tools:
{tool_descriptions}

Output your response in JSON format. Here are examples for different stages:

**Query Analysis (High-Level Planning):**
Example Input:
Query: "What is the capital of France?"

Example Output:
```json
{{
"summary": "The user is asking for the capital of France.",
"required_skills": ["knowledge retrieval"],
"relevant_tools": ["Web_Search_Tool"]
}}
```

**Action Prediction (Low-Level Planning):**
Example Input:
Context: {{ "query": "What is the capital of France?", "available_tools": ["Web_Search_Tool"] }}

Example Output:
```json
{{
"justification": "The Web_Search_Tool can be used to directly find the capital of France.",
"context": {{}},
"sub_goal": "Search the web for 'capital of France'.",
"tool_name": "Web_Search_Tool"
}}
```
Another Example:
Context: {{"query": "How many objects are in the image?", "available_tools": ["Image_Captioner_Tool", "Object_Detector_Tool"], "image": "objects.png"}}

Example Output:
```json
{{
"justification": "First, get a general description of the image to understand the context.",
"context": {{ "image": "objects.png" }},
"sub_goal": "Generate a description of the image.",
"tool_name": "Image_Captioner_Tool"
}}
```

Example for Finding Square Root:
Context: {{"query": "What is the square root of the number of objects in the image?", "available_tools": ["Object_Detector_Tool", "Python_Calculator_Tool"], "image": "objects.png", "Object_Detector_Tool_result": ["object1", "object2", "object3", "object4"]}}

Example Output:
```json
{{
"justification": "We have detected 4 objects in the image. Now we need to find the square root of 4.",
"context": {{}},
"sub_goal": "Calculate the square root of 4",
"tool_name": "Python_Calculator_Tool"
}}
```

Your output MUST be a single, valid JSON object with the following keys:
- justification (string): Your reasoning.
- context (dict): A dictionary containing relevant information.
- sub_goal (string): The specific instruction for the tool.
- tool_name (string): The EXACT name of the tool to use.

Do NOT include any text outside of the JSON object.
"""

    def _get_verifier_prompt(self) -> str:
        """Return the verifier system prompt (with few-shot examples)."""
        return """You are the Context Verifier in the OctoTools framework. Your role is to analyze the current context
and memory to determine if the problem is solved, if there are any inconsistencies, or if further steps are needed.

Output your response in JSON format:

Expected output structure:
```json
{
"completeness": "Indicate whether the query is fully, partially, or not answered.",
"inconsistencies": "List any inconsistencies found in the context or memory.",
"verification_needs": "List any information that needs further verification.",
"ambiguities": "List any ambiguities found in the context or memory.",
"stop_signal": true/false
}
```

Example Input:
Context: { "last_result": { "result": "Caption: The image shows a cat." } }
Memory: [ { "component": "Action Predictor", "result": { "tool_name": "Image_Captioner_Tool" } } ]

Example Output:
```json
{
"completeness": "partial",
"inconsistencies": [],
"verification_needs": ["Object detection to confirm the presence of a cat."],
"ambiguities": [],
"stop_signal": false
}
```

Another Example:
Context: { "last_result": { "result": ["Detected object: cat"] } }
Memory: [ { "component": "Action Predictor", "result": { "tool_name": "Object_Detector_Tool" } } ]

Example Output:
```json
{
"completeness": "yes",
"inconsistencies": [],
"verification_needs": [],
"ambiguities": [],
"stop_signal": true
}
```

Square Root Example:
Context: {
"query": "What is the square root of the number of objects in the image?",
"image": "example.png",
"Object_Detector_Tool_result": ["object1", "object2", "object3", "object4"],
"Python_Calculator_Tool_result": "Result of 4**0.5 is 2.0"
}
Memory: [
{ "component": "Action Predictor", "result": { "tool_name": "Object_Detector_Tool" } },
{ "component": "Action Predictor", "result": { "tool_name": "Python_Calculator_Tool" } }
]

Example Output:
```json
{
"completeness": "yes",
"inconsistencies": [],
"verification_needs": [],
"ambiguities": [],
"stop_signal": true
}
```
"""

    def _get_summarizer_prompt(self) -> str:
        """Return the summarizer system prompt (with few-shot examples)."""
        return """You are the Solution Summarizer in the OctoTools framework. Your role is to synthesize the final
answer to the user's query based on the complete trajectory of actions and results.

Output your response in JSON format:

Expected output structure:
```json
{
"final_answer": "Provide a clear and concise answer to the original query."
}
```
Example Input:
Memory: [
{"component": "Query Analyzer", "result": {"summary": "Find the capital of France."}},
{"component": "Action Predictor", "result": {"tool_name": "Web_Search_Tool"}},
{"component": "Tool Execution", "result": {"result": "The capital of France is Paris."}}
]

Example Output:
```json
{
"final_answer": "The capital of France is Paris."
}
```

Square Root Example:
Memory: [
{"component": "Query Analyzer", "result": {"summary": "Find the square root of the number of objects in the image."}},
{"component": "Action Predictor", "result": {"tool_name": "Object_Detector_Tool", "sub_goal": "Detect objects in the image"}},
{"component": "Tool Execution", "result": {"result": ["object1", "object2", "object3", "object4"]}},
{"component": "Action Predictor", "result": {"tool_name": "Python_Calculator_Tool", "sub_goal": "Calculate the square root of 4"}},
{"component": "Tool Execution", "result": {"result": "Result of 4**0.5 is 2.0"}}
]

Example Output:
```json
{
"final_answer": "The square root of the number of objects in the image is 2.0. There are 4 objects in the image, and the square root of 4 is 2.0."
}
```
"""

    @staticmethod
    def _top_level_json_spans(text: str) -> List[str]:
        """Return every balanced top-level ``{...}`` substring of ``text``.

        Braces inside double-quoted strings are ignored, but only once an
        object has been opened: quotes in the prose around the JSON must not
        switch the scanner into string mode.
        """
        spans = []
        depth = 0
        start = -1
        in_string = False
        escaped = False
        for index, char in enumerate(text):
            if in_string:
                if escaped:
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == '"':
                    in_string = False
                continue
            if char == '"' and depth > 0:
                in_string = True
            elif char == "{":
                if depth == 0:
                    start = index
                depth += 1
            elif char == "}" and depth > 0:
                depth -= 1
                if depth == 0:
                    spans.append(text[start : index + 1])
        return spans

    def _safely_parse_json(self, json_str: str) -> Dict[str, Any]:
        """Parse an agent response into a dictionary without raising.

        The response is first parsed as-is. If that fails, or yields something
        other than a JSON object, each top-level ``{...}`` span in the text is
        tried in order and the first one that parses to a dict is returned.

        Args:
            json_str: Raw agent output. A dict is returned unchanged; any
                other non-string value is converted with ``str()`` first.

        Returns:
            The parsed object, or
            ``{"error": "Failed to parse JSON", "content": json_str}`` when no
            JSON object can be recovered.
        """
        if isinstance(json_str, dict):
            return json_str
        text = json_str if isinstance(json_str, str) else str(json_str)

        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed

        logger.warning(
            f"JSONDecodeError: Attempting to extract JSON from: {json_str}"
        )
        for candidate in self._top_level_json_spans(text):
            try:
                extracted = json.loads(candidate)
            except json.JSONDecodeError:
                # Not valid JSON (e.g. "{placeholder}" in prose); try the next.
                continue
            if isinstance(extracted, dict):
                logger.info(f"Extracted JSON: {candidate}")
                return extracted

        logger.error("Failed to extract JSON using recursive descent.")
        return {
            "error": "Failed to parse JSON",
            "content": json_str,
        }

    def _execute_tool(
        self, tool_name: str, context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Execute a tool by name with the inputs it declares.

        Only context keys listed in the tool's ``metadata["input_types"]`` are
        forwarded. ``context`` itself is never modified.

        Args:
            tool_name: Key into ``self.tools``.
            context: Candidate inputs for the tool.

        Returns:
            ``{"result": <tool output>}`` on success, or ``{"error": <message>}``
            when the tool is unknown, raises, or reports a failure itself.
        """
        try:
            tool = self.tools.get(tool_name)
            if tool is None:
                return {"error": f"Tool '{tool_name}' not found."}

            # Work on a copy so the caller's dictionary is left untouched.
            inputs = dict(context)

            # For the calculator, derive an expression from the number of
            # objects previously reported by the object detector.
            if tool_name == "Python_Calculator_Tool":
                detections = inputs.get("Object_Detector_Tool_result")
                if detections and isinstance(detections, list):
                    sub_goal = str(inputs.get("sub_goal") or "")
                    wants_sqrt = "Calculate the square root" in sub_goal
                    # A square-root sub-goal always uses the object count;
                    # otherwise it is only the fallback when no expression
                    # was supplied.
                    if wants_sqrt or "expression" not in inputs:
                        inputs["expression"] = f"{len(detections)}**0.5"

            accepted = tool.metadata.get("input_types", {})
            valid_inputs = {
                key: value
                for key, value in inputs.items()
                if key in accepted
            }
            result = tool.execute(**valid_inputs)

            # Tool.execute reports its own failures as {"error": message};
            # surface that as an error instead of a successful result.
            if isinstance(result, dict) and set(result) == {"error"}:
                return {"error": result["error"]}
            return {"result": result}
        except Exception as e:
            logger.exception(f"Error executing tool {tool_name}: {e}")
            return {"error": str(e)}

    def _run_agent(
        self, agent: "Agent", input_prompt: str
    ) -> Dict[str, Any]:
        """Run a swarms agent and parse its output as JSON.

        Args:
            agent: Agent to run; its ``system_prompt`` is prepended to the
                input.
            input_prompt: Task-specific prompt text.

        Returns:
            The parsed response dictionary, or ``{"error": ...}`` if the agent
            raised or its output could not be parsed.
        """
        try:
            full_input = f"{agent.system_prompt}\n\n{input_prompt}"
            agent_response = agent.run(full_input)
            logger.info(f"DEBUG: Raw agent response: {agent_response}")
            return self._safely_parse_json(agent_response)
        except Exception as e:
            logger.exception(f"Error running agent {agent.agent_name}: {e}")
            return {"error": f"Agent {agent.agent_name} failed: {str(e)}"}

    @staticmethod
    def _dump(payload: Any) -> str:
        """Serialize ``payload`` for a prompt or the conversation log.

        ``default=str`` is deliberate: tool results may hold values that JSON
        cannot encode, and the text is only read by an LLM or a human, so a
        string rendering is preferable to aborting the run.
        """
        return json.dumps(payload, default=str)

    def _record(self, step: int, component: str, result: Any) -> None:
        """Append one entry to the trajectory memory."""
        self.memory.append(
            {"step": step, "component": component, "result": result}
        )

    def _failure(self, message: str) -> Dict[str, Any]:
        """Build the error payload returned by ``run``."""
        return {
            "error": message,
            "trajectory": self.memory,
            "conversation": self.conversation.return_history_as_string(),
        }

    def run(
        self, query: str, image: Optional[str] = None
    ) -> Dict[str, Any]:
        """Execute the task through the multi-agent workflow.

        Args:
            query: The user's question.
            image: Optional image reference added to the context as "image".

        Returns:
            On success a dict with "final_answer", "trajectory" and
            "conversation"; on failure a dict with "error", "trajectory" and
            "conversation". Exceptions are logged and reported via "error"
            rather than raised.
        """
        logger.info(f"Starting task: {query}")

        # self.memory persists across calls; entries before this index belong
        # to earlier runs and must not count towards this run's safety limit.
        run_start = len(self.memory)

        try:
            # Step 1: Query Analysis (High-Level Planning)
            planner_input = (
                "Analyze the following query and determine the necessary skills and"
                f" relevant tools: {query}"
            )
            query_analysis = self._run_agent(self.planner, planner_input)
            if "error" in query_analysis:
                return self._failure(
                    f"Planner query analysis failed: {query_analysis['error']}"
                )

            self._record(0, "Query Analyzer", query_analysis)
            self.conversation.add(
                role=self.planner.agent_name,
                content=self._dump(query_analysis),
            )

            # Initialize context with the query and image (if provided)
            context = {"query": query}
            if image:
                context["image"] = image

            # Use the planner's tool selection when it is usable; otherwise
            # make every registered tool available.
            relevant_tools = query_analysis.get("relevant_tools")
            if isinstance(relevant_tools, str):
                relevant_tools = [relevant_tools]
            if not isinstance(relevant_tools, list):
                relevant_tools = list(self.tools.keys())
            context["available_tools"] = relevant_tools

            step_count = 1

            # Step 2: Iterative Action-Execution Loop
            while step_count <= self.max_iterations:
                logger.info(
                    f"Starting iteration {step_count} of {self.max_iterations}"
                )

                # Step 2a: Action Prediction (Low-Level Planning)
                available = context.get(
                    "available_tools", list(self.tools.keys())
                )
                action_planner_input = (
                    f"Current Context: {self._dump(context)}\nAvailable Tools:"
                    f" {', '.join(map(str, available))}\nPlan the"
                    " next step."
                )
                action = self._run_agent(self.planner, action_planner_input)
                if "error" in action:
                    logger.error(
                        f"Error in action prediction: {action['error']}"
                    )
                    return self._failure(
                        f"Planner action prediction failed: {action['error']}"
                    )
                self._record(step_count, "Action Predictor", action)
                self.conversation.add(
                    role=self.planner.agent_name,
                    content=self._dump(action),
                )

                # Input Validation for Action (Relaxed)
                if (
                    not isinstance(action, dict)
                    or "tool_name" not in action
                    or "sub_goal" not in action
                ):
                    error_msg = (
                        "Action prediction did not return required fields (tool_name,"
                        " sub_goal) or was not a dictionary."
                    )
                    logger.error(error_msg)
                    self._record(step_count, "Error", error_msg)
                    break

                # Step 2b: Execute Tool
                # The planner may emit "context": null or a non-object; treat
                # anything that is not a dict as "no extra context".
                extra_context = action.get("context")
                if not isinstance(extra_context, dict):
                    extra_context = {}
                tool_execution_context = {
                    **context,
                    **extra_context,
                    "sub_goal": action["sub_goal"],
                }
                tool_result = self._execute_tool(
                    action["tool_name"], tool_execution_context
                )
                self._record(step_count, "Tool Execution", tool_result)

                # Step 2c: Context Update - Store result with a descriptive key
                if "result" in tool_result:
                    context[f"{action['tool_name']}_result"] = tool_result[
                        "result"
                    ]
                if "error" in tool_result:
                    context[f"{action['tool_name']}_error"] = tool_result[
                        "error"
                    ]

                # Step 2d: Context Verification
                verifier_input = (
                    f"Current Context: {self._dump(context)}\nMemory:"
                    f" {self._dump(self.memory)}\nQuery: {query}"
                )
                verification = self._run_agent(self.verifier, verifier_input)
                if "error" in verification:
                    return self._failure(
                        f"Verifier failed: {verification['error']}"
                    )

                self._record(step_count, "Context Verifier", verification)
                self.conversation.add(
                    role=self.verifier.agent_name,
                    content=self._dump(verification),
                )

                # Check for stop signal from Verifier
                if verification.get("stop_signal") is True:
                    logger.info(
                        "Received stop signal from verifier. Stopping iterations."
                    )
                    break

                # Safety mechanism: stop when this run has already selected
                # the same tool more than three times.
                same_tool_count = sum(
                    1
                    for entry in self.memory[run_start:]
                    if entry.get("component") == "Action Predictor"
                    and entry.get("result", {}).get("tool_name")
                    == action.get("tool_name")
                )
                if same_tool_count > 3:
                    logger.warning(
                        f"Tool {action.get('tool_name')} used more than 3 times. Forcing stop."
                    )
                    break

                step_count += 1

            # Step 3: Solution Summarization
            summarizer_input = f"Complete Trajectory: {self._dump(self.memory)}\nOriginal Query: {query}"
            summarization = self._run_agent(self.summarizer, summarizer_input)
            if "error" in summarization:
                return self._failure(
                    f"Summarizer failed: {summarization['error']}"
                )
            self.conversation.add(
                role=self.summarizer.agent_name,
                content=self._dump(summarization),
            )

            return {
                "final_answer": summarization.get(
                    "final_answer", "No answer found."
                ),
                "trajectory": self.memory,
                "conversation": self.conversation.return_history_as_string(),
            }

        except Exception as e:
            logger.exception(f"Unexpected error in run method: {e}")
            return self._failure(str(e))

    def save_state(self) -> None:
        """Save the current state of all agents, logging any failure."""
        for agent in (self.planner, self.verifier, self.summarizer):
            try:
                agent.save_state()
            except Exception as e:
                logger.error(
                    f"Error saving state for {agent.agent_name}: {str(e)}"
                )

    def load_state(self) -> None:
        """Load the saved state of all agents, logging any failure."""
        for agent in (self.planner, self.verifier, self.summarizer):
            try:
                agent.load_state()
            except Exception as e:
                logger.error(
                    f"Error loading state for {agent.agent_name}: {str(e)}"
                )
706
-
707
-
708
- # --- Example Usage ---
709
-
710
-
711
- # Define dummy tool functions (replace with actual implementations)
712
def image_captioner_execute(
    image: str, prompt: str = "Describe the image", **kwargs
) -> str:
    """Placeholder image captioner.

    Prints the call arguments and returns a fixed caption that mentions
    ``image``; ``prompt`` is only echoed and extra keyword arguments are
    ignored.
    """
    trace = f"image_captioner_execute called with image: {image}, prompt: {prompt}"
    print(trace)
    caption = f"Caption for {image}: A descriptive caption (dummy)."
    return caption
720
-
721
-
722
def object_detector_execute(
    image: str, labels: Optional[List[str]] = None, **kwargs
) -> List[str]:
    """Dummy object detector, handles missing labels gracefully.

    Args:
        image: Image reference; only echoed in the printed trace.
        labels: Object labels to "detect". ``None`` or an empty list selects
            the four default placeholder objects.
        **kwargs: Ignored.

    Returns:
        ``["Detected <label>", ...]`` for the given labels, or the default
        placeholder objects when no labels are supplied.
    """
    # A None default replaces the shared mutable `[]` default; normalising it
    # here keeps the printed trace identical ("labels: []").
    labels = labels or []
    print(
        f"object_detector_execute called with image: {image}, labels: {labels}"
    )
    if not labels:
        # Return default objects if no labels
        return ["object1", "object2", "object3", "object4"]
    return [f"Detected {label}" for label in labels]
737
-
738
-
739
def web_search_execute(query: str, **kwargs) -> str:
    """Placeholder web search.

    Prints the query and returns a canned result string that quotes it; no
    network request is made and extra keyword arguments are ignored.
    """
    trace = f"web_search_execute called with query: {query}"
    print(trace)
    canned_result = f"Search results for '{query}'..."
    return canned_result
743
-
744
-
745
def python_calculator_execute(expression: str, **kwargs) -> str:
    """Evaluate a basic arithmetic expression and describe the outcome.

    Only digits, whitespace and the characters ``+ - * / ( ) .`` are accepted;
    anything else yields "Error: Invalid expression for calculator.". Errors
    raised while evaluating are returned as "Error: <message>".
    """
    print(f"python_calculator_execute called with: {expression}")
    try:
        # Guard clause: reject anything beyond plain arithmetic characters.
        if not re.match(r"^[0-9+\-*/().\s]+$", expression):
            return "Error: Invalid expression for calculator."
        # NOTE(review): eval() runs on planner-supplied text. The whitelist
        # excludes letters, so names (including the exposed `math`) cannot be
        # referenced, but it still admits unbounded work such as chained `**`.
        # Consider an AST-based evaluator with operand limits.
        value = eval(expression, {"__builtins__": {}, "math": math})
        return f"Result of {expression} is {value}"
    except Exception as exc:
        return f"Error: {exc}"
759
-
760
-
761
- # Create utility function to get default tools
762
def get_default_tools() -> List[Tool]:
    """Build the stock tool set for OctoToolsSwarm.

    Returns:
        Four Tool objects in this order: image captioner, object detector,
        web search, Python calculator. Each one's ``metadata["input_types"]``
        names the keyword arguments its execute function receives.
    """
    return [
        Tool(
            name="Image_Captioner_Tool",
            description="Generates a caption for an image.",
            metadata={
                "input_types": {"image": "str", "prompt": "str"},
                "output_type": "str",
                "limitations": "May struggle with complex scenes or ambiguous objects.",
                "best_practices": "Use with clear, well-lit images. Provide specific prompts for better results.",
            },
            execute_func=image_captioner_execute,
        ),
        Tool(
            name="Object_Detector_Tool",
            description="Detects objects in an image.",
            metadata={
                "input_types": {"image": "str", "labels": "list"},
                "output_type": "list",
                "limitations": "Accuracy depends on the quality of the image and the clarity of the objects.",
                "best_practices": "Provide a list of specific object labels to detect. Use high-resolution images.",
            },
            execute_func=object_detector_execute,
        ),
        Tool(
            name="Web_Search_Tool",
            description="Performs a web search.",
            metadata={
                "input_types": {"query": "str"},
                "output_type": "str",
                "limitations": "May not find specific or niche information.",
                "best_practices": "Use specific and descriptive keywords for better results.",
            },
            execute_func=web_search_execute,
        ),
        Tool(
            name="Python_Calculator_Tool",
            description="Evaluates a Python expression.",
            metadata={
                "input_types": {"expression": "str"},
                "output_type": "str",
                "limitations": "Cannot handle complex mathematical functions or libraries.",
                "best_practices": "Use for basic arithmetic and simple calculations.",
            },
            execute_func=python_calculator_execute,
        ),
    ]
813
-
814
-
815
- # Only execute the example when this script is run directly
816
- # if __name__ == "__main__":
817
- # print("Running OctoToolsSwarm example...")
818
-
819
- # # Create an OctoToolsSwarm agent with default tools
820
- # tools = get_default_tools()
821
- # agent = OctoToolsSwarm(tools=tools)
822
-
823
- # # Example query
824
- # query = "What is the square root of the number of objects in this image?"
825
-
826
- # # Create a dummy image file for testing if it doesn't exist
827
- # image_path = "example.png"
828
- # if not os.path.exists(image_path):
829
- # with open(image_path, "w") as f:
830
- # f.write("Dummy image content")
831
- # print(f"Created dummy image file: {image_path}")
832
-
833
- # # Run the agent
834
- # result = agent.run(query, image=image_path)
835
-
836
- # # Display results
837
- # print("\n=== FINAL ANSWER ===")
838
- # print(result["final_answer"])
839
-
840
- # print("\n=== TRAJECTORY SUMMARY ===")
841
- # for step in result["trajectory"]:
842
- # print(f"Step {step.get('step', 'N/A')}: {step.get('component', 'Unknown')}")
843
-
844
- # print("\nOctoToolsSwarm example completed.")