swarms 7.8.9__py3-none-any.whl → 7.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarms/cli/onboarding_process.py +1 -3
- swarms/prompts/collaborative_prompts.py +177 -0
- swarms/structs/agent.py +349 -87
- swarms/structs/concurrent_workflow.py +70 -196
- swarms/structs/interactive_groupchat.py +319 -12
- swarms/structs/ma_utils.py +25 -6
- swarms/structs/mixture_of_agents.py +88 -113
- swarms/structs/swarm_router.py +148 -187
- swarms/telemetry/__init__.py +4 -18
- swarms/telemetry/log_executions.py +43 -0
- swarms/telemetry/main.py +53 -217
- swarms/tools/base_tool.py +8 -3
- swarms/utils/formatter.py +130 -13
- swarms/utils/litellm_wrapper.py +5 -1
- swarms-7.9.0.dist-info/METADATA +626 -0
- {swarms-7.8.9.dist-info → swarms-7.9.0.dist-info}/RECORD +19 -17
- swarms-7.8.9.dist-info/METADATA +0 -2119
- {swarms-7.8.9.dist-info → swarms-7.9.0.dist-info}/LICENSE +0 -0
- {swarms-7.8.9.dist-info → swarms-7.9.0.dist-info}/WHEEL +0 -0
- {swarms-7.8.9.dist-info → swarms-7.9.0.dist-info}/entry_points.txt +0 -0
swarms/structs/agent.py
CHANGED
@@ -56,7 +56,6 @@ from swarms.tools.base_tool import BaseTool
 from swarms.tools.py_func_to_openai_func_str import (
     convert_multiple_functions_to_openai_function_schema,
 )
-from swarms.utils.any_to_str import any_to_str
 from swarms.utils.data_to_text import data_to_text
 from swarms.utils.file_processing import create_file_in_folder
 from swarms.utils.formatter import formatter
@@ -288,6 +287,11 @@ class Agent:
     >>> print(response)
     >>> # Generate a report on the financials.

+    >>> # Real-time streaming example
+    >>> agent = Agent(llm=llm, max_loops=1, streaming_on=True)
+    >>> response = agent.run("Tell me a long story.")  # Will stream in real-time
+    >>> print(response)  # Final complete response
+
     """

     def __init__(
@@ -404,7 +408,7 @@ class Agent:
         llm_args: dict = None,
         load_state_path: str = None,
         role: agent_roles = "worker",
-        no_print: bool = False,
+        print_on: bool = True,
         tools_list_dictionary: Optional[List[Dict[str, Any]]] = None,
         mcp_url: Optional[Union[str, MCPConnection]] = None,
         mcp_urls: List[str] = None,
@@ -420,6 +424,7 @@
         rag_config: Optional[RAGConfig] = None,
         tool_call_summary: bool = True,
         output_raw_json_from_tool_call: bool = False,
+        summarize_multiple_images: bool = False,
         *args,
         **kwargs,
     ):
@@ -540,7 +545,7 @@
         self.llm_args = llm_args
         self.load_state_path = load_state_path
         self.role = role
-        self.no_print = no_print
+        self.print_on = print_on
         self.tools_list_dictionary = tools_list_dictionary
         self.mcp_url = mcp_url
         self.mcp_urls = mcp_urls
@@ -558,6 +563,7 @@
         self.output_raw_json_from_tool_call = (
             output_raw_json_from_tool_call
        )
+        self.summarize_multiple_images = summarize_multiple_images

         # self.short_memory = self.short_memory_init()

@@ -630,16 +636,20 @@
         )

         self.short_memory.add(
-            role=
+            role=self.agent_name,
             content=self.tools_list_dictionary,
         )

     def short_memory_init(self):
-
-
-
-
-        prompt
+        prompt = ""
+
+        # Add agent name, description, and instructions to the prompt
+        if self.agent_name is not None:
+            prompt += f"\n Name: {self.agent_name}"
+        elif self.agent_description is not None:
+            prompt += f"\n Description: {self.agent_description}"
+        elif self.system_prompt is not None:
+            prompt += f"\n Instructions: {self.system_prompt}"
         else:
             prompt = self.system_prompt

@@ -810,6 +820,29 @@

         return json.loads(self.tools_list_dictionary)

+    def check_model_supports_utilities(self, img: str = None) -> bool:
+        """
+        Check if the current model supports vision capabilities.
+
+        Args:
+            img (str, optional): Image input to check vision support for. Defaults to None.
+
+        Returns:
+            bool: True if model supports vision and image is provided, False otherwise.
+        """
+        from litellm.utils import supports_vision
+
+        # Only check vision support if an image is provided
+        if img is not None:
+            out = supports_vision(self.model_name)
+            if not out:
+                raise ValueError(
+                    f"Model {self.model_name} does not support vision capabilities. Please use a vision-enabled model."
+                )
+            return out
+
+        return False
+
     def check_if_no_prompt_then_autogenerate(self, task: str = None):
         """
         Checks if auto_generate_prompt is enabled and generates a prompt by combining agent name, description and system prompt if available.
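A note on the new vision gate: check_model_supports_utilities defers to litellm's capability registry and raises before any tokens are spent. For illustration, a minimal standalone sketch of the same pre-flight check outside the Agent class (model and file names are placeholders):

from litellm.utils import supports_vision


def ensure_vision_model(model_name: str, img: str = None) -> bool:
    """Raise early if an image is passed to a text-only model."""
    if img is None:
        return False  # nothing to validate without an image
    if not supports_vision(model_name):
        raise ValueError(
            f"Model {model_name} does not support vision capabilities."
        )
    return True


ensure_vision_model("gpt-4o", img="photo.jpg")  # vision-capable model: returns True
ensure_vision_model("gpt-4o", img=None)  # no image: returns False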
@@ -931,12 +964,7 @@
         self,
         task: Optional[Union[str, Any]] = None,
         img: Optional[str] = None,
-        speech: Optional[str] = None,
-        video: Optional[str] = None,
-        is_last: Optional[bool] = False,
         print_task: Optional[bool] = False,
-        generate_speech: Optional[bool] = False,
-        correct_answer: Optional[str] = None,
         *args,
         **kwargs,
     ) -> Any:
@@ -961,9 +989,12 @@

         self.check_if_no_prompt_then_autogenerate(task)

+        if img is not None:
+            self.check_model_supports_utilities(img=img)
+
         self.short_memory.add(role=self.user_name, content=task)

-        if self.plan_enabled:
+        if self.plan_enabled is True:
             self.plan(task)

         # Set the loop count
@@ -1030,12 +1061,23 @@
                     )
                     self.memory_query(task_prompt)

-
-
-
-
-
+                    if img is not None:
+                        response = self.call_llm(
+                            task=task_prompt,
+                            img=img,
+                            current_loop=loop_count,
+                            *args,
+                            **kwargs,
+                        )
+                    else:
+                        response = self.call_llm(
+                            task=task_prompt,
+                            current_loop=loop_count,
+                            *args,
+                            **kwargs,
+                        )

+                    # Parse the response from the agent with the output type
                     if exists(self.tools_list_dictionary):
                         if isinstance(response, BaseModel):
                             response = response.model_dump()
@@ -1053,18 +1095,22 @@

                     # Check and execute callable tools
                     if exists(self.tools):
-
                         if (
                             self.output_raw_json_from_tool_call
                             is True
                         ):
-                            print(type(response))
                             response = response
                         else:
-                            self.execute_tools(
-                                response=response,
-                                loop_count=loop_count,
-                            )
+                            # Only execute tools if response is not None
+                            if response is not None:
+                                self.execute_tools(
+                                    response=response,
+                                    loop_count=loop_count,
+                                )
+                            else:
+                                logger.warning(
+                                    f"LLM returned None response in loop {loop_count}, skipping tool execution"
+                                )

                     # Handle MCP tools
                     if (
@@ -1072,10 +1118,16 @@
                         or exists(self.mcp_config)
                         or exists(self.mcp_urls)
                     ):
-                        self.mcp_tool_handling(
-                            response=response,
-                            current_loop=loop_count,
-                        )
+                        # Only handle MCP tools if response is not None
+                        if response is not None:
+                            self.mcp_tool_handling(
+                                response=response,
+                                current_loop=loop_count,
+                            )
+                        else:
+                            logger.warning(
+                                f"LLM returned None response in loop {loop_count}, skipping MCP tool handling"
+                            )

                     self.sentiment_and_evaluator(response)

@@ -1130,7 +1182,10 @@
                             user_input.lower()
                             == self.custom_exit_command.lower()
                         ):
-
+                            self.pretty_print(
+                                "Exiting as per user request.",
+                                loop_count=loop_count,
+                            )
                             break

                         self.short_memory.add(
@@ -1231,12 +1286,6 @@
         self,
         task: Optional[str] = None,
         img: Optional[str] = None,
-        is_last: bool = False,
-        device: str = "cpu",  # gpu
-        device_id: int = 1,
-        all_cores: bool = True,
-        do_not_use_cluster_ops: bool = True,
-        all_gpus: bool = False,
         *args,
         **kwargs,
     ) -> Any:
@@ -1245,10 +1294,6 @@
         Args:
             task (Optional[str]): The task to be performed. Defaults to None.
             img (Optional[str]): The image to be processed. Defaults to None.
-            is_last (bool): Indicates if this is the last task. Defaults to False.
-            device (str): The device to use for execution. Defaults to "cpu".
-            device_id (int): The ID of the GPU to use if device is set to "gpu". Defaults to 0.
-            all_cores (bool): If True, uses all available CPU cores. Defaults to True.
         """
         try:
             return self.run(
@@ -1339,10 +1384,15 @@
             # Get the current conversation history
             history = self.short_memory.get_str()

+            plan_prompt = f"Create a comprehensive step-by-step plan to complete the following task: \n\n {task}"
+
             # Construct the planning prompt by combining history, planning prompt, and task
-            planning_prompt = (
-                f"{history}\n\n{self.planning_prompt}\n\nTask: {task}"
-            )
+            if exists(self.planning_prompt):
+                planning_prompt = f"{history}\n\n{self.planning_prompt}\n\nTask: {task}"
+            else:
+                planning_prompt = (
+                    f"{history}\n\n{plan_prompt}\n\nTask: {task}"
+                )

             # Generate the plan using the LLM
             plan = self.llm.run(task=planning_prompt, *args, **kwargs)
@@ -1350,9 +1400,6 @@
             # Store the generated plan in short-term memory
             self.short_memory.add(role=self.agent_name, content=plan)

-            logger.info(
-                f"Successfully created plan for task: {task[:50]}..."
-            )
             return None

         except Exception as error:
@@ -1477,10 +1524,13 @@
                 f"The model '{self.model_name}' does not support function calling. Please use a model that supports function calling."
             )

-        if self.max_tokens > get_max_tokens(self.model_name):
-            raise AgentInitializationError(
-                f"Max tokens is set to {self.max_tokens}, but the model '{self.model_name}' only supports {get_max_tokens(self.model_name)} tokens. Please set max tokens to {get_max_tokens(self.model_name)} or less."
-            )
+        try:
+            if self.max_tokens > get_max_tokens(self.model_name):
+                raise AgentInitializationError(
+                    f"Max tokens is set to {self.max_tokens}, but the model '{self.model_name}' only supports {get_max_tokens(self.model_name)} tokens. Please set max tokens to {get_max_tokens(self.model_name)} or less."
+                )
+        except Exception:
+            pass

         if self.model_name not in model_list:
             logger.warning(
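The new try/except around the token check exists because litellm only knows the token limits of models in its registry; an unknown model name would otherwise crash initialization. A minimal sketch of the same defensive probe, assuming litellm's get_max_tokens helper (the model name is a placeholder):

import litellm


def validate_max_tokens(model_name: str, requested: int) -> None:
    """Fail fast when the cap is exceeded; skip validation for unknown models."""
    try:
        cap = litellm.get_max_tokens(model_name)  # raises for unregistered models
    except Exception:
        return  # unknown model: do not block initialization
    if requested > cap:
        raise ValueError(
            f"max_tokens={requested} exceeds the {cap}-token limit of '{model_name}'"
        )


validate_max_tokens("gpt-4o-mini", 4096)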
@@ -2424,7 +2474,12 @@
         return None

     def call_llm(
-        self, task: str, *args, **kwargs
+        self,
+        task: str,
+        img: Optional[str] = None,
+        current_loop: int = 0,
+        *args,
+        **kwargs,
     ) -> str:
         """
         Calls the appropriate method on the `llm` object based on the given task.
@@ -2446,14 +2501,81 @@
         """

         try:
-            if
-
-
-
+            # Set streaming parameter in LLM if streaming is enabled
+            if self.streaming_on and hasattr(self.llm, "stream"):
+                original_stream = self.llm.stream
+                self.llm.stream = True
+
+                if img is not None:
+                    streaming_response = self.llm.run(
+                        task=task, img=img, *args, **kwargs
+                    )
+                else:
+                    streaming_response = self.llm.run(
+                        task=task, *args, **kwargs
+                    )
+
+                # If we get a streaming response, handle it with the new streaming panel
+                if hasattr(
+                    streaming_response, "__iter__"
+                ) and not isinstance(streaming_response, str):
+                    # Check print_on parameter for different streaming behaviors
+                    if self.print_on is False:
+                        # Silent streaming - no printing, just collect chunks
+                        chunks = []
+                        for chunk in streaming_response:
+                            if (
+                                hasattr(chunk, "choices")
+                                and chunk.choices[0].delta.content
+                            ):
+                                content = chunk.choices[
+                                    0
+                                ].delta.content
+                                chunks.append(content)
+                        complete_response = "".join(chunks)
+                    else:
+                        # Collect chunks for conversation saving
+                        collected_chunks = []
+
+                        def on_chunk_received(chunk: str):
+                            """Callback to collect chunks as they arrive"""
+                            collected_chunks.append(chunk)
+                            # Optional: Save each chunk to conversation in real-time
+                            # This creates a more detailed conversation history
+                            if self.verbose:
+                                logger.debug(
+                                    f"Streaming chunk received: {chunk[:50]}..."
+                                )
+
+                        # Use the streaming panel to display and collect the response
+                        complete_response = formatter.print_streaming_panel(
+                            streaming_response,
+                            title=f"🤖 Agent: {self.agent_name} Loops: {current_loop}",
+                            style=None,  # Use random color like non-streaming approach
+                            collect_chunks=True,
+                            on_chunk_callback=on_chunk_received,
+                        )
+
+                    # Restore original stream setting
+                    self.llm.stream = original_stream
+
+                    # Return the complete response for further processing
+                    return complete_response
+                else:
+                    # Restore original stream setting
+                    self.llm.stream = original_stream
+                    return streaming_response
             else:
-
+                # Non-streaming call
+                if img is not None:
+                    out = self.llm.run(
+                        task=task, img=img, *args, **kwargs
+                    )
+                else:
+                    out = self.llm.run(task=task, *args, **kwargs)
+
+                return out

-            return out
         except AgentLLMError as e:
             logger.error(
                 f"Error calling LLM: {e}. Task: {task}, Args: {args}, Kwargs: {kwargs}"
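The silent branch of the new streaming path accumulates OpenAI-style delta chunks by hand. A minimal standalone sketch of that accumulation pattern over a generic chunk iterator; the Chunk/Choice/Delta classes here are stand-ins for the objects litellm yields when stream=True:

from dataclasses import dataclass
from typing import Iterable, List, Optional


@dataclass
class Delta:
    content: Optional[str]


@dataclass
class Choice:
    delta: Delta


@dataclass
class Chunk:
    choices: List[Choice]


def join_stream(chunks: Iterable[Chunk]) -> str:
    """Concatenate the text deltas of a streaming response into one string."""
    parts = []
    for chunk in chunks:
        # Skip keep-alive/None deltas, mirroring the hasattr/content guard above
        if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
            parts.append(chunk.choices[0].delta.content)
    return "".join(parts)


demo = [
    Chunk([Choice(Delta("Hel"))]),
    Chunk([Choice(Delta("lo"))]),
    Chunk([Choice(Delta(None))]),  # final chunk often carries no content
]
assert join_stream(demo) == "Hello"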
@@ -2479,7 +2601,8 @@
         self,
         task: Optional[Union[str, Any]] = None,
         img: Optional[str] = None,
-
+        imgs: Optional[List[str]] = None,
+        correct_answer: Optional[str] = None,
         *args,
         **kwargs,
     ) -> Any:
|
@@ -2493,11 +2616,7 @@ class Agent:
|
|
2493
2616
|
Args:
|
2494
2617
|
task (Optional[str], optional): The task to be executed. Defaults to None.
|
2495
2618
|
img (Optional[str], optional): The image to be processed. Defaults to None.
|
2496
|
-
|
2497
|
-
device_id (int, optional): The ID of the GPU to use if device is set to "gpu". Defaults to 0.
|
2498
|
-
all_cores (bool, optional): If True, uses all available CPU cores. Defaults to True.
|
2499
|
-
scheduled_run_date (Optional[datetime], optional): The date and time to schedule the task. Defaults to None.
|
2500
|
-
do_not_use_cluster_ops (bool, optional): If True, does not use cluster ops. Defaults to False.
|
2619
|
+
imgs (Optional[List[str]], optional): The list of images to be processed. Defaults to None.
|
2501
2620
|
*args: Additional positional arguments to be passed to the execution method.
|
2502
2621
|
**kwargs: Additional keyword arguments to be passed to the execution method.
|
2503
2622
|
|
@@ -2510,21 +2629,28 @@
         """

         if not isinstance(task, str):
-            task = any_to_str(task)
-
-        if scheduled_run_date:
-            while datetime.now() < scheduled_run_date:
-                time.sleep(
-                    1
-                )  # Sleep for a short period to avoid busy waiting
+            task = format_data_structure(task)

         try:
-            output = self._run(
-                task=task,
-                img=img,
-                *args,
-                **kwargs,
-            )
+            if exists(imgs):
+                output = self.run_multiple_images(
+                    task=task, imgs=imgs, *args, **kwargs
+                )
+            elif exists(correct_answer):
+                output = self.continuous_run_with_answer(
+                    task=task,
+                    img=img,
+                    correct_answer=correct_answer,
+                    *args,
+                    **kwargs,
+                )
+            else:
+                output = self._run(
+                    task=task,
+                    img=img,
+                    *args,
+                    **kwargs,
+                )

             return output

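For illustration, a hedged sketch of how the reworked run() dispatch above looks from the caller's side; the model name and file paths are placeholders:

from swarms import Agent

agent = Agent(
    model_name="gpt-4o",  # placeholder; any litellm-routable model
    max_loops=1,
    summarize_multiple_images=True,
)

# Plain single-task call falls through to _run()
report = agent.run(task="Summarize Q1 revenue drivers")

# imgs= routes to run_multiple_images() (concurrent, one call per image)
descriptions = agent.run(
    task="Describe what you see",
    imgs=["chart1.png", "chart2.png"],
)

# correct_answer= routes to continuous_run_with_answer(),
# retrying until the answer substring appears (up to 10 attempts)
answer = agent.run(task="What is 7 * 6?", correct_answer="42")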
@@ -2664,14 +2790,12 @@
         return self.role

     def pretty_print(self, response: str, loop_count: int):
-        if self.no_print is False:
+        if self.print_on is False:
            if self.streaming_on is True:
-                #
-
-
-
-                )
-            elif self.no_print is True:
+                # Skip printing here since real streaming is handled in call_llm
+                # This avoids double printing when streaming_on=True
+                pass
+            elif self.print_on is False:
                 pass
             else:
                 # logger.info(f"Response: {response}")
@@ -2781,7 +2905,7 @@
                 )
                 # tool_response = format_data_structure(tool_response)

-                print(f"Multiple MCP Tool Response: {tool_response}")
+                # print(f"Multiple MCP Tool Response: {tool_response}")
             else:
                 raise AgentMCPConnectionError(
                     "mcp_url must be either a string URL or MCPConnection object"
@@ -2791,7 +2915,7 @@
             # execute_tool_call_simple returns a string directly, not an object with content attribute
             text_content = f"MCP Tool Response: \n\n {json.dumps(tool_response, indent=2)}"

-            if self.no_print is False:
+            if self.print_on is False:
                 formatter.print_panel(
                     text_content,
                     "MCP Tool Response: 🛠️",
@@ -2834,7 +2958,7 @@
             temperature=self.temperature,
             max_tokens=self.max_tokens,
             system_prompt=self.system_prompt,
-            stream=self.streaming_on,
+            stream=False,  # Always disable streaming for tool summaries
             tools_list_dictionary=None,
             parallel_tool_calls=False,
             base_url=self.llm_base_url,
@@ -2842,6 +2966,13 @@
         )

     def execute_tools(self, response: any, loop_count: int):
+        # Handle None response gracefully
+        if response is None:
+            logger.warning(
+                f"Cannot execute tools with None response in loop {loop_count}. "
+                "This may indicate the LLM did not return a valid response."
+            )
+            return

         output = (
             self.tool_struct.execute_function_calls_from_api_response(
@@ -2888,3 +3019,134 @@

     def list_output_types(self):
         return OutputType
+
+    def run_multiple_images(
+        self, task: str, imgs: List[str], *args, **kwargs
+    ):
+        """
+        Run the agent with multiple images using concurrent processing.
+
+        Args:
+            task (str): The task to be performed on each image.
+            imgs (List[str]): List of image paths or URLs to process.
+            *args: Additional positional arguments to pass to the agent's run method.
+            **kwargs: Additional keyword arguments to pass to the agent's run method.
+
+        Returns:
+            List[Any]: A list of outputs generated for each image in the same order as the input images.
+
+        Examples:
+            >>> agent = Agent()
+            >>> outputs = agent.run_multiple_images(
+            ...     task="Describe what you see in this image",
+            ...     imgs=["image1.jpg", "image2.png", "image3.jpeg"]
+            ... )
+            >>> print(f"Processed {len(outputs)} images")
+            Processed 3 images
+
+        Raises:
+            Exception: If an error occurs while processing any of the images.
+        """
+        # Calculate number of workers as 95% of available CPU cores
+        cpu_count = os.cpu_count()
+        max_workers = max(1, int(cpu_count * 0.95))
+
+        # Use ThreadPoolExecutor for concurrent processing
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all image processing tasks
+            future_to_img = {
+                executor.submit(
+                    self.run, task=task, img=img, *args, **kwargs
+                ): img
+                for img in imgs
+            }
+
+            # Collect results in order
+            outputs = []
+            for future in future_to_img:
+                try:
+                    output = future.result()
+                    outputs.append(output)
+                except Exception as e:
+                    logger.error(f"Error processing image: {e}")
+                    outputs.append(
+                        None
+                    )  # or raise the exception based on your preference
+
+        # Combine the outputs into a single string if summarization is enabled
+        if self.summarize_multiple_images is True:
+            output = "\n".join(outputs)
+
+            prompt = f"""
+            You have already analyzed {len(outputs)} images and provided detailed descriptions for each one.
+            Now, based on your previous analysis of these images, create a comprehensive report that:
+
+            1. Synthesizes the key findings across all images
+            2. Identifies common themes, patterns, or relationships between the images
+            3. Provides an overall summary that captures the most important insights
+            4. Highlights any notable differences or contrasts between the images
+
+            Here are your previous analyses of the images:
+            {output}
+
+            Please create a well-structured report that brings together your insights from all {len(outputs)} images.
+            """
+
+            outputs = self.run(task=prompt, *args, **kwargs)
+
+        return outputs
+
+    def continuous_run_with_answer(
+        self,
+        task: str,
+        img: Optional[str] = None,
+        correct_answer: str = None,
+        max_attempts: int = 10,
+    ):
+        """
+        Run the agent with the task until the correct answer is provided.
+
+        Args:
+            task (str): The task to be performed
+            correct_answer (str): The correct answer that must be found in the response
+            max_attempts (int): Maximum number of attempts before giving up (default: 10)
+
+        Returns:
+            str: The response containing the correct answer
+
+        Raises:
+            Exception: If max_attempts is reached without finding the correct answer
+        """
+        attempts = 0
+
+        while attempts < max_attempts:
+            attempts += 1
+
+            if self.verbose:
+                logger.info(
+                    f"Attempt {attempts}/{max_attempts} to find correct answer"
+                )
+
+            response = self._run(task=task, img=img)
+
+            # Check if the correct answer is in the response (case-insensitive)
+            if correct_answer.lower() in response.lower():
+                if self.verbose:
+                    logger.info(
+                        f"Correct answer found on attempt {attempts}"
+                    )
+                return response
+            else:
+                # Add feedback to help guide the agent
+                feedback = "Your previous response was incorrect. Think carefully about the question and ensure your response directly addresses what was asked."
+                self.short_memory.add(role="User", content=feedback)
+
+                if self.verbose:
+                    logger.info(
+                        f"Correct answer not found. Expected: '{correct_answer}'"
+                    )
+
+        # If we reach here, we've exceeded max_attempts
+        raise Exception(
+            f"Failed to find correct answer '{correct_answer}' after {max_attempts} attempts"
+        )