swarms 7.7.7-py3-none-any.whl → 7.7.9-py3-none-any.whl
This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- swarms/__init__.py +0 -1
- swarms/agents/cort_agent.py +206 -0
- swarms/agents/react_agent.py +173 -0
- swarms/communication/base_communication.py +290 -0
- swarms/communication/duckdb_wrap.py +369 -72
- swarms/communication/pulsar_struct.py +691 -0
- swarms/communication/redis_wrap.py +1362 -0
- swarms/communication/sqlite_wrap.py +547 -44
- swarms/prompts/safety_prompt.py +50 -0
- swarms/structs/agent.py +13 -8
- swarms/structs/concurrent_workflow.py +56 -242
- swarms/structs/conversation.py +228 -38
- swarms/structs/council_judge.py +456 -0
- swarms/structs/deep_research_swarm.py +19 -22
- swarms/structs/malt.py +30 -28
- swarms/structs/multi_model_gpu_manager.py +1 -1
- swarms/structs/output_types.py +1 -1
- swarms/structs/swarm_router.py +2 -2
- swarms/tools/mcp_client.py +1 -1
- swarms/tools/py_func_to_openai_func_str.py +2 -2
- swarms/utils/history_output_formatter.py +5 -5
- swarms/utils/try_except_wrapper.py +2 -2
- swarms/utils/xml_utils.py +42 -0
- {swarms-7.7.7.dist-info → swarms-7.7.9.dist-info}/METADATA +4 -3
- {swarms-7.7.7.dist-info → swarms-7.7.9.dist-info}/RECORD +28 -22
- {swarms-7.7.7.dist-info → swarms-7.7.9.dist-info}/WHEEL +1 -1
- swarms/client/__init__.py +0 -15
- swarms/client/main.py +0 -407
- {swarms-7.7.7.dist-info → swarms-7.7.9.dist-info}/LICENSE +0 -0
- {swarms-7.7.7.dist-info → swarms-7.7.9.dist-info}/entry_points.txt +0 -0
swarms/structs/council_judge.py
ADDED
@@ -0,0 +1,456 @@
+from typing import Dict, Tuple
+from functools import lru_cache
+import multiprocessing
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from swarms.structs.agent import Agent
+from swarms.structs.conversation import Conversation
+from loguru import logger
+
+
+class EvaluationError(Exception):
+    """Base exception for evaluation-related errors."""
+
+    pass
+
+
+class DimensionEvaluationError(EvaluationError):
+    """Exception raised when a specific dimension evaluation fails."""
+
+    pass
+
+
+class AggregationError(EvaluationError):
+    """Exception raised when aggregation of evaluations fails."""
+
+    pass
+
+
+# Define evaluation dimensions and their evaluation goals
+EVAL_DIMENSIONS: Dict[str, str] = {
+    "accuracy": """Evaluate the factual correctness of the model's response with high precision:
+1. Verify all claims against authoritative sources
+2. Check for outdated information or temporal inconsistencies
+3. Identify any technical inaccuracies or misrepresentations
+4. Flag unsupported assertions or speculative statements
+5. Note any contradictions within the response itself
+6. Assess the reliability of cited sources or references""",
+    "helpfulness": """Assess the practical utility and problem-solving effectiveness:
+1. Evaluate if the response directly addresses the user's core question
+2. Check if the solution is actionable and implementable
+3. Assess whether the response provides necessary context and prerequisites
+4. Verify if the response anticipates and addresses potential follow-up questions
+5. Evaluate if the response includes relevant examples or analogies
+6. Check if the response provides appropriate warnings or limitations""",
+    "harmlessness": """Evaluate the safety and ethical considerations:
+1. Check for any harmful stereotypes or biases
+2. Identify potential misuse or dangerous applications
+3. Assess if the response promotes unsafe practices
+4. Evaluate the appropriateness of the content for different audiences
+5. Check for any offensive language or insensitive content
+6. Assess if the response includes appropriate disclaimers""",
+    "coherence": """Analyze the structural and logical quality:
+1. Evaluate the organization and flow of information
+2. Check for clear topic sentences and transitions
+3. Assess the consistency of terminology and definitions
+4. Verify logical progression of ideas
+5. Check for clear paragraph structure
+6. Evaluate the use of examples and supporting evidence""",
+    "conciseness": """Assess the efficiency and precision of communication:
+1. Identify redundant or repetitive information
+2. Check for unnecessary qualifiers or hedges
+3. Evaluate if the response could be more direct
+4. Assess the balance between detail and brevity
+5. Check for filler content or unnecessary context
+6. Evaluate if the response stays focused on key points""",
+    "instruction_adherence": """Evaluate alignment with user requirements:
+1. Check if all aspects of the prompt are addressed
+2. Verify if the response stays within specified constraints
+3. Assess if the format matches requested output type
+4. Check if the response maintains appropriate scope
+5. Verify if the response follows any specific guidelines
+6. Assess if the response meets implicit expectations""",
+}
+
+
+@lru_cache(maxsize=128)
+def judge_system_prompt() -> str:
+    """
+    Returns the system prompt for judge agents.
+    Cached to avoid repeated string creation.
+
+    Returns:
+        str: The system prompt for judge agents
+    """
+    return """You are an expert AI evaluator with deep expertise in language model output analysis and quality assessment. Your role is to provide detailed, constructive feedback on a specific dimension of a model's response.
+
+Key Responsibilities:
+1. Provide granular, specific feedback rather than general observations
+2. Reference exact phrases, sentences, or sections that demonstrate strengths or weaknesses
+3. Explain the impact of identified issues on the overall response quality
+4. Suggest specific improvements with concrete examples
+5. Maintain a professional, constructive tone throughout
+6. Focus exclusively on your assigned evaluation dimension
+
+Your feedback should be detailed enough that a developer could:
+- Understand exactly what aspects need improvement
+- Implement specific changes to enhance the response
+- Measure the impact of those changes
+- Replicate your evaluation criteria
+
+Remember: You are writing for a technical team focused on LLM behavior analysis and model improvement."""
+
+
+@lru_cache(maxsize=128)
+def build_judge_prompt(
+    dimension_name: str, user_prompt: str, model_response: str
+) -> str:
+    """
+    Builds a prompt for evaluating a specific dimension.
+    Cached to avoid repeated string creation for same inputs.
+
+    Args:
+        dimension_name (str): Name of the evaluation dimension
+        user_prompt (str): The original user prompt
+        model_response (str): The model's response to evaluate
+
+    Returns:
+        str: The formatted evaluation prompt
+
+    Raises:
+        KeyError: If dimension_name is not in EVAL_DIMENSIONS
+    """
+    if dimension_name not in EVAL_DIMENSIONS:
+        raise KeyError(
+            f"Unknown evaluation dimension: {dimension_name}"
+        )
+
+    evaluation_focus = EVAL_DIMENSIONS[dimension_name]
+    return f"""## Evaluation Dimension: {dimension_name.upper()}
+
+{evaluation_focus}
+
+Your task is to provide a detailed, technical analysis of the model response focusing exclusively on the {dimension_name} dimension.
+
+Guidelines:
+1. Be specific and reference exact parts of the response
+2. Explain the reasoning behind your observations
+3. Provide concrete examples of both strengths and weaknesses
+4. Suggest specific improvements where applicable
+5. Maintain a technical, analytical tone
+
+--- BEGIN USER PROMPT ---
+{user_prompt}
+--- END USER PROMPT ---
+
+--- BEGIN MODEL RESPONSE ---
+{model_response}
+--- END MODEL RESPONSE ---
+
+### Technical Analysis ({dimension_name.upper()} Dimension):
+Provide a comprehensive analysis that would be valuable for model improvement."""
+
+
+@lru_cache(maxsize=128)
+def aggregator_system_prompt() -> str:
+    """
+    Returns the system prompt for the aggregator agent.
+    Cached to avoid repeated string creation.
+
+    Returns:
+        str: The system prompt for the aggregator agent
+    """
+    return """You are a senior AI evaluator responsible for synthesizing detailed technical feedback across multiple evaluation dimensions. Your role is to create a comprehensive analysis report that helps the development team understand and improve the model's performance.
+
+Key Responsibilities:
+1. Identify patterns and correlations across different dimensions
+2. Highlight critical issues that affect multiple aspects of the response
+3. Prioritize feedback based on impact and severity
+4. Provide actionable recommendations for improvement
+5. Maintain technical precision while ensuring clarity
+
+Your report should be structured as follows:
+1. Executive Summary
+   - Key strengths and weaknesses
+   - Critical issues requiring immediate attention
+   - Overall assessment
+
+2. Detailed Analysis
+   - Cross-dimensional patterns
+   - Specific examples and their implications
+   - Technical impact assessment
+
+3. Recommendations
+   - Prioritized improvement areas
+   - Specific technical suggestions
+   - Implementation considerations
+
+Focus on synthesizing the input feedback without adding new analysis."""
+
+
+def build_aggregation_prompt(rationales: Dict[str, str]) -> str:
+    """
+    Builds the prompt for aggregating evaluation results.
+
+    Args:
+        rationales (Dict[str, str]): Dictionary mapping dimension names to their evaluation results
+
+    Returns:
+        str: The formatted aggregation prompt
+    """
+    aggregation_input = "### MULTI-DIMENSION TECHNICAL ANALYSIS:\n"
+    for dim, text in rationales.items():
+        aggregation_input += (
+            f"\n--- {dim.upper()} ANALYSIS ---\n{text.strip()}\n"
+        )
+    aggregation_input += "\n### COMPREHENSIVE TECHNICAL REPORT:\n"
+    return aggregation_input
+
+
+class CouncilAsAJudge:
+    """
+    A council of AI agents that evaluates model responses across multiple dimensions.
+
+    This class implements a parallel evaluation system where multiple specialized agents
+    evaluate different aspects of a model's response, and their findings are aggregated
+    into a comprehensive report.
+
+    Attributes:
+        id (str): Unique identifier for the council
+        name (str): Display name of the council
+        description (str): Description of the council's purpose
+        model_name (str): Name of the model to use for evaluations
+        output_type (str): Type of output to return
+        judge_agents (Dict[str, Agent]): Dictionary of dimension-specific judge agents
+        aggregator_agent (Agent): Agent responsible for aggregating evaluations
+        conversation (Conversation): Conversation history tracker
+        max_workers (int): Maximum number of worker threads for parallel execution
+    """
+
+    def __init__(
+        self,
+        id: str = "CouncilAsAJudge",
+        name: str = "CouncilAsAJudge",
+        description: str = "Evaluates the model's response across multiple dimensions",
+        model_name: str = "gpt-4o-mini",
+        output_type: str = "string",
+        cache_size: int = 128,
+    ):
+        """
+        Initialize the CouncilAsAJudge.
+
+        Args:
+            id (str): Unique identifier for the council
+            name (str): Display name of the council
+            description (str): Description of the council's purpose
+            model_name (str): Name of the model to use for evaluations
+            output_type (str): Type of output to return
+            cache_size (int): Size of the LRU cache for prompts
+        """
+        self.id = id
+        self.name = name
+        self.description = description
+        self.model_name = model_name
+        self.output_type = output_type
+        self.judge_agents = self._create_judges()
+        self.aggregator_agent = self._create_aggregator()
+        self.conversation = Conversation()
+
+        # Calculate optimal number of workers (75% of available CPU cores)
+        total_cores = multiprocessing.cpu_count()
+        self.max_workers = max(1, int(total_cores * 0.75))
+        logger.info(
+            f"Using {self.max_workers} worker threads out of {total_cores} CPU cores"
+        )
+
+        # Configure caching
+        self._configure_caching(cache_size)
+
+    def _configure_caching(self, cache_size: int) -> None:
+        """
+        Configure caching for frequently used functions.
+
+        Args:
+            cache_size (int): Size of the LRU cache
+        """
+        # Update cache sizes for cached functions
+        judge_system_prompt.cache_info = (
+            lambda: None
+        )  # Reset cache info
+        build_judge_prompt.cache_info = lambda: None
+        aggregator_system_prompt.cache_info = lambda: None
+
+        # Set new cache sizes
+        judge_system_prompt.__wrapped__.__wrapped__ = lru_cache(
+            maxsize=cache_size
+        )(judge_system_prompt.__wrapped__)
+        build_judge_prompt.__wrapped__.__wrapped__ = lru_cache(
+            maxsize=cache_size
+        )(build_judge_prompt.__wrapped__)
+        aggregator_system_prompt.__wrapped__.__wrapped__ = lru_cache(
+            maxsize=cache_size
+        )(aggregator_system_prompt.__wrapped__)
+
+    def _create_judges(self) -> Dict[str, Agent]:
+        """
+        Create judge agents for each evaluation dimension.
+
+        Returns:
+            Dict[str, Agent]: Dictionary mapping dimension names to judge agents
+
+        Raises:
+            RuntimeError: If agent creation fails
+        """
+        try:
+            return {
+                dim: Agent(
+                    agent_name=f"{dim}_judge",
+                    system_prompt=judge_system_prompt(),
+                    model_name=self.model_name,
+                    max_loops=1,
+                    autosave=False,
+                    dashboard=False,
+                    verbose=False,
+                    dynamic_temperature_enabled=True,
+                )
+                for dim in EVAL_DIMENSIONS
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to create judge agents: {str(e)}"
+            )
+
+    def _create_aggregator(self) -> Agent:
+        """
+        Create the aggregator agent.
+
+        Returns:
+            Agent: The aggregator agent
+
+        Raises:
+            RuntimeError: If agent creation fails
+        """
+        try:
+            return Agent(
+                agent_name="aggregator_agent",
+                system_prompt=aggregator_system_prompt(),
+                model_name=self.model_name,
+                max_loops=1,
+                autosave=False,
+                dashboard=False,
+                verbose=False,
+                dynamic_temperature_enabled=True,
+            )
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to create aggregator agent: {str(e)}"
+            )
+
+    def _evaluate_dimension(
+        self,
+        dim: str,
+        agent: Agent,
+        user_prompt: str,
+        model_response: str,
+    ) -> Tuple[str, str]:
+        """
+        Evaluate a single dimension of the model response.
+
+        Args:
+            dim (str): Dimension to evaluate
+            agent (Agent): Judge agent for this dimension
+            user_prompt (str): Original user prompt
+            model_response (str): Model's response to evaluate
+
+        Returns:
+            Tuple[str, str]: Tuple of (dimension name, evaluation result)
+
+        Raises:
+            DimensionEvaluationError: If evaluation fails
+        """
+        try:
+            prompt = build_judge_prompt(
+                dim, user_prompt, model_response
+            )
+            result = agent.run(prompt)
+
+            self.conversation.add(
+                role=agent.agent_name,
+                content=result,
+            )
+
+            return dim, result.strip()
+        except Exception as e:
+            raise DimensionEvaluationError(
+                f"Failed to evaluate dimension {dim}: {str(e)}"
+            )
+
+    def run(self, task: str, model_response: str) -> None:
+        """
+        Run the evaluation process using ThreadPoolExecutor.
+
+        Args:
+            task (str): Original user prompt
+            model_response (str): Model's response to evaluate
+
+        Raises:
+            EvaluationError: If evaluation process fails
+        """
+        logger.info(
+            f"🧠 Running CouncilAsAJudge in parallel mode with {self.max_workers} workers...\n"
+        )
+
+        try:
+            # Create tasks for all dimensions
+            tasks = [
+                (dim, agent, task, model_response)
+                for dim, agent in self.judge_agents.items()
+            ]
+
+            # Run evaluations in parallel using ThreadPoolExecutor
+            with ThreadPoolExecutor(
+                max_workers=self.max_workers
+            ) as executor:
+                # Submit all tasks
+                future_to_dim = {
+                    executor.submit(
+                        self._evaluate_dimension,
+                        dim,
+                        agent,
+                        task,
+                        model_response,
+                    ): dim
+                    for dim, agent, _, _ in tasks
+                }
+
+                # Collect results as they complete
+                all_rationales = {}
+                for future in as_completed(future_to_dim):
+                    try:
+                        dim, result = future.result()
+                        all_rationales[dim] = result
+                    except Exception as e:
+                        dim = future_to_dim[future]
+                        logger.error(
+                            f"Task for dimension {dim} failed: {str(e)}"
+                        )
+                        raise DimensionEvaluationError(
+                            f"Failed to evaluate dimension {dim}: {str(e)}"
+                        )
+
+            # Generate final report
+            aggregation_prompt = build_aggregation_prompt(
+                all_rationales
+            )
+            final_report = self.aggregator_agent.run(
+                aggregation_prompt
+            )
+
+            self.conversation.add(
+                role=self.aggregator_agent.agent_name,
+                content=final_report,
+            )
+
+        except Exception as e:
+            raise EvaluationError(
+                f"Evaluation process failed: {str(e)}"
+            )
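For orientation, a minimal usage sketch of the new CouncilAsAJudge class added in 7.7.9. The task and response strings below are hypothetical; note that run() returns None and records the per-dimension analyses plus the aggregated report in the council's conversation tracker:

from swarms.structs.council_judge import CouncilAsAJudge

council = CouncilAsAJudge(model_name="gpt-4o-mini")
council.run(
    task="Explain the difference between a process and a thread.",  # hypothetical prompt
    model_response="A process has its own memory space; threads share it.",  # hypothetical response
)

# run() returns None; the judges' analyses and the final report live in
# the conversation history (Conversation.get_str is used the same way
# elsewhere in this release).
print(council.conversation.get_str())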
swarms/structs/deep_research_swarm.py
CHANGED
@@ -271,28 +271,11 @@ OUTPUT REQUIREMENTS:
 Remember: Your goal is to make complex information accessible while maintaining accuracy and depth. Prioritize clarity without sacrificing important nuance or detail."""
 
 
-# Initialize the research agent
-research_agent = Agent(
-    agent_name="Deep-Research-Agent",
-    agent_description="Specialized agent for conducting comprehensive research across multiple domains",
-    system_prompt=RESEARCH_AGENT_PROMPT,
-    max_loops=1,  # Allow multiple iterations for thorough research
-    tools_list_dictionary=tools,
-    model_name="gpt-4o-mini",
-)
-
-
-reasoning_duo = ReasoningDuo(
-    system_prompt=SUMMARIZATION_AGENT_PROMPT, output_type="string"
-)
-
-
 class DeepResearchSwarm:
     def __init__(
         self,
         name: str = "DeepResearchSwarm",
         description: str = "A swarm that conducts comprehensive research across multiple domains",
-        research_agent: Agent = research_agent,
         max_loops: int = 1,
         nice_print: bool = True,
         output_type: str = "json",
@@ -303,7 +286,6 @@ class DeepResearchSwarm:
     ):
         self.name = name
         self.description = description
-        self.research_agent = research_agent
         self.max_loops = max_loops
         self.nice_print = nice_print
         self.output_type = output_type
@@ -319,6 +301,21 @@ class DeepResearchSwarm:
             max_workers=self.max_workers
         )
 
+        # Initialize the research agent
+        self.research_agent = Agent(
+            agent_name="Deep-Research-Agent",
+            agent_description="Specialized agent for conducting comprehensive research across multiple domains",
+            system_prompt=RESEARCH_AGENT_PROMPT,
+            max_loops=1,  # Allow multiple iterations for thorough research
+            tools_list_dictionary=tools,
+            model_name="gpt-4o-mini",
+        )
+
+        self.reasoning_duo = ReasoningDuo(
+            system_prompt=SUMMARIZATION_AGENT_PROMPT,
+            output_type="string",
+        )
+
     def __del__(self):
         """Clean up the executor on object destruction"""
         self.executor.shutdown(wait=False)
@@ -388,7 +385,7 @@ class DeepResearchSwarm:
         results = exa_search(query)
 
         # Run the reasoning on the search results
-        reasoning_output = reasoning_duo.run(results)
+        reasoning_output = self.reasoning_duo.run(results)
 
         return (results, reasoning_output)
 
@@ -426,7 +423,7 @@ class DeepResearchSwarm:
 
                 # Add reasoning output to conversation
                 self.conversation.add(
-                    role=reasoning_duo.agent_name,
+                    role=self.reasoning_duo.agent_name,
                     content=reasoning_output,
                 )
             except Exception as e:
@@ -438,12 +435,12 @@ class DeepResearchSwarm:
 
         # Once all query processing is complete, generate the final summary
         # This step runs after all queries to ensure it summarizes all results
-        final_summary = reasoning_duo.run(
+        final_summary = self.reasoning_duo.run(
            f"Generate an extensive report of the following content: {self.conversation.get_str()}"
         )
 
         self.conversation.add(
-            role=reasoning_duo.agent_name,
+            role=self.reasoning_duo.agent_name,
             content=final_summary,
         )
 
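The practical effect of these hunks: the research Agent and the ReasoningDuo are no longer module-level singletons created at import time, so each DeepResearchSwarm instance now owns its own agents. A minimal sketch of the 7.7.9 construction path (instance names are illustrative):

from swarms.structs.deep_research_swarm import DeepResearchSwarm

# Each __init__ now builds a fresh research agent and ReasoningDuo,
# so separate swarms no longer share state through module globals.
swarm_a = DeepResearchSwarm(name="swarm-a")
swarm_b = DeepResearchSwarm(name="swarm-b")
assert swarm_a.research_agent is not swarm_b.research_agent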
swarms/structs/malt.py
CHANGED
@@ -58,12 +58,6 @@ You are a world-renowned mathematician with an extensive background in multiple
 Your response should be as comprehensive as possible, leaving no room for ambiguity, and it should reflect your mastery in constructing original mathematical arguments.
 """
 
-proof_creator_agent = Agent(
-    agent_name="Proof-Creator-Agent",
-    model_name="gpt-4o-mini",
-    max_loops=1,
-    system_prompt=proof_creator_prompt,
-)
 
 # Agent 2: Proof Verifier Agent
 proof_verifier_prompt = """
@@ -92,12 +86,6 @@ You are an esteemed mathematician and veteran academic known for your precise an
 Your review must be exhaustive, ensuring that even the most subtle aspects of the proof are scrutinized in depth.
 """
 
-proof_verifier_agent = Agent(
-    agent_name="Proof-Verifier-Agent",
-    model_name="gpt-4o-mini",
-    max_loops=1,
-    system_prompt=proof_verifier_prompt,
-)
 
 # Agent 3: Proof Refiner Agent
 proof_refiner_prompt = """
@@ -126,13 +114,6 @@ You are an expert in mathematical exposition and refinement with decades of expe
 Your refined proof should be a masterpiece of mathematical writing, addressing all the feedback with detailed revisions and explanations.
 """
 
-proof_refiner_agent = Agent(
-    agent_name="Proof-Refiner-Agent",
-    model_name="gpt-4o-mini",
-    max_loops=1,
-    system_prompt=proof_refiner_prompt,
-)
-
 
 majority_voting_prompt = """
 Engage in a comprehensive and exhaustive majority voting analysis of the following conversation, ensuring a deep and thoughtful examination of the responses provided by each agent. This analysis should not only summarize the responses but also critically engage with the content, context, and implications of each agent's input.
@@ -160,13 +141,6 @@ Please adhere to the following detailed guidelines:
 Throughout your analysis, focus on uncovering clear patterns while being attentive to the subtleties and complexities inherent in the responses. Pay particular attention to the nuances of mathematical contexts where algorithmic thinking may be required, ensuring that your examination is both rigorous and accessible to a diverse audience.
 """
 
-majority_voting_agent = Agent(
-    agent_name="Majority-Voting-Agent",
-    model_name="gpt-4o-mini",
-    max_loops=1,
-    system_prompt=majority_voting_prompt,
-)
-
 
 class MALT:
     """
@@ -210,6 +184,34 @@ class MALT:
         self.conversation = Conversation()
         logger.debug("Conversation initialized.")
 
+        proof_refiner_agent = Agent(
+            agent_name="Proof-Refiner-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            system_prompt=proof_refiner_prompt,
+        )
+
+        proof_verifier_agent = Agent(
+            agent_name="Proof-Verifier-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            system_prompt=proof_verifier_prompt,
+        )
+
+        majority_voting_agent = Agent(
+            agent_name="Majority-Voting-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            system_prompt=majority_voting_prompt,
+        )
+
+        proof_creator_agent = Agent(
+            agent_name="Proof-Creator-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            system_prompt=proof_creator_prompt,
+        )
+
         if preset_agents:
             self.main_agent = proof_creator_agent
             self.refiner_agent = proof_refiner_agent
@@ -304,12 +306,12 @@ class MALT:
         ######################### MAJORITY VOTING #########################
 
         # Majority Voting on the verified outputs
-        majority_voting_verified = majority_voting_agent.run(
+        majority_voting_verified = self.majority_voting_agent.run(
             task=any_to_str(verified_outputs),
         )
 
         self.conversation.add(
-            role=majority_voting_agent.agent_name,
+            role=self.majority_voting_agent.agent_name,
             content=majority_voting_verified,
         )
 
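The same relocation applies here: all four MALT agents move from module scope into __init__. A minimal construction sketch, assuming preset_agents is the constructor flag implied by the `if preset_agents:` branch above:

from swarms.structs.malt import MALT

# With preset agents enabled, __init__ builds the creator, verifier,
# refiner, and majority-voting agents shown in the hunk above and wires
# them to attributes such as self.main_agent and self.refiner_agent.
malt = MALT(preset_agents=True)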
swarms/structs/output_types.py
CHANGED
swarms/structs/swarm_router.py
CHANGED
@@ -121,7 +121,7 @@ class SwarmRouter:
         shared_memory_system (Any, optional): Shared memory system for agents. Defaults to None.
         rules (str, optional): Rules to inject into every agent. Defaults to None.
         documents (List[str], optional): List of document file paths to use. Defaults to empty list.
-        output_type (str, optional): Output format type. Defaults to "string".
+        output_type (str, optional): Output format type. Defaults to "string". Supported: 'str', 'string', 'list', 'json', 'dict', 'yaml', 'xml'.
 
     Attributes:
         name (str): Name identifier for the SwarmRouter instance
@@ -136,7 +136,7 @@ class SwarmRouter:
         shared_memory_system (Any): Shared memory system for agents
         rules (str): Rules injected into every agent
         documents (List[str]): List of document file paths
-        output_type (str): Output format type
+        output_type (str): Output format type. Supported: 'str', 'string', 'list', 'json', 'dict', 'yaml', 'xml'.
         logs (List[SwarmLog]): List of execution logs
         swarm: The instantiated swarm object
 
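Since the docstring now enumerates the accepted output_type values, a short sketch of selecting one; the swarm_type value and the empty agent list are placeholders rather than a working configuration:

from swarms.structs.swarm_router import SwarmRouter

# output_type must be one of the documented values:
# 'str', 'string', 'list', 'json', 'dict', 'yaml', or 'xml'.
router = SwarmRouter(
    name="demo-router",
    swarm_type="SequentialWorkflow",  # placeholder swarm type
    agents=[],                        # supply real agents in practice
    output_type="json",
)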
swarms/tools/mcp_client.py
CHANGED