swarms 7.6.0-py3-none-any.whl → 7.6.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarms/agents/__init__.py +9 -2
- swarms/agents/agent_judge.py +119 -0
- swarms/agents/flexion_agent.py +625 -0
- swarms/agents/gkp_agent.py +581 -0
- swarms/agents/reasoning_agents.py +32 -0
- swarms/prompts/agent_judge_prompt.py +38 -0
- swarms/structs/__init__.py +7 -2
- swarms/structs/agent.py +37 -8
- swarms/structs/agent_builder.py +6 -8
- swarms/structs/concurrent_workflow.py +1 -1
- swarms/structs/deep_research_swarm.py +482 -0
- swarms/structs/dynamic_conversational_swarm.py +226 -0
- swarms/structs/hiearchical_swarm.py +1 -1
- swarms/structs/hybrid_hiearchical_peer_swarm.py +273 -0
- swarms/structs/majority_voting.py +1 -1
- swarms/structs/mixture_of_agents.py +1 -1
- swarms/structs/multi_agent_orchestrator.py +1 -1
- swarms/structs/output_types.py +3 -0
- swarms/structs/rearrange.py +1 -1
- swarms/structs/sequential_workflow.py +1 -1
- swarms/structs/swarm_router.py +12 -1
- swarms/structs/swarms_api.py +1 -1
- swarms/telemetry/main.py +7 -3
- swarms/tools/mcp_integration.py +554 -0
- swarms/tools/tool_schema_base_model.py +57 -0
- {swarms-7.6.0.dist-info → swarms-7.6.2.dist-info}/METADATA +79 -1
- {swarms-7.6.0.dist-info → swarms-7.6.2.dist-info}/RECORD +30 -24
- swarms/structs/agent_security.py +0 -318
- swarms/structs/airflow_swarm.py +0 -430
- swarms/structs/output_type.py +0 -18
- {swarms-7.6.0.dist-info → swarms-7.6.2.dist-info}/LICENSE +0 -0
- {swarms-7.6.0.dist-info → swarms-7.6.2.dist-info}/WHEEL +0 -0
- {swarms-7.6.0.dist-info → swarms-7.6.2.dist-info}/entry_points.txt +0 -0
swarms/agents/flexion_agent.py
@@ -0,0 +1,625 @@
from typing import List, Dict, Any, Tuple
import time
from datetime import datetime

from swarms.structs.agent import Agent
from swarms.structs.conversation import Conversation

from loguru import logger


# Define Reflexion prompt with detailed instructions
REFLEXION_PROMPT = """You are Reflexion, an advanced AI assistant designed to generate high-quality responses and continuously improve through self-reflection.

CAPABILITIES:
- Deep reasoning: Break down complex problems step-by-step
- Self-evaluation: Critically assess your own responses
- Self-reflection: Generate insights about your performance and areas for improvement
- Memory utilization: Learn from past experiences and build upon previous knowledge

PROCESS:
1. UNDERSTAND the user's query thoroughly
2. GENERATE a detailed, thoughtful response
3. EVALUATE your response against these criteria:
   - Accuracy: Is all information factually correct?
   - Completeness: Does it address all aspects of the query?
   - Clarity: Is it well-structured and easy to understand?
   - Relevance: Does it focus on what the user needs?
   - Actionability: Does it provide practical, implementable solutions?
4. REFLECT on your performance and identify improvements
5. REFINE your response based on self-reflection

KEY PRINCIPLES:
- Be thorough but concise
- Prioritize practical, actionable advice
- Maintain awareness of your limitations
- Be transparent about uncertainty
- Learn continuously from each interaction

Always maintain your role as a helpful assistant focused on providing valuable information and solutions.
"""

class ReflexionMemory:
    """
    A memory system for the Reflexion agent to store past experiences, reflections, and feedback.

    Attributes:
        short_term_memory (List[Dict]): Recent interactions and their evaluations
        long_term_memory (List[Dict]): Persistent storage of important reflections and patterns
        memory_capacity (int): Maximum number of entries in long-term memory
    """

    def __init__(self, memory_capacity: int = 100):
        """
        Initialize the memory system.

        Args:
            memory_capacity (int): Maximum number of entries in long-term memory
        """
        self.short_term_memory = []
        self.long_term_memory = []
        self.memory_capacity = memory_capacity

    def add_short_term_memory(self, entry: Dict[str, Any]) -> None:
        """
        Add an entry to short-term memory.

        Args:
            entry (Dict[str, Any]): Memory entry containing task, response, evaluation, etc.
        """
        # Add timestamp to track when memories were created
        entry["timestamp"] = datetime.now().isoformat()
        self.short_term_memory.append(entry)

        # Keep only the most recent 10 entries in short-term memory
        if len(self.short_term_memory) > 10:
            self.short_term_memory.pop(0)

    def add_long_term_memory(self, entry: Dict[str, Any]) -> None:
        """
        Add an important entry to long-term memory.

        Args:
            entry (Dict[str, Any]): Memory entry containing task, response, evaluation, etc.
        """
        entry["timestamp"] = datetime.now().isoformat()

        # Check if similar entry exists to avoid duplication
        for existing in self.long_term_memory:
            if (
                self._similarity(existing, entry) > 0.8
            ):  # Hypothetical similarity threshold
                logger.debug(
                    "Similar entry already exists in long-term memory"
                )
                return

        self.long_term_memory.append(entry)

        # If exceeded capacity, remove oldest or least relevant entry
        if len(self.long_term_memory) > self.memory_capacity:
            self.long_term_memory.pop(0)  # Simple FIFO strategy

    def get_relevant_memories(
        self, task: str, limit: int = 5
    ) -> List[Dict[str, Any]]:
        """
        Retrieve memories relevant to the current task.

        Args:
            task (str): The current task
            limit (int): Maximum number of memories to retrieve

        Returns:
            List[Dict[str, Any]]: Relevant memories
        """
        # In a production implementation, this would use embeddings and vector similarity
        # For now, implement a simple keyword-based relevance scoring
        scored_memories = []

        # Score and combine memories from both short and long-term
        all_memories = self.short_term_memory + self.long_term_memory
        for memory in all_memories:
            relevance = self._calculate_relevance(memory, task)
            scored_memories.append((memory, relevance))

        # Sort by relevance score (descending)
        scored_memories.sort(key=lambda x: x[1], reverse=True)

        # Return the top 'limit' memories
        return [memory for memory, score in scored_memories[:limit]]

    def _calculate_relevance(
        self, memory: Dict[str, Any], task: str
    ) -> float:
        """
        Calculate relevance of a memory to the current task.

        Args:
            memory (Dict[str, Any]): The memory entry
            task (str): The current task

        Returns:
            float: Relevance score between 0 and 1
        """
        # Simple implementation - count shared words between task and memory task
        memory_task = memory.get("task", "")
        memory_reflection = memory.get("reflection", "")

        task_words = set(task.lower().split())
        memory_words = set(
            (memory_task + " " + memory_reflection).lower().split()
        )

        if not task_words or not memory_words:
            return 0.0

        intersection = task_words.intersection(memory_words)
        return len(intersection) / min(
            len(task_words), len(memory_words)
        )

    def _similarity(
        self, entry1: Dict[str, Any], entry2: Dict[str, Any]
    ) -> float:
        """
        Calculate similarity between two memory entries.

        Args:
            entry1 (Dict[str, Any]): First memory entry
            entry2 (Dict[str, Any]): Second memory entry

        Returns:
            float: Similarity score between 0 and 1
        """
        # Simple implementation - compare tasks and reflections
        task1 = entry1.get("task", "")
        task2 = entry2.get("task", "")
        reflection1 = entry1.get("reflection", "")
        reflection2 = entry2.get("reflection", "")

        words1 = set((task1 + " " + reflection1).lower().split())
        words2 = set((task2 + " " + reflection2).lower().split())

        if not words1 or not words2:
            return 0.0

        intersection = words1.intersection(words2)
        return len(intersection) / (
            len(words1) + len(words2) - len(intersection)
        )

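# Illustrative sketch of the keyword scoring above (hypothetical values):
#
#   task = "Summarize the quarterly sales report"
#   memory = {"task": "Summarize the annual sales report", "reflection": ""}
#   shared words -> {"summarize", "the", "sales", "report"}
#   _calculate_relevance -> 4 / min(5, 5) = 0.8
#
#   Between two entries with those tasks, _similarity uses the Jaccard form:
#   4 / (5 + 5 - 4) ≈ 0.67, compared against the 0.8 deduplication threshold.
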
class ReflexionAgent:
    """
    An advanced agent that implements the Reflexion framework to improve through self-reflection.

    The agent follows a process of:
    1. Acting on tasks
    2. Evaluating its performance
    3. Generating self-reflections
    4. Using these reflections to improve future responses

    Attributes:
        agent_name (str): The name of the agent
        system_prompt (str): The system prompt for the agent
        model_name (str): The model name used for generating responses
        conversation (Conversation): Instance to manage conversation history
        max_loops (int): Maximum number of reflection iterations per task
        memory (ReflexionMemory): Memory system to store experiences and reflections
        actor (Agent): The agent that generates initial responses
        evaluator (Agent): The agent that evaluates responses
        reflector (Agent): The agent that generates self-reflections
    """

    def __init__(
        self,
        agent_name: str = "reflexion-agent",
        system_prompt: str = REFLEXION_PROMPT,
        model_name: str = "openai/o1",
        max_loops: int = 3,
        memory_capacity: int = 100,
    ) -> None:
        """
        Initializes the ReflexionAgent with specified parameters.

        Args:
            agent_name (str): The name of the agent
            system_prompt (str): The system prompt for the agent
            model_name (str): The model name used for generating responses
            max_loops (int): Maximum number of reflection iterations per task
            memory_capacity (int): Maximum capacity of long-term memory
        """
        self.agent_name = agent_name
        self.system_prompt = system_prompt
        self.model_name = model_name
        self.conversation = Conversation(time_enabled=True)
        self.max_loops = max_loops
        self.memory = ReflexionMemory(memory_capacity=memory_capacity)

        # Actor agent - generates initial responses
        self.actor = Agent(
            agent_name=f"{agent_name}-actor",
            agent_description="You generate thorough, accurate, and helpful responses to tasks",
            system_prompt=system_prompt,
            model_name=model_name,
            max_loops=1,
        )

        # Evaluator agent - evaluates responses
        self.evaluator = Agent(
            agent_name=f"{agent_name}-evaluator",
            agent_description="You critically evaluate responses against quality criteria",
            system_prompt="""You are an expert evaluator of text quality.
            Your job is to thoroughly assess responses against these criteria:
            1. Accuracy: Is all information factually correct?
            2. Completeness: Does it address all aspects of the query?
            3. Clarity: Is it well-structured and easy to understand?
            4. Relevance: Does it focus on what the user needs?
            5. Actionability: Does it provide practical, implementable solutions?

            For each criterion, provide:
            - A score from 1-10
            - Specific examples of what was done well or poorly
            - Concrete suggestions for improvement

            Be precise, objective, and constructive in your criticism.
            Your goal is to help improve responses, not just criticize them.
            End with an overall assessment and a final score from 1-10.
            """,
            model_name=model_name,
            max_loops=1,
        )

        # Reflector agent - generates self-reflections
        self.reflector = Agent(
            agent_name=f"{agent_name}-reflector",
            agent_description="You generate insightful self-reflections to improve future responses",
            system_prompt="""You are an expert at generating insightful self-reflections.

            Given a task, a response to that task, and an evaluation of that response, your job is to create a thoughtful self-reflection that will help improve future responses to similar tasks.

            Your reflection should:
            1. Identify key strengths and weaknesses in the response
            2. Analyze why certain approaches worked or didn't work
            3. Extract general principles and lessons learned
            4. Provide specific strategies for handling similar tasks better in the future
            5. Be concrete and actionable, not vague or general

            Focus on extracting lasting insights that will be valuable for improving future performance. Be honest about shortcomings while maintaining a constructive, improvement-oriented tone.
            """,
            model_name=model_name,
            max_loops=1,
        )

        logger.info(
            f"Initialized {self.agent_name} with model {self.model_name}"
        )

    def act(
        self,
        task: str,
        relevant_memories: List[Dict[str, Any]] = None,
    ) -> str:
        """
        Generate a response to the given task using the actor agent.

        Args:
            task (str): The task to respond to
            relevant_memories (List[Dict[str, Any]]): Relevant past memories to consider

        Returns:
            str: The generated response
        """
        # Construct prompt with relevant memories if available
        prompt = task
        if relevant_memories and len(relevant_memories) > 0:
            memories_text = "\n\n".join(
                [
                    f"PAST REFLECTION: {memory.get('reflection', 'No reflection available')}"
                    for memory in relevant_memories
                ]
            )
            prompt = f"""TASK: {task}

RELEVANT PAST REFLECTIONS:
{memories_text}

Based on the task and relevant past reflections, provide a comprehensive response."""

        logger.debug(f"Actor prompt: {prompt}")

        # Generate response
        start_time = time.time()
        response = self.actor.run(task=prompt)
        end_time = time.time()

        logger.debug(
            f"Actor generated response in {end_time - start_time:.2f}s"
        )

        return response

    def evaluate(self, task: str, response: str) -> Tuple[str, float]:
        """
        Evaluate the quality of a response to a task.

        Args:
            task (str): The original task
            response (str): The response to evaluate

        Returns:
            Tuple[str, float]: Evaluation feedback and numerical score
        """
        prompt = f"""TASK: {task}

RESPONSE:
{response}

Evaluate this response thoroughly according to the criteria in your instructions. Be specific and constructive."""

        logger.debug(f"Evaluating response for task: {task[:100]}...")

        evaluation = self.evaluator.run(task=prompt)

        # Extract numerical score from evaluation (in a production system, you'd want a more
        # robust parsing method here, potentially using structured output)
        try:
            # Look for a final score in the format "Final Score: X/10" or similar
            import re

            score_matches = re.findall(
                r"(?:final|overall)\s+score:?\s*(\d+(?:\.\d+)?)",
                evaluation.lower(),
            )
            score = float(score_matches[-1]) if score_matches else 5.0
            # Normalize to 0-1 range
            normalized_score = score / 10.0
        except Exception as e:
            logger.error(f"Failed to extract score: {e}")
            normalized_score = 0.5  # Default mid-range score

        logger.debug(
            f"Evaluation complete. Score: {normalized_score:.2f}"
        )

        return evaluation, normalized_score

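    # Illustrative sketch of the score parsing above (hypothetical evaluator output):
    #
    #   evaluation = "... Overall Score: 8/10"
    #   re.findall(r"(?:final|overall)\s+score:?\s*(\d+(?:\.\d+)?)", evaluation.lower())
    #   -> ["8"], so score = 8.0 and normalized_score = 0.8
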
    def reflect(
        self, task: str, response: str, evaluation: str
    ) -> str:
        """
        Generate a self-reflection based on the task, response, and evaluation.

        Args:
            task (str): The original task
            response (str): The generated response
            evaluation (str): The evaluation feedback

        Returns:
            str: The self-reflection
        """
        prompt = f"""TASK: {task}

RESPONSE:
{response}

EVALUATION:
{evaluation}

Based on this task, response, and evaluation, generate a thoughtful self-reflection that identifies key lessons and strategies for improving future responses to similar tasks."""

        logger.debug(
            f"Generating reflection for task: {task[:100]}..."
        )

        reflection = self.reflector.run(task=prompt)

        logger.debug(f"Reflection generated: {reflection[:100]}...")

        return reflection

    def refine(
        self,
        task: str,
        original_response: str,
        evaluation: str,
        reflection: str,
    ) -> str:
        """
        Refine the original response based on evaluation and reflection.

        Args:
            task (str): The original task
            original_response (str): The original response
            evaluation (str): The evaluation feedback
            reflection (str): The self-reflection

        Returns:
            str: The refined response
        """
        prompt = f"""TASK: {task}

ORIGINAL RESPONSE:
{original_response}

EVALUATION:
{evaluation}

REFLECTION:
{reflection}

Based on the original response, evaluation, and reflection, provide an improved response to the task. Focus on addressing the weaknesses identified while maintaining the strengths."""

        logger.debug(f"Refining response for task: {task[:100]}...")

        refined_response = self.actor.run(task=prompt)

        logger.debug(f"Response refined: {refined_response[:100]}...")

        return refined_response

    def step(
        self,
        task: str,
        iteration: int = 0,
        previous_response: str = None,
    ) -> Dict[str, Any]:
        """
        Process a single task through one iteration of the Reflexion process.

        Args:
            task (str): The task to process
            iteration (int): Current iteration number
            previous_response (str): Response from previous iteration

        Returns:
            Dict[str, Any]: Results of this iteration
        """
        # Retrieve relevant memories if not the first iteration
        relevant_memories = []
        if iteration > 0:
            relevant_memories = self.memory.get_relevant_memories(
                task
            )
            logger.debug(
                f"Retrieved {len(relevant_memories)} relevant memories"
            )

        # Generate response (or use previous response if provided)
        if previous_response is None:
            response = self.act(task, relevant_memories)
        else:
            response = previous_response

        # Evaluate the response
        evaluation, score = self.evaluate(task, response)

        # Generate reflection
        reflection = self.reflect(task, response, evaluation)

        # Store in memory
        memory_entry = {
            "task": task,
            "response": response,
            "evaluation": evaluation,
            "reflection": reflection,
            "score": score,
            "iteration": iteration,
        }

        self.memory.add_short_term_memory(memory_entry)

        # For high-quality reflections or final iterations, add to long-term memory
        if score > 0.8 or iteration == self.max_loops - 1:
            self.memory.add_long_term_memory(memory_entry)

        # Return results of this step
        return {
            "task": task,
            "response": response,
            "evaluation": evaluation,
            "reflection": reflection,
            "score": score,
            "iteration": iteration,
        }

    def run(
        self, tasks: List[str], include_intermediates: bool = False
    ) -> List[Any]:
        """
        Execute the Reflexion process for a list of tasks.

        Args:
            tasks (List[str]): List of tasks to process
            include_intermediates (bool): Whether to include intermediate iterations in results

        Returns:
            List[Any]: Final responses or complete iteration history
        """
        all_results = []

        for task_idx, task in enumerate(tasks):
            logger.info(f"Processing task {task_idx+1}/{len(tasks)}")

            iterations = []
            best_response = None
            best_score = -1

            # Run through multiple iterations of reflection
            for iteration in range(self.max_loops):
                logger.debug(
                    f"Starting iteration {iteration+1}/{self.max_loops}"
                )

                # In the first iteration, generate a new response
                # In subsequent iterations, refine the previous response
                if iteration == 0:
                    step_result = self.step(task, iteration)
                else:
                    # Refine previous response
                    prev_result = iterations[-1]
                    refined_response = self.refine(
                        task,
                        prev_result["response"],
                        prev_result["evaluation"],
                        prev_result["reflection"],
                    )

                    # Evaluate and reflect on the refined response
                    step_result = self.step(
                        task, iteration, refined_response
                    )

                iterations.append(step_result)

                # Track best response based on evaluation score
                if step_result["score"] > best_score:
                    best_response = step_result["response"]
                    best_score = step_result["score"]

                # If score is very high, we can stop early
                if step_result["score"] > 0.9:
                    logger.debug(
                        f"Score {step_result['score']} exceeds threshold. Stopping early."
                    )
                    break

            # Add to conversation history (simplified)
            self.conversation.add("user", task)
            self.conversation.add("assistant", best_response)

            # Determine what to return
            if include_intermediates:
                all_results.append(iterations)
            else:
                all_results.append(best_response)

        return all_results

# # Example usage
# if __name__ == "__main__":
#     # Initialize the Reflexion Agent
#     agent = ReflexionAgent(
#         agent_name="reflexion-agent",
#         model_name="gpt-4o",  # Using OpenAI's model
#         max_loops=1,  # Maximum number of reflection iterations
#     )

#     # Example tasks
#     tasks = [
#         "Explain QFT to a high school student.",
#     ]

#     # Run the agent
#     results = agent.run(tasks)

#     # Print results
#     for i, result in enumerate(results):
#         print(f"\n\nTASK {i+1}:")
#         print(f"{tasks[i]}\n")
#         print("FINAL RESPONSE:")
#         print(f"{result}")
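# A minimal sketch (illustrative, following the run() signature above) of inspecting
# the intermediate iterations instead of only the best response:
#
#     history = agent.run(tasks, include_intermediates=True)
#     for step in history[0]:
#         print(step["iteration"], step["score"])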