swarms 7.6.0__py3-none-any.whl → 7.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,625 @@
1
+ from typing import List, Dict, Any, Tuple
2
+ import time
3
+ from datetime import datetime
4
+
5
+ from swarms.structs.agent import Agent
6
+ from swarms.structs.conversation import Conversation
7
+
8
+ from loguru import logger
9
+
10
+
11
# Default system prompt for the Reflexion workflow. ReflexionAgent uses it as
# the default value of its `system_prompt` argument and passes it to the actor
# agent, i.e. the agent that writes and later refines responses.
REFLEXION_PROMPT = """You are Reflexion, an advanced AI assistant designed to generate high-quality responses and continuously improve through self-reflection.

CAPABILITIES:
- Deep reasoning: Break down complex problems step-by-step
- Self-evaluation: Critically assess your own responses
- Self-reflection: Generate insights about your performance and areas for improvement
- Memory utilization: Learn from past experiences and build upon previous knowledge

PROCESS:
1. UNDERSTAND the user's query thoroughly
2. GENERATE a detailed, thoughtful response
3. EVALUATE your response against these criteria:
- Accuracy: Is all information factually correct?
- Completeness: Does it address all aspects of the query?
- Clarity: Is it well-structured and easy to understand?
- Relevance: Does it focus on what the user needs?
- Actionability: Does it provide practical, implementable solutions?
4. REFLECT on your performance and identify improvements
5. REFINE your response based on self-reflection

KEY PRINCIPLES:
- Be thorough but concise
- Prioritize practical, actionable advice
- Maintain awareness of your limitations
- Be transparent about uncertainty
- Learn continuously from each interaction

Always maintain your role as a helpful assistant focused on providing valuable information and solutions.
"""
41
+
42
+
43
class ReflexionMemory:
    """
    A memory system for the Reflexion agent to store past experiences, reflections, and feedback.

    Entries are plain dictionaries. The same dictionary object may be stored in
    both the short-term and the long-term list; it is stamped in place with a
    ``"timestamp"`` key each time it is added.

    Attributes:
        short_term_memory (List[Dict]): Recent interactions and their evaluations
        long_term_memory (List[Dict]): Persistent storage of important reflections and patterns
        memory_capacity (int): Maximum number of entries in long-term memory
        short_term_capacity (int): Maximum number of entries in short-term memory
    """

    # Jaccard similarity above which a new long-term entry counts as a duplicate.
    SIMILARITY_THRESHOLD = 0.8

    def __init__(
        self, memory_capacity: int = 100, short_term_capacity: int = 10
    ):
        """
        Initialize the memory system.

        Args:
            memory_capacity (int): Maximum number of entries in long-term memory
            short_term_capacity (int): Maximum number of entries in short-term memory
        """
        self.short_term_memory: List[Dict[str, Any]] = []
        self.long_term_memory: List[Dict[str, Any]] = []
        self.memory_capacity = memory_capacity
        self.short_term_capacity = short_term_capacity

    def add_short_term_memory(self, entry: Dict[str, Any]) -> None:
        """
        Add an entry to short-term memory, evicting the oldest entries when full.

        Args:
            entry (Dict[str, Any]): Memory entry containing task, response, evaluation, etc.
                The dictionary is modified in place: its "timestamp" key is (re)set.
        """
        # Add timestamp to track when memories were created
        entry["timestamp"] = datetime.now().isoformat()
        self.short_term_memory.append(entry)
        self._evict_oldest(
            self.short_term_memory, self.short_term_capacity
        )

    def add_long_term_memory(self, entry: Dict[str, Any]) -> None:
        """
        Add an important entry to long-term memory unless a near-duplicate exists.

        Args:
            entry (Dict[str, Any]): Memory entry containing task, response, evaluation, etc.
                The dictionary is modified in place: its "timestamp" key is (re)set,
                even when the entry ends up being rejected as a duplicate.
        """
        entry["timestamp"] = datetime.now().isoformat()

        # Skip entries whose task/reflection wording largely overlaps a stored one
        is_duplicate = any(
            self._similarity(existing, entry) > self.SIMILARITY_THRESHOLD
            for existing in self.long_term_memory
        )
        if is_duplicate:
            logger.debug(
                "Similar entry already exists in long-term memory"
            )
            return

        self.long_term_memory.append(entry)

        # Simple FIFO strategy once capacity is exceeded
        self._evict_oldest(self.long_term_memory, self.memory_capacity)

    def get_relevant_memories(
        self, task: str, limit: int = 5
    ) -> List[Dict[str, Any]]:
        """
        Retrieve the memories that score highest against the current task.

        Short-term and long-term entries are ranked together by keyword overlap.
        An entry object stored in both lists is returned only once.

        Args:
            task (str): The current task
            limit (int): Maximum number of memories to retrieve; values <= 0 yield
                an empty list

        Returns:
            List[Dict[str, Any]]: Up to `limit` memories, most relevant first
        """
        if limit <= 0:
            return []

        # In a production implementation, this would use embeddings and vector similarity
        # For now, implement a simple keyword-based relevance scoring
        seen_ids = set()
        scored_memories = []
        for memory in self.short_term_memory + self.long_term_memory:
            # The agent stores one dict object in both lists; rank it once only
            if id(memory) in seen_ids:
                continue
            seen_ids.add(id(memory))
            scored_memories.append(
                (self._calculate_relevance(memory, task), memory)
            )

        # Stable sort: ties keep insertion order (short-term before long-term)
        scored_memories.sort(key=lambda pair: pair[0], reverse=True)

        return [memory for _, memory in scored_memories[:limit]]

    def _calculate_relevance(
        self, memory: Dict[str, Any], task: str
    ) -> float:
        """
        Calculate relevance of a memory to the current task.

        The score is the number of shared lowercase words divided by the size
        of the smaller of the two word sets.

        Args:
            memory (Dict[str, Any]): The memory entry
            task (str): The current task

        Returns:
            float: Relevance score between 0 and 1 (0.0 when either side has no words)
        """
        task_words = self._word_set(task)
        memory_words = self._word_set(
            memory.get("task", ""), memory.get("reflection", "")
        )

        if not task_words or not memory_words:
            return 0.0

        shared = task_words & memory_words
        return len(shared) / min(len(task_words), len(memory_words))

    def _similarity(
        self, entry1: Dict[str, Any], entry2: Dict[str, Any]
    ) -> float:
        """
        Calculate similarity between two memory entries.

        Uses the Jaccard index (intersection over union) of the lowercase words
        found in each entry's task and reflection.

        Args:
            entry1 (Dict[str, Any]): First memory entry
            entry2 (Dict[str, Any]): Second memory entry

        Returns:
            float: Similarity score between 0 and 1 (0.0 when either entry has no words)
        """
        words1 = self._word_set(
            entry1.get("task", ""), entry1.get("reflection", "")
        )
        words2 = self._word_set(
            entry2.get("task", ""), entry2.get("reflection", "")
        )

        if not words1 or not words2:
            return 0.0

        shared = words1 & words2
        return len(shared) / (len(words1) + len(words2) - len(shared))

    @staticmethod
    def _word_set(*parts: Any) -> set:
        """Return the set of lowercase whitespace-separated words in `parts`, ignoring None."""
        # str() keeps scoring from crashing on non-string task/reflection values
        text = " ".join(str(part) for part in parts if part is not None)
        return set(text.lower().split())

    @staticmethod
    def _evict_oldest(
        entries: List[Dict[str, Any]], capacity: int
    ) -> None:
        """Drop the oldest items of `entries` in place until at most `capacity` remain."""
        excess = len(entries) - capacity
        if excess > 0:
            del entries[:excess]
192
+
193
+
194
class ReflexionAgent:
    """
    An advanced agent that implements the Reflexion framework to improve through self-reflection.

    The agent follows a process of:
    1. Acting on tasks
    2. Evaluating its performance
    3. Generating self-reflections
    4. Using these reflections to improve future responses

    Attributes:
        agent_name (str): The name of the agent
        system_prompt (str): The system prompt for the agent
        model_name (str): The model name used for generating responses
        conversation (Conversation): Instance to manage conversation history
        max_loops (int): Maximum number of reflection iterations per task
        memory (ReflexionMemory): Memory system to store experiences and reflections
        actor (Agent): The agent that generates initial responses
        evaluator (Agent): The agent that evaluates responses
        reflector (Agent): The agent that generates self-reflections
    """

    def __init__(
        self,
        agent_name: str = "reflexion-agent",
        system_prompt: str = REFLEXION_PROMPT,
        model_name: str = "openai/o1",
        max_loops: int = 3,
        memory_capacity: int = 100,
    ) -> None:
        """
        Initializes the ReflexionAgent with specified parameters.

        Args:
            agent_name (str): The name of the agent
            system_prompt (str): The system prompt for the agent
            model_name (str): The model name used for generating responses
            max_loops (int): Maximum number of reflection iterations per task
            memory_capacity (int): Maximum capacity of long-term memory
        """
        self.agent_name = agent_name
        self.system_prompt = system_prompt
        self.model_name = model_name
        self.conversation = Conversation(time_enabled=True)
        self.max_loops = max_loops
        self.memory = ReflexionMemory(memory_capacity=memory_capacity)

        # Actor agent - generates initial responses
        self.actor = Agent(
            agent_name=f"{agent_name}-actor",
            agent_description="You generate thorough, accurate, and helpful responses to tasks",
            system_prompt=system_prompt,
            model_name=model_name,
            max_loops=1,
        )

        # Evaluator agent - evaluates responses
        self.evaluator = Agent(
            agent_name=f"{agent_name}-evaluator",
            agent_description="You critically evaluate responses against quality criteria",
            system_prompt="""You are an expert evaluator of text quality.
Your job is to thoroughly assess responses against these criteria:
1. Accuracy: Is all information factually correct?
2. Completeness: Does it address all aspects of the query?
3. Clarity: Is it well-structured and easy to understand?
4. Relevance: Does it focus on what the user needs?
5. Actionability: Does it provide practical, implementable solutions?

For each criterion, provide:
- A score from 1-10
- Specific examples of what was done well or poorly
- Concrete suggestions for improvement

Be precise, objective, and constructive in your criticism.
Your goal is to help improve responses, not just criticize them.
End with an overall assessment and a final score from 1-10.
""",
            model_name=model_name,
            max_loops=1,
        )

        # Reflector agent - generates self-reflections
        self.reflector = Agent(
            agent_name=f"{agent_name}-reflector",
            agent_description="You generate insightful self-reflections to improve future responses",
            system_prompt="""You are an expert at generating insightful self-reflections.

Given a task, a response to that task, and an evaluation of that response, your job is to create a thoughtful self-reflection that will help improve future responses to similar tasks.

Your reflection should:
1. Identify key strengths and weaknesses in the response
2. Analyze why certain approaches worked or didn't work
3. Extract general principles and lessons learned
4. Provide specific strategies for handling similar tasks better in the future
5. Be concrete and actionable, not vague or general

Focus on extracting lasting insights that will be valuable for improving future performance. Be honest about shortcomings while maintaining a constructive, improvement-oriented tone.
""",
            model_name=model_name,
            max_loops=1,
        )

        logger.info(
            f"Initialized {self.agent_name} with model {self.model_name}"
        )

    def act(
        self,
        task: str,
        relevant_memories: List[Dict[str, Any]] = None,
    ) -> str:
        """
        Generate a response to the given task using the actor agent.

        Args:
            task (str): The task to respond to
            relevant_memories (List[Dict[str, Any]]): Relevant past memories to consider;
                None or an empty list sends the bare task to the actor

        Returns:
            str: The generated response
        """
        # Construct prompt with relevant memories if available
        prompt = task
        if relevant_memories:
            memories_text = "\n\n".join(
                f"PAST REFLECTION: {memory.get('reflection', 'No reflection available')}"
                for memory in relevant_memories
            )
            prompt = f"""TASK: {task}

RELEVANT PAST REFLECTIONS:
{memories_text}

Based on the task and relevant past reflections, provide a comprehensive response."""

        logger.debug(f"Actor prompt: {prompt}")

        # perf_counter is monotonic, so the measured duration cannot go
        # negative if the wall clock is adjusted mid-call
        start_time = time.perf_counter()
        response = self.actor.run(task=prompt)
        elapsed = time.perf_counter() - start_time

        logger.debug(f"Actor generated response in {elapsed:.2f}s")

        return response

    def evaluate(self, task: str, response: str) -> Tuple[str, float]:
        """
        Evaluate the quality of a response to a task.

        Args:
            task (str): The original task
            response (str): The response to evaluate

        Returns:
            Tuple[str, float]: Evaluation feedback and a numerical score in [0, 1]
                (0.5 when no score can be found in the feedback)
        """
        prompt = f"""TASK: {task}

RESPONSE:
{response}

Evaluate this response thoroughly according to the criteria in your instructions. Be specific and constructive."""

        logger.debug(f"Evaluating response for task: {task[:100]}...")

        evaluation = self.evaluator.run(task=prompt)

        normalized_score = self._extract_score(evaluation)

        logger.debug(
            f"Evaluation complete. Score: {normalized_score:.2f}"
        )

        return evaluation, normalized_score

    @staticmethod
    def _extract_score(evaluation: Any) -> float:
        """
        Parse the last "final score" / "overall score" from evaluator feedback.

        Accepts forms such as "Final Score: 8", "Overall score: 7.5/10" and
        "**Final Score:** 8/10". A bare number is read as out of 10; an explicit
        "/N" denominator is honoured. The result is clamped to [0, 1].

        Args:
            evaluation (Any): Evaluator output; converted with str() before parsing

        Returns:
            float: Normalized score in [0, 1], or 0.5 when no score is present
        """
        import re  # local import: this module has no top-level `import re`

        matches = re.findall(
            r"(?:final|overall)\s+score[\s:*]*(\d+(?:\.\d+)?)(?:\s*/\s*(\d+(?:\.\d+)?))?",
            str(evaluation).lower(),
        )
        if not matches:
            logger.debug(
                "No final/overall score found in evaluation; using default 0.5"
            )
            return 0.5  # Default mid-range score

        # The last match is taken as the evaluator's concluding verdict
        raw_score, raw_scale = matches[-1]
        scale = float(raw_scale) if raw_scale else 10.0
        if scale <= 0:
            scale = 10.0

        # Clamp so out-of-range answers cannot exceed the 0-1 range that the
        # early-stop and long-term-memory thresholds assume
        return min(1.0, max(0.0, float(raw_score) / scale))

    def reflect(
        self, task: str, response: str, evaluation: str
    ) -> str:
        """
        Generate a self-reflection based on the task, response, and evaluation.

        Args:
            task (str): The original task
            response (str): The generated response
            evaluation (str): The evaluation feedback

        Returns:
            str: The self-reflection
        """
        prompt = f"""TASK: {task}

RESPONSE:
{response}

EVALUATION:
{evaluation}

Based on this task, response, and evaluation, generate a thoughtful self-reflection that identifies key lessons and strategies for improving future responses to similar tasks."""

        logger.debug(
            f"Generating reflection for task: {task[:100]}..."
        )

        reflection = self.reflector.run(task=prompt)

        # str() so a non-string return value cannot break the log preview
        logger.debug(
            f"Reflection generated: {str(reflection)[:100]}..."
        )

        return reflection

    def refine(
        self,
        task: str,
        original_response: str,
        evaluation: str,
        reflection: str,
    ) -> str:
        """
        Refine the original response based on evaluation and reflection.

        Args:
            task (str): The original task
            original_response (str): The original response
            evaluation (str): The evaluation feedback
            reflection (str): The self-reflection

        Returns:
            str: The refined response
        """
        prompt = f"""TASK: {task}

ORIGINAL RESPONSE:
{original_response}

EVALUATION:
{evaluation}

REFLECTION:
{reflection}

Based on the original response, evaluation, and reflection, provide an improved response to the task. Focus on addressing the weaknesses identified while maintaining the strengths."""

        logger.debug(f"Refining response for task: {task[:100]}...")

        refined_response = self.actor.run(task=prompt)

        # str() so a non-string return value cannot break the log preview
        logger.debug(
            f"Response refined: {str(refined_response)[:100]}..."
        )

        return refined_response

    def step(
        self,
        task: str,
        iteration: int = 0,
        previous_response: str = None,
    ) -> Dict[str, Any]:
        """
        Process a single task through one iteration of the Reflexion process.

        When no `previous_response` is supplied, a new response is generated by
        the actor, informed by the most relevant stored memories. Otherwise the
        supplied response is evaluated and reflected upon as-is.

        Args:
            task (str): The task to process
            iteration (int): Current iteration number
            previous_response (str): Response from previous iteration

        Returns:
            Dict[str, Any]: Results of this iteration (task, response, evaluation,
                reflection, score, iteration)
        """
        # Generate response (or use previous response if provided)
        if previous_response is None:
            # Memories only matter when the actor writes a fresh response, so
            # look them up here rather than keying the lookup on the iteration.
            relevant_memories = self.memory.get_relevant_memories(task)
            logger.debug(
                f"Retrieved {len(relevant_memories)} relevant memories"
            )
            response = self.act(task, relevant_memories)
        else:
            response = previous_response

        # Evaluate the response
        evaluation, score = self.evaluate(task, response)

        # Generate reflection
        reflection = self.reflect(task, response, evaluation)

        result = {
            "task": task,
            "response": response,
            "evaluation": evaluation,
            "reflection": reflection,
            "score": score,
            "iteration": iteration,
        }

        # Store a separate dict in memory: the memory stamps its entries in
        # place, and the returned result should not pick up that timestamp
        memory_entry = dict(result)
        self.memory.add_short_term_memory(memory_entry)

        # For high-quality reflections or final iterations, add to long-term memory
        if score > 0.8 or iteration == self.max_loops - 1:
            self.memory.add_long_term_memory(memory_entry)

        return result

    def run(
        self, tasks: List[str], include_intermediates: bool = False
    ) -> List[Any]:
        """
        Execute the Reflexion process for a list of tasks.

        Args:
            tasks (List[str]): List of tasks to process. A single string is
                treated as a one-task list rather than iterated per character.
            include_intermediates (bool): Whether to include intermediate iterations in results

        Returns:
            List[Any]: One item per task: the best-scoring response, or the full
                list of iteration results when `include_intermediates` is True
        """
        if isinstance(tasks, str):
            tasks = [tasks]

        all_results = []

        for task_idx, task in enumerate(tasks):
            logger.info(f"Processing task {task_idx+1}/{len(tasks)}")

            iterations = []
            best_response = None
            best_score = -1

            # Run through multiple iterations of reflection
            for iteration in range(self.max_loops):
                logger.debug(
                    f"Starting iteration {iteration+1}/{self.max_loops}"
                )

                # In first iteration, generate new response
                # In subsequent iterations, refine previous response
                if iteration == 0:
                    step_result = self.step(task, iteration)
                else:
                    prev_result = iterations[-1]
                    refined_response = self.refine(
                        task,
                        prev_result["response"],
                        prev_result["evaluation"],
                        prev_result["reflection"],
                    )

                    # Evaluate and reflect on the refined response
                    step_result = self.step(
                        task, iteration, refined_response
                    )

                iterations.append(step_result)

                # Track best response based on evaluation score
                if step_result["score"] > best_score:
                    best_response = step_result["response"]
                    best_score = step_result["score"]

                # If score is very high, we can stop early
                if step_result["score"] > 0.9:
                    logger.debug(
                        f"Score {step_result['score']} exceeds threshold. Stopping early."
                    )
                    break

            # Add to conversation history (simplified)
            self.conversation.add("user", task)
            self.conversation.add("assistant", best_response)

            # Determine what to return
            if include_intermediates:
                all_results.append(iterations)
            else:
                all_results.append(best_response)

        return all_results
601
+
602
+
603
+ # # Example usage
604
+ # if __name__ == "__main__":
605
+ # # Initialize the Reflexion Agent
606
+ # agent = ReflexionAgent(
607
+ # agent_name="reflexion-agent",
608
+ # model_name="gpt-4o", # Using OpenAI's model
609
+ # max_loops=1, # Run a single reflection iteration (the class default is 3)
610
+ # )
611
+
612
+ # # Example tasks
613
+ # tasks = [
614
+ # "Explain QFT to a high school student.",
615
+ # ]
616
+
617
+ # # Run the agent
618
+ # results = agent.run(tasks)
619
+
620
+ # # Print results
621
+ # for i, result in enumerate(results):
622
+ # print(f"\n\nTASK {i+1}:")
623
+ # print(f"{tasks[i]}\n")
624
+ # print("FINAL RESPONSE:")
625
+ # print(f"{result}")