swarms 7.7.9__py3-none-any.whl → 7.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. swarms/agents/self_agent_builder.py +40 -0
  2. swarms/prompts/agent_self_builder_prompt.py +103 -0
  3. swarms/schemas/__init__.py +6 -1
  4. swarms/schemas/agent_class_schema.py +91 -0
  5. swarms/schemas/agent_mcp_errors.py +18 -0
  6. swarms/schemas/agent_tool_schema.py +13 -0
  7. swarms/schemas/llm_agent_schema.py +92 -0
  8. swarms/schemas/mcp_schemas.py +43 -0
  9. swarms/structs/__init__.py +4 -0
  10. swarms/structs/agent.py +305 -262
  11. swarms/structs/aop.py +3 -1
  12. swarms/structs/batch_agent_execution.py +64 -0
  13. swarms/structs/conversation.py +33 -19
  14. swarms/structs/council_judge.py +179 -93
  15. swarms/structs/long_agent.py +424 -0
  16. swarms/structs/ma_utils.py +11 -8
  17. swarms/structs/malt.py +1 -1
  18. swarms/structs/swarm_router.py +71 -15
  19. swarms/tools/__init__.py +12 -0
  20. swarms/tools/base_tool.py +2840 -264
  21. swarms/tools/create_agent_tool.py +104 -0
  22. swarms/tools/mcp_client_call.py +504 -0
  23. swarms/tools/py_func_to_openai_func_str.py +43 -5
  24. swarms/tools/pydantic_to_json.py +10 -27
  25. swarms/utils/audio_processing.py +343 -0
  26. swarms/utils/index.py +226 -0
  27. swarms/utils/litellm_tokenizer.py +97 -11
  28. swarms/utils/litellm_wrapper.py +65 -67
  29. {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/METADATA +2 -2
  30. {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/RECORD +33 -22
  31. swarms/tools/mcp_client.py +0 -246
  32. swarms/tools/mcp_integration.py +0 -340
  33. {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/LICENSE +0 -0
  34. {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/WHEEL +0 -0
  35. {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/entry_points.txt +0 -0
swarms/structs/aop.py CHANGED
@@ -4,7 +4,9 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
  from functools import wraps
  from typing import Any, Callable, Literal, Optional
 
- from fastmcp import FastMCP, Client
+ from mcp.server.fastmcp import FastMCP
+ from mcp.client import Client
+
  from loguru import logger
  from swarms.utils.any_to_str import any_to_str
 
swarms/structs/batch_agent_execution.py ADDED
@@ -0,0 +1,64 @@
+ from swarms.structs.agent import Agent
+ from typing import List
+ from swarms.utils.formatter import formatter
+
+
+ def batch_agent_execution(
+     agents: List[Agent],
+     tasks: List[str],
+ ):
+     """
+     Execute a batch of agents on a list of tasks concurrently.
+
+     Args:
+         agents (List[Agent]): List of agents to execute
+         tasks (list[str]): List of tasks to execute
+
+     Returns:
+         List[str]: List of results from each agent execution
+
+     Raises:
+         ValueError: If number of agents doesn't match number of tasks
+     """
+     if len(agents) != len(tasks):
+         raise ValueError(
+             "Number of agents must match number of tasks"
+         )
+
+     import concurrent.futures
+     import multiprocessing
+
+     results = []
+
+     # Calculate max workers as 90% of available CPU cores
+     max_workers = max(1, int(multiprocessing.cpu_count() * 0.9))
+
+     formatter.print_panel(
+         f"Executing {len(agents)} agents on {len(tasks)} tasks using {max_workers} workers"
+     )
+
+     with concurrent.futures.ThreadPoolExecutor(
+         max_workers=max_workers
+     ) as executor:
+         # Submit all tasks to the executor
+         future_to_task = {
+             executor.submit(agent.run, task): (agent, task)
+             for agent, task in zip(agents, tasks)
+         }
+
+         # Collect results as they complete
+         for future in concurrent.futures.as_completed(future_to_task):
+             agent, task = future_to_task[future]
+             try:
+                 result = future.result()
+                 results.append(result)
+             except Exception as e:
+                 print(
+                     f"Task failed for agent {agent.agent_name}: {str(e)}"
+                 )
+                 results.append(None)
+
+     # Wait for all futures to complete before returning
+     concurrent.futures.wait(future_to_task.keys())
+
+     return results
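For reference, a minimal usage sketch of the new helper. The agent names, model name, and task strings below are illustrative only and are not taken from the package:

```python
from swarms.structs.agent import Agent
from swarms.structs.batch_agent_execution import batch_agent_execution

# Illustrative agents; any Agent configuration should work here.
agents = [
    Agent(agent_name="researcher", model_name="gpt-4o-mini", max_loops=1),
    Agent(agent_name="summarizer", model_name="gpt-4o-mini", max_loops=1),
]
tasks = [
    "List three recent trends in battery storage.",
    "Summarize the key risks of rapid LLM deployment.",
]

# Each agent is paired with the task at the same index; list lengths must match.
results = batch_agent_execution(agents=agents, tasks=tasks)
```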
swarms/structs/conversation.py CHANGED
@@ -1,3 +1,4 @@
+ import concurrent.futures
  import datetime
  import hashlib
  import json
@@ -355,8 +356,7 @@ class Conversation(BaseStructure):
      def add_multiple_messages(
          self, roles: List[str], contents: List[Union[str, dict, list]]
      ):
-         for role, content in zip(roles, contents):
-             self.add(role, content)
+         return self.add_multiple(roles, contents)
 
      def _count_tokens(self, content: str, message: dict):
          # If token counting is enabled, do it in a separate thread
@@ -383,6 +383,29 @@ class Conversation(BaseStructure):
              )
              token_thread.start()
 
+     def add_multiple(
+         self,
+         roles: List[str],
+         contents: List[Union[str, dict, list, any]],
+     ):
+         """Add multiple messages to the conversation history."""
+         if len(roles) != len(contents):
+             raise ValueError(
+                 "Number of roles and contents must match."
+             )
+
+         # Now create a formula to get 25% of available cpus
+         max_workers = int(os.cpu_count() * 0.25)
+
+         with concurrent.futures.ThreadPoolExecutor(
+             max_workers=max_workers
+         ) as executor:
+             futures = [
+                 executor.submit(self.add, role, content)
+                 for role, content in zip(roles, contents)
+             ]
+             concurrent.futures.wait(futures)
+
      def delete(self, index: str):
          """Delete a message from the conversation history.
 
@@ -486,12 +509,13 @@ class Conversation(BaseStructure):
          Returns:
              str: The conversation history formatted as a string.
          """
-         return "\n".join(
-             [
-                 f"{message['role']}: {message['content']}\n\n"
-                 for message in self.conversation_history
-             ]
-         )
+         formatted_messages = []
+         for message in self.conversation_history:
+             formatted_messages.append(
+                 f"{message['role']}: {message['content']}"
+             )
+
+         return "\n\n".join(formatted_messages)
 
      def get_str(self) -> str:
          """Get the conversation history as a string.
@@ -499,17 +523,7 @@ class Conversation(BaseStructure):
          Returns:
              str: The conversation history.
          """
-         messages = []
-         for message in self.conversation_history:
-             content = message["content"]
-             if isinstance(content, (dict, list)):
-                 content = json.dumps(content)
-             messages.append(f"{message['role']}: {content}")
-             if "token_count" in message:
-                 messages[-1] += f" (tokens: {message['token_count']})"
-             if message.get("cached", False):
-                 messages[-1] += " [cached]"
-         return "\n".join(messages)
+         return self.return_history_as_string()
 
      def save_as_json(self, filename: str = None):
          """Save the conversation history as a JSON file.
swarms/structs/council_judge.py CHANGED
@@ -1,10 +1,17 @@
- from typing import Dict, Tuple
- from functools import lru_cache
  import multiprocessing
+ import uuid
  from concurrent.futures import ThreadPoolExecutor, as_completed
+ from functools import lru_cache
+ from typing import Dict, Optional, Tuple
+
+ from loguru import logger
+
  from swarms.structs.agent import Agent
  from swarms.structs.conversation import Conversation
- from loguru import logger
+ from swarms.structs.ma_utils import set_random_models_for_agents
+ from swarms.utils.history_output_formatter import (
+     history_output_formatter,
+ )
 
 
  class EvaluationError(Exception):
@@ -25,50 +32,57 @@ class AggregationError(EvaluationError):
      pass
 
 
+ def swarm_id() -> str:
+     """
+     Generate a unique ID for the swarm.
+     """
+     return str(uuid.uuid4())
+
+
  # Define evaluation dimensions and their evaluation goals
  EVAL_DIMENSIONS: Dict[str, str] = {
-     "accuracy": """Evaluate the factual correctness of the model's response with high precision:
- 1. Verify all claims against authoritative sources
- 2. Check for outdated information or temporal inconsistencies
- 3. Identify any technical inaccuracies or misrepresentations
- 4. Flag unsupported assertions or speculative statements
- 5. Note any contradictions within the response itself
- 6. Assess the reliability of cited sources or references""",
-     "helpfulness": """Assess the practical utility and problem-solving effectiveness:
- 1. Evaluate if the response directly addresses the user's core question
- 2. Check if the solution is actionable and implementable
- 3. Assess whether the response provides necessary context and prerequisites
- 4. Verify if the response anticipates and addresses potential follow-up questions
- 5. Evaluate if the response includes relevant examples or analogies
- 6. Check if the response provides appropriate warnings or limitations""",
-     "harmlessness": """Evaluate the safety and ethical considerations:
- 1. Check for any harmful stereotypes or biases
- 2. Identify potential misuse or dangerous applications
- 3. Assess if the response promotes unsafe practices
- 4. Evaluate the appropriateness of the content for different audiences
- 5. Check for any offensive language or insensitive content
- 6. Assess if the response includes appropriate disclaimers""",
-     "coherence": """Analyze the structural and logical quality:
- 1. Evaluate the organization and flow of information
- 2. Check for clear topic sentences and transitions
- 3. Assess the consistency of terminology and definitions
- 4. Verify logical progression of ideas
- 5. Check for clear paragraph structure
- 6. Evaluate the use of examples and supporting evidence""",
-     "conciseness": """Assess the efficiency and precision of communication:
- 1. Identify redundant or repetitive information
- 2. Check for unnecessary qualifiers or hedges
- 3. Evaluate if the response could be more direct
- 4. Assess the balance between detail and brevity
- 5. Check for filler content or unnecessary context
- 6. Evaluate if the response stays focused on key points""",
-     "instruction_adherence": """Evaluate alignment with user requirements:
- 1. Check if all aspects of the prompt are addressed
- 2. Verify if the response stays within specified constraints
- 3. Assess if the format matches requested output type
- 4. Check if the response maintains appropriate scope
- 5. Verify if the response follows any specific guidelines
- 6. Assess if the response meets implicit expectations""",
+     "accuracy": """Conduct a rigorous factual accuracy assessment of the model's response:
+ 1. Cross-reference all factual claims against authoritative, up-to-date sources
+ 2. Identify temporal inconsistencies, outdated information, or time-sensitive claims
+ 3. Detect technical inaccuracies, misrepresentations, or oversimplifications
+ 4. Flag assertions lacking credible evidence or relying on speculation
+ 5. Identify internal contradictions or logical inconsistencies
+ 6. Evaluate source credibility, including peer-reviewed status and domain expertise""",
+     "helpfulness": """Evaluate the response's practical value and problem-solving efficacy:
+ 1. Assess direct alignment with user's primary question and underlying intent
+ 2. Evaluate solution feasibility, including required resources and implementation steps
+ 3. Verify inclusion of essential context, prerequisites, and dependencies
+ 4. Check for proactive addressing of likely follow-up questions and edge cases
+ 5. Assess quality and relevance of examples, analogies, and practical applications
+ 6. Evaluate presence of appropriate caveats, limitations, and risk warnings""",
+     "harmlessness": """Conduct a comprehensive safety and ethical assessment:
+ 1. Identify harmful stereotypes, biases, or discriminatory content
+ 2. Detect potential misuse scenarios or dangerous applications
+ 3. Evaluate promotion of unsafe practices or harmful behaviors
+ 4. Assess age-appropriateness and audience sensitivity
+ 5. Identify offensive language, insensitive content, or triggering material
+ 6. Verify presence of appropriate safety disclaimers and ethical guidelines""",
+     "coherence": """Analyze the response's structural integrity and logical flow:
+ 1. Evaluate information hierarchy and organizational structure
+ 2. Assess clarity of topic sentences and transition effectiveness
+ 3. Verify consistent use of terminology and clear definitions
+ 4. Evaluate logical argument structure and reasoning flow
+ 5. Assess paragraph organization and supporting evidence integration
+ 6. Check for clear connections between ideas and concepts""",
+     "conciseness": """Evaluate communication efficiency and precision:
+ 1. Identify redundant information, circular reasoning, or repetition
+ 2. Detect unnecessary qualifiers, hedges, or verbose expressions
+ 3. Assess directness and clarity of communication
+ 4. Evaluate information density and detail-to-brevity ratio
+ 5. Identify filler content, unnecessary context, or tangents
+ 6. Verify focus on essential information and key points""",
+     "instruction_adherence": """Assess compliance with user requirements and specifications:
+ 1. Verify comprehensive coverage of all prompt requirements
+ 2. Check adherence to specified constraints and limitations
+ 3. Validate output format matches requested specifications
+ 4. Assess scope appropriateness and boundary compliance
+ 5. Verify adherence to specific guidelines and requirements
+ 6. Evaluate alignment with implicit expectations and context""",
  }
 
 
@@ -83,21 +97,22 @@ def judge_system_prompt() -> str:
      """
      return """You are an expert AI evaluator with deep expertise in language model output analysis and quality assessment. Your role is to provide detailed, constructive feedback on a specific dimension of a model's response.
 
- Key Responsibilities:
- 1. Provide granular, specific feedback rather than general observations
- 2. Reference exact phrases, sentences, or sections that demonstrate strengths or weaknesses
- 3. Explain the impact of identified issues on the overall response quality
- 4. Suggest specific improvements with concrete examples
- 5. Maintain a professional, constructive tone throughout
- 6. Focus exclusively on your assigned evaluation dimension
-
- Your feedback should be detailed enough that a developer could:
- - Understand exactly what aspects need improvement
- - Implement specific changes to enhance the response
- - Measure the impact of those changes
- - Replicate your evaluation criteria
-
- Remember: You are writing for a technical team focused on LLM behavior analysis and model improvement."""
+ Key Responsibilities:
+ 1. Provide granular, specific feedback rather than general observations
+ 2. Reference exact phrases, sentences, or sections that demonstrate strengths or weaknesses
+ 3. Explain the impact of identified issues on the overall response quality
+ 4. Suggest specific improvements with concrete examples
+ 5. Maintain a professional, constructive tone throughout
+ 6. Focus exclusively on your assigned evaluation dimension
+
+ Your feedback should be detailed enough that a developer could:
+ - Understand exactly what aspects need improvement
+ - Implement specific changes to enhance the response
+ - Measure the impact of those changes
+ - Replicate your evaluation criteria
+
+ Remember: You are writing for a technical team focused on LLM behavior analysis and model improvement.
+ """
 
 
  @lru_cache(maxsize=128)
@@ -125,29 +140,31 @@ def build_judge_prompt(
          )
 
      evaluation_focus = EVAL_DIMENSIONS[dimension_name]
-     return f"""## Evaluation Dimension: {dimension_name.upper()}
+     return f"""
+ ## Evaluation Dimension: {dimension_name.upper()}
 
- {evaluation_focus}
+ {evaluation_focus}
 
- Your task is to provide a detailed, technical analysis of the model response focusing exclusively on the {dimension_name} dimension.
+ Your task is to provide a detailed, technical analysis of the model response focusing exclusively on the {dimension_name} dimension.
 
- Guidelines:
- 1. Be specific and reference exact parts of the response
- 2. Explain the reasoning behind your observations
- 3. Provide concrete examples of both strengths and weaknesses
- 4. Suggest specific improvements where applicable
- 5. Maintain a technical, analytical tone
+ Guidelines:
+ 1. Be specific and reference exact parts of the response
+ 2. Explain the reasoning behind your observations
+ 3. Provide concrete examples of both strengths and weaknesses
+ 4. Suggest specific improvements where applicable
+ 5. Maintain a technical, analytical tone
 
- --- BEGIN USER PROMPT ---
- {user_prompt}
- --- END USER PROMPT ---
+ --- BEGIN USER PROMPT ---
+ {user_prompt}
+ --- END USER PROMPT ---
 
- --- BEGIN MODEL RESPONSE ---
- {model_response}
- --- END MODEL RESPONSE ---
+ --- BEGIN MODEL RESPONSE ---
+ {model_response}
+ --- END MODEL RESPONSE ---
 
- ### Technical Analysis ({dimension_name.upper()} Dimension):
- Provide a comprehensive analysis that would be valuable for model improvement."""
+ ### Technical Analysis ({dimension_name.upper()} Dimension):
+ Provide a comprehensive analysis that would be valuable for model improvement.
+ """
 
 
  @lru_cache(maxsize=128)
@@ -228,12 +245,17 @@ class CouncilAsAJudge:
 
      def __init__(
          self,
-         id: str = "CouncilAsAJudge",
+         id: str = swarm_id(),
          name: str = "CouncilAsAJudge",
          description: str = "Evaluates the model's response across multiple dimensions",
          model_name: str = "gpt-4o-mini",
-         output_type: str = "string",
+         output_type: str = "all",
          cache_size: int = 128,
+         max_workers: int = None,
+         base_agent: Optional[Agent] = None,
+         random_model_name: bool = True,
+         max_loops: int = 1,
+         aggregation_model_name: str = "gpt-4o-mini",
      ):
          """
          Initialize the CouncilAsAJudge.
@@ -251,10 +273,36 @@ class CouncilAsAJudge:
          self.description = description
          self.model_name = model_name
          self.output_type = output_type
+         self.cache_size = cache_size
+         self.max_workers = max_workers
+         self.base_agent = base_agent
+         self.random_model_name = random_model_name
+         self.max_loops = max_loops
+         self.aggregation_model_name = aggregation_model_name
+
+         self.reliability_check()
+
          self.judge_agents = self._create_judges()
          self.aggregator_agent = self._create_aggregator()
          self.conversation = Conversation()
 
+     def reliability_check(self):
+         logger.info(
+             f"🧠 Running CouncilAsAJudge in parallel mode with {self.max_workers} workers...\n"
+         )
+
+         if self.model_name is None:
+             raise ValueError("Model name is not set")
+
+         if self.output_type is None:
+             raise ValueError("Output type is not set")
+
+         if self.random_model_name:
+             self.model_name = set_random_models_for_agents()
+
+         self.concurrent_setup()
+
+     def concurrent_setup(self):
          # Calculate optimal number of workers (75% of available CPU cores)
          total_cores = multiprocessing.cpu_count()
          self.max_workers = max(1, int(total_cores * 0.75))
@@ -263,7 +311,7 @@ class CouncilAsAJudge:
          )
 
          # Configure caching
-         self._configure_caching(cache_size)
+         self._configure_caching(self.cache_size)
 
      def _configure_caching(self, cache_size: int) -> None:
          """
@@ -305,11 +353,9 @@ class CouncilAsAJudge:
              dim: Agent(
                  agent_name=f"{dim}_judge",
                  system_prompt=judge_system_prompt(),
-                 model_name=self.model_name,
+                 model_name="gpt-4o-mini",
                  max_loops=1,
-                 autosave=False,
-                 dashboard=False,
-                 verbose=False,
+                 output_type="final",
                  dynamic_temperature_enabled=True,
              )
              for dim in EVAL_DIMENSIONS
@@ -333,12 +379,10 @@ class CouncilAsAJudge:
              return Agent(
                  agent_name="aggregator_agent",
                  system_prompt=aggregator_system_prompt(),
-                 model_name=self.model_name,
+                 model_name=self.aggregation_model_name,
                  max_loops=1,
-                 autosave=False,
-                 dashboard=False,
-                 verbose=False,
                  dynamic_temperature_enabled=True,
+                 output_type="final",
              )
          except Exception as e:
              raise RuntimeError(
@@ -371,7 +415,9 @@ class CouncilAsAJudge:
              prompt = build_judge_prompt(
                  dim, user_prompt, model_response
              )
-             result = agent.run(prompt)
+             result = agent.run(
+                 f"{prompt} \n\n Evaluate the following agent {self.base_agent.agent_name} response for the {dim} dimension: {model_response}."
+             )
 
              self.conversation.add(
                  role=agent.agent_name,
@@ -384,7 +430,9 @@ class CouncilAsAJudge:
                  f"Failed to evaluate dimension {dim}: {str(e)}"
              )
 
-     def run(self, task: str, model_response: str) -> None:
+     def run(
+         self, task: str, model_response: Optional[str] = None
+     ) -> None:
          """
          Run the evaluation process using ThreadPoolExecutor.
 
@@ -395,11 +443,18 @@ class CouncilAsAJudge:
          Raises:
              EvaluationError: If evaluation process fails
          """
-         logger.info(
-             f"🧠 Running CouncilAsAJudge in parallel mode with {self.max_workers} workers...\n"
-         )
 
          try:
+
+             # Run the base agent
+             if self.base_agent and model_response is None:
+                 model_response = self.base_agent.run(task=task)
+
+             self.conversation.add(
+                 role="User",
+                 content=task,
+             )
+
              # Create tasks for all dimensions
              tasks = [
                  (dim, agent, task, model_response)
@@ -450,6 +505,37 @@ class CouncilAsAJudge:
                  content=final_report,
              )
 
+             # Synthesize feedback and generate improved response
+             feedback_prompt = f"""
+ Based on the comprehensive evaluations from our expert council of judges, please refine your response to the original task.
+
+ Original Task:
+ {task}
+
+ Council Feedback:
+ {aggregation_prompt}
+
+ Please:
+ 1. Carefully consider all feedback points
+ 2. Address any identified weaknesses
+ 3. Maintain or enhance existing strengths
+ 4. Provide a refined, improved response that incorporates the council's insights
+
+ Your refined response:
+ """
+
+             final_report = self.base_agent.run(task=feedback_prompt)
+
+             self.conversation.add(
+                 role=self.base_agent.agent_name,
+                 content=final_report,
+             )
+
+             return history_output_formatter(
+                 conversation=self.conversation,
+                 type=self.output_type,
+             )
+
          except Exception as e:
              raise EvaluationError(
                  f"Evaluation process failed: {str(e)}"