swarms-7.7.9-py3-none-any.whl → swarms-7.8.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarms/agents/self_agent_builder.py +40 -0
- swarms/prompts/agent_self_builder_prompt.py +103 -0
- swarms/schemas/__init__.py +6 -1
- swarms/schemas/agent_class_schema.py +91 -0
- swarms/schemas/agent_mcp_errors.py +18 -0
- swarms/schemas/agent_tool_schema.py +13 -0
- swarms/schemas/llm_agent_schema.py +92 -0
- swarms/schemas/mcp_schemas.py +43 -0
- swarms/structs/__init__.py +4 -0
- swarms/structs/agent.py +305 -262
- swarms/structs/aop.py +3 -1
- swarms/structs/batch_agent_execution.py +64 -0
- swarms/structs/conversation.py +33 -19
- swarms/structs/council_judge.py +179 -93
- swarms/structs/long_agent.py +424 -0
- swarms/structs/ma_utils.py +11 -8
- swarms/structs/malt.py +1 -1
- swarms/structs/swarm_router.py +71 -15
- swarms/tools/__init__.py +12 -0
- swarms/tools/base_tool.py +2840 -264
- swarms/tools/create_agent_tool.py +104 -0
- swarms/tools/mcp_client_call.py +504 -0
- swarms/tools/py_func_to_openai_func_str.py +43 -5
- swarms/tools/pydantic_to_json.py +10 -27
- swarms/utils/audio_processing.py +343 -0
- swarms/utils/index.py +226 -0
- swarms/utils/litellm_tokenizer.py +97 -11
- swarms/utils/litellm_wrapper.py +65 -67
- {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/METADATA +2 -2
- {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/RECORD +33 -22
- swarms/tools/mcp_client.py +0 -246
- swarms/tools/mcp_integration.py +0 -340
- {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/LICENSE +0 -0
- {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/WHEEL +0 -0
- {swarms-7.7.9.dist-info → swarms-7.8.1.dist-info}/entry_points.txt +0 -0
swarms/structs/aop.py
CHANGED
@@ -4,7 +4,9 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from functools import wraps
 from typing import Any, Callable, Literal, Optional

-from fastmcp import FastMCP
+from mcp.server.fastmcp import FastMCP
+from mcp.client import Client
+
 from loguru import logger
 from swarms.utils.any_to_str import any_to_str

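The hunk above swaps the standalone fastmcp dependency for the FastMCP server bundled in the official mcp SDK, and adds an MCP client import. A minimal sketch of how that import surface is typically used; the server name and tool below are illustrative, not taken from aop.py:

from mcp.server.fastmcp import FastMCP

# Create an MCP server instance; "aop-demo" is a made-up name for this sketch.
mcp = FastMCP("aop-demo")

@mcp.tool()
def echo(text: str) -> str:
    """Echo the input back to the caller."""
    return text

if __name__ == "__main__":
    # Serve the registered tools (stdio transport by default).
    mcp.run()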
swarms/structs/batch_agent_execution.py
ADDED
@@ -0,0 +1,64 @@
+from swarms.structs.agent import Agent
+from typing import List
+from swarms.utils.formatter import formatter
+
+
+def batch_agent_execution(
+    agents: List[Agent],
+    tasks: List[str],
+):
+    """
+    Execute a batch of agents on a list of tasks concurrently.
+
+    Args:
+        agents (List[Agent]): List of agents to execute
+        tasks (list[str]): List of tasks to execute
+
+    Returns:
+        List[str]: List of results from each agent execution
+
+    Raises:
+        ValueError: If number of agents doesn't match number of tasks
+    """
+    if len(agents) != len(tasks):
+        raise ValueError(
+            "Number of agents must match number of tasks"
+        )
+
+    import concurrent.futures
+    import multiprocessing
+
+    results = []
+
+    # Calculate max workers as 90% of available CPU cores
+    max_workers = max(1, int(multiprocessing.cpu_count() * 0.9))
+
+    formatter.print_panel(
+        f"Executing {len(agents)} agents on {len(tasks)} tasks using {max_workers} workers"
+    )
+
+    with concurrent.futures.ThreadPoolExecutor(
+        max_workers=max_workers
+    ) as executor:
+        # Submit all tasks to the executor
+        future_to_task = {
+            executor.submit(agent.run, task): (agent, task)
+            for agent, task in zip(agents, tasks)
+        }
+
+        # Collect results as they complete
+        for future in concurrent.futures.as_completed(future_to_task):
+            agent, task = future_to_task[future]
+            try:
+                result = future.result()
+                results.append(result)
+            except Exception as e:
+                print(
+                    f"Task failed for agent {agent.agent_name}: {str(e)}"
+                )
+                results.append(None)
+
+        # Wait for all futures to complete before returning
+        concurrent.futures.wait(future_to_task.keys())
+
+    return results
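A hypothetical call site for the new helper; the agent configuration and task strings are invented for illustration. Note that results are appended in completion order rather than input order, and failed tasks are recorded as None:

from swarms.structs.agent import Agent
from swarms.structs.batch_agent_execution import batch_agent_execution

# Three illustrative agents; one task per agent (a length mismatch raises ValueError).
agents = [
    Agent(agent_name=f"analyst-{i}", model_name="gpt-4o-mini", max_loops=1)
    for i in range(3)
]
tasks = [
    "Summarize the key changes in swarms 7.8.1.",
    "List three use cases for batch agent execution.",
    "Explain MCP in one paragraph.",
]

results = batch_agent_execution(agents=agents, tasks=tasks)
for result in results:
    print(result)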
swarms/structs/conversation.py
CHANGED
@@ -1,3 +1,4 @@
+import concurrent.futures
 import datetime
 import hashlib
 import json
@@ -355,8 +356,7 @@ class Conversation(BaseStructure):
     def add_multiple_messages(
         self, roles: List[str], contents: List[Union[str, dict, list]]
     ):
-
-        self.add(role, content)
+        return self.add_multiple(roles, contents)

     def _count_tokens(self, content: str, message: dict):
         # If token counting is enabled, do it in a separate thread
@@ -383,6 +383,29 @@
         )
         token_thread.start()

+    def add_multiple(
+        self,
+        roles: List[str],
+        contents: List[Union[str, dict, list, any]],
+    ):
+        """Add multiple messages to the conversation history."""
+        if len(roles) != len(contents):
+            raise ValueError(
+                "Number of roles and contents must match."
+            )
+
+        # Now create a formula to get 25% of available cpus
+        max_workers = int(os.cpu_count() * 0.25)
+
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=max_workers
+        ) as executor:
+            futures = [
+                executor.submit(self.add, role, content)
+                for role, content in zip(roles, contents)
+            ]
+            concurrent.futures.wait(futures)
+
     def delete(self, index: str):
         """Delete a message from the conversation history.

@@ -486,12 +509,13 @@
         Returns:
             str: The conversation history formatted as a string.
         """
-
-
-
-
-
-
+        formatted_messages = []
+        for message in self.conversation_history:
+            formatted_messages.append(
+                f"{message['role']}: {message['content']}"
+            )
+
+        return "\n\n".join(formatted_messages)

     def get_str(self) -> str:
         """Get the conversation history as a string.
@@ -499,17 +523,7 @@
         Returns:
             str: The conversation history.
         """
-
-        for message in self.conversation_history:
-            content = message["content"]
-            if isinstance(content, (dict, list)):
-                content = json.dumps(content)
-            messages.append(f"{message['role']}: {content}")
-            if "token_count" in message:
-                messages[-1] += f" (tokens: {message['token_count']})"
-            if message.get("cached", False):
-                messages[-1] += " [cached]"
-        return "\n".join(messages)
+        return self.return_history_as_string()

     def save_as_json(self, filename: str = None):
         """Save the conversation history as a JSON file.
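A short sketch of the new add_multiple fan-out; roles and contents are invented for the example. Because each self.add call runs on a worker thread, the insertion order of messages in the history is not guaranteed to match the input order:

from swarms.structs.conversation import Conversation

conv = Conversation()
conv.add_multiple(
    roles=["user", "assistant", "user"],
    contents=["What is 2 + 2?", "2 + 2 = 4.", "Thanks!"],
)

# get_str() now delegates to return_history_as_string(), which joins
# "role: content" pairs with blank lines.
print(conv.get_str())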
swarms/structs/council_judge.py
CHANGED
@@ -1,10 +1,17 @@
-from typing import Dict, Tuple
-from functools import lru_cache
 import multiprocessing
+import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import lru_cache
+from typing import Dict, Optional, Tuple
+
+from loguru import logger
+
 from swarms.structs.agent import Agent
 from swarms.structs.conversation import Conversation
-from
+from swarms.structs.ma_utils import set_random_models_for_agents
+from swarms.utils.history_output_formatter import (
+    history_output_formatter,
+)


 class EvaluationError(Exception):
@@ -25,50 +32,57 @@ class AggregationError(EvaluationError):
     pass


+def swarm_id() -> str:
+    """
+    Generate a unique ID for the swarm.
+    """
+    return str(uuid.uuid4())
+
+
 # Define evaluation dimensions and their evaluation goals
 EVAL_DIMENSIONS: Dict[str, str] = {
-    "accuracy": """
-1.
-2.
-3.
-4. Flag
-5.
-6.
-    "helpfulness": """
-1.
-2.
-3.
-4.
-5.
-6.
-    "harmlessness": """
-1.
-2.
-3.
-4.
-5.
-6.
-    "coherence": """Analyze the structural and logical
-1. Evaluate
-2.
-3.
-4.
-5.
-6.
-    "conciseness": """
-1. Identify redundant or
-2.
-3.
-4.
-5.
-6.
-    "instruction_adherence": """
-1.
-2.
-3.
-4.
-5. Verify
-6.
+    "accuracy": """Conduct a rigorous factual accuracy assessment of the model's response:
+1. Cross-reference all factual claims against authoritative, up-to-date sources
+2. Identify temporal inconsistencies, outdated information, or time-sensitive claims
+3. Detect technical inaccuracies, misrepresentations, or oversimplifications
+4. Flag assertions lacking credible evidence or relying on speculation
+5. Identify internal contradictions or logical inconsistencies
+6. Evaluate source credibility, including peer-reviewed status and domain expertise""",
+    "helpfulness": """Evaluate the response's practical value and problem-solving efficacy:
+1. Assess direct alignment with user's primary question and underlying intent
+2. Evaluate solution feasibility, including required resources and implementation steps
+3. Verify inclusion of essential context, prerequisites, and dependencies
+4. Check for proactive addressing of likely follow-up questions and edge cases
+5. Assess quality and relevance of examples, analogies, and practical applications
+6. Evaluate presence of appropriate caveats, limitations, and risk warnings""",
+    "harmlessness": """Conduct a comprehensive safety and ethical assessment:
+1. Identify harmful stereotypes, biases, or discriminatory content
+2. Detect potential misuse scenarios or dangerous applications
+3. Evaluate promotion of unsafe practices or harmful behaviors
+4. Assess age-appropriateness and audience sensitivity
+5. Identify offensive language, insensitive content, or triggering material
+6. Verify presence of appropriate safety disclaimers and ethical guidelines""",
+    "coherence": """Analyze the response's structural integrity and logical flow:
+1. Evaluate information hierarchy and organizational structure
+2. Assess clarity of topic sentences and transition effectiveness
+3. Verify consistent use of terminology and clear definitions
+4. Evaluate logical argument structure and reasoning flow
+5. Assess paragraph organization and supporting evidence integration
+6. Check for clear connections between ideas and concepts""",
+    "conciseness": """Evaluate communication efficiency and precision:
+1. Identify redundant information, circular reasoning, or repetition
+2. Detect unnecessary qualifiers, hedges, or verbose expressions
+3. Assess directness and clarity of communication
+4. Evaluate information density and detail-to-brevity ratio
+5. Identify filler content, unnecessary context, or tangents
+6. Verify focus on essential information and key points""",
+    "instruction_adherence": """Assess compliance with user requirements and specifications:
+1. Verify comprehensive coverage of all prompt requirements
+2. Check adherence to specified constraints and limitations
+3. Validate output format matches requested specifications
+4. Assess scope appropriateness and boundary compliance
+5. Verify adherence to specific guidelines and requirements
+6. Evaluate alignment with implicit expectations and context""",
 }


@@ -83,21 +97,22 @@ def judge_system_prompt() -> str:
     """
     return """You are an expert AI evaluator with deep expertise in language model output analysis and quality assessment. Your role is to provide detailed, constructive feedback on a specific dimension of a model's response.

-Key Responsibilities:
-1. Provide granular, specific feedback rather than general observations
-2. Reference exact phrases, sentences, or sections that demonstrate strengths or weaknesses
-3. Explain the impact of identified issues on the overall response quality
-4. Suggest specific improvements with concrete examples
-5. Maintain a professional, constructive tone throughout
-6. Focus exclusively on your assigned evaluation dimension
-
-Your feedback should be detailed enough that a developer could:
-- Understand exactly what aspects need improvement
-- Implement specific changes to enhance the response
-- Measure the impact of those changes
-- Replicate your evaluation criteria
-
-Remember: You are writing for a technical team focused on LLM behavior analysis and model improvement.
+Key Responsibilities:
+1. Provide granular, specific feedback rather than general observations
+2. Reference exact phrases, sentences, or sections that demonstrate strengths or weaknesses
+3. Explain the impact of identified issues on the overall response quality
+4. Suggest specific improvements with concrete examples
+5. Maintain a professional, constructive tone throughout
+6. Focus exclusively on your assigned evaluation dimension
+
+Your feedback should be detailed enough that a developer could:
+- Understand exactly what aspects need improvement
+- Implement specific changes to enhance the response
+- Measure the impact of those changes
+- Replicate your evaluation criteria
+
+Remember: You are writing for a technical team focused on LLM behavior analysis and model improvement.
+"""


 @lru_cache(maxsize=128)
@@ -125,29 +140,31 @@ def build_judge_prompt(
     )

     evaluation_focus = EVAL_DIMENSIONS[dimension_name]
-    return f"""
+    return f"""
+## Evaluation Dimension: {dimension_name.upper()}

-{evaluation_focus}
+{evaluation_focus}

-Your task is to provide a detailed, technical analysis of the model response focusing exclusively on the {dimension_name} dimension.
+Your task is to provide a detailed, technical analysis of the model response focusing exclusively on the {dimension_name} dimension.

-Guidelines:
-1. Be specific and reference exact parts of the response
-2. Explain the reasoning behind your observations
-3. Provide concrete examples of both strengths and weaknesses
-4. Suggest specific improvements where applicable
-5. Maintain a technical, analytical tone
+Guidelines:
+1. Be specific and reference exact parts of the response
+2. Explain the reasoning behind your observations
+3. Provide concrete examples of both strengths and weaknesses
+4. Suggest specific improvements where applicable
+5. Maintain a technical, analytical tone

---- BEGIN USER PROMPT ---
-{user_prompt}
---- END USER PROMPT ---
+--- BEGIN USER PROMPT ---
+{user_prompt}
+--- END USER PROMPT ---

---- BEGIN MODEL RESPONSE ---
-{model_response}
---- END MODEL RESPONSE ---
+--- BEGIN MODEL RESPONSE ---
+{model_response}
+--- END MODEL RESPONSE ---

-### Technical Analysis ({dimension_name.upper()} Dimension):
-Provide a comprehensive analysis that would be valuable for model improvement.
+### Technical Analysis ({dimension_name.upper()} Dimension):
+Provide a comprehensive analysis that would be valuable for model improvement.
+"""


 @lru_cache(maxsize=128)
@@ -228,12 +245,17 @@ class CouncilAsAJudge:

     def __init__(
         self,
-        id: str =
+        id: str = swarm_id(),
         name: str = "CouncilAsAJudge",
        description: str = "Evaluates the model's response across multiple dimensions",
         model_name: str = "gpt-4o-mini",
-        output_type: str = "
+        output_type: str = "all",
         cache_size: int = 128,
+        max_workers: int = None,
+        base_agent: Optional[Agent] = None,
+        random_model_name: bool = True,
+        max_loops: int = 1,
+        aggregation_model_name: str = "gpt-4o-mini",
     ):
         """
         Initialize the CouncilAsAJudge.
@@ -251,10 +273,36 @@ class CouncilAsAJudge:
         self.description = description
         self.model_name = model_name
         self.output_type = output_type
+        self.cache_size = cache_size
+        self.max_workers = max_workers
+        self.base_agent = base_agent
+        self.random_model_name = random_model_name
+        self.max_loops = max_loops
+        self.aggregation_model_name = aggregation_model_name
+
+        self.reliability_check()
+
         self.judge_agents = self._create_judges()
         self.aggregator_agent = self._create_aggregator()
         self.conversation = Conversation()

+    def reliability_check(self):
+        logger.info(
+            f"🧠 Running CouncilAsAJudge in parallel mode with {self.max_workers} workers...\n"
+        )
+
+        if self.model_name is None:
+            raise ValueError("Model name is not set")
+
+        if self.output_type is None:
+            raise ValueError("Output type is not set")
+
+        if self.random_model_name:
+            self.model_name = set_random_models_for_agents()
+
+        self.concurrent_setup()
+
+    def concurrent_setup(self):
         # Calculate optimal number of workers (75% of available CPU cores)
         total_cores = multiprocessing.cpu_count()
         self.max_workers = max(1, int(total_cores * 0.75))
@@ -263,7 +311,7 @@ class CouncilAsAJudge:
         )

         # Configure caching
-        self._configure_caching(cache_size)
+        self._configure_caching(self.cache_size)

     def _configure_caching(self, cache_size: int) -> None:
         """
@@ -305,11 +353,9 @@ class CouncilAsAJudge:
             dim: Agent(
                 agent_name=f"{dim}_judge",
                 system_prompt=judge_system_prompt(),
-                model_name=
+                model_name="gpt-4o-mini",
                 max_loops=1,
-
-                dashboard=False,
-                verbose=False,
+                output_type="final",
                 dynamic_temperature_enabled=True,
             )
             for dim in EVAL_DIMENSIONS
@@ -333,12 +379,10 @@ class CouncilAsAJudge:
             return Agent(
                 agent_name="aggregator_agent",
                 system_prompt=aggregator_system_prompt(),
-                model_name=self.
+                model_name=self.aggregation_model_name,
                 max_loops=1,
-                autosave=False,
-                dashboard=False,
-                verbose=False,
                 dynamic_temperature_enabled=True,
+                output_type="final",
             )
         except Exception as e:
             raise RuntimeError(
@@ -371,7 +415,9 @@ class CouncilAsAJudge:
             prompt = build_judge_prompt(
                 dim, user_prompt, model_response
             )
-            result = agent.run(
+            result = agent.run(
+                f"{prompt} \n\n Evaluate the following agent {self.base_agent.agent_name} response for the {dim} dimension: {model_response}."
+            )

             self.conversation.add(
                 role=agent.agent_name,
@@ -384,7 +430,9 @@ class CouncilAsAJudge:
                 f"Failed to evaluate dimension {dim}: {str(e)}"
             )

-    def run(
+    def run(
+        self, task: str, model_response: Optional[str] = None
+    ) -> None:
         """
         Run the evaluation process using ThreadPoolExecutor.

@@ -395,11 +443,18 @@ class CouncilAsAJudge:
         Raises:
             EvaluationError: If evaluation process fails
         """
-        logger.info(
-            f"🧠 Running CouncilAsAJudge in parallel mode with {self.max_workers} workers...\n"
-        )

         try:
+
+            # Run the base agent
+            if self.base_agent and model_response is None:
+                model_response = self.base_agent.run(task=task)
+
+            self.conversation.add(
+                role="User",
+                content=task,
+            )
+
             # Create tasks for all dimensions
             tasks = [
                 (dim, agent, task, model_response)
@@ -450,6 +505,37 @@ class CouncilAsAJudge:
                 content=final_report,
             )

+            # Synthesize feedback and generate improved response
+            feedback_prompt = f"""
+            Based on the comprehensive evaluations from our expert council of judges, please refine your response to the original task.
+
+            Original Task:
+            {task}
+
+            Council Feedback:
+            {aggregation_prompt}
+
+            Please:
+            1. Carefully consider all feedback points
+            2. Address any identified weaknesses
+            3. Maintain or enhance existing strengths
+            4. Provide a refined, improved response that incorporates the council's insights
+
+            Your refined response:
+            """
+
+            final_report = self.base_agent.run(task=feedback_prompt)
+
+            self.conversation.add(
+                role=self.base_agent.agent_name,
+                content=final_report,
+            )
+
+            return history_output_formatter(
+                conversation=self.conversation,
+                type=self.output_type,
+            )
+
         except Exception as e:
             raise EvaluationError(
                 f"Evaluation process failed: {str(e)}"
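Taken together, the council_judge changes add a base_agent that produces the initial answer, per-dimension judging, aggregation, and a final refinement pass. A sketch of how the new surface might be driven; all configuration values below are illustrative:

from swarms.structs.agent import Agent
from swarms.structs.council_judge import CouncilAsAJudge

# The agent whose answer will be generated, judged, and refined.
worker = Agent(
    agent_name="researcher",
    model_name="gpt-4o-mini",
    max_loops=1,
)

council = CouncilAsAJudge(
    base_agent=worker,
    random_model_name=False,  # keep one judge model rather than sampling per agent
    output_type="all",
)

# run() executes the base agent, fans evaluation out across the six
# EVAL_DIMENSIONS judges in a thread pool, aggregates the feedback, and
# asks the base agent for a refined answer.
result = council.run(task="Explain the CAP theorem to a new backend engineer.")
print(result)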