tooluniverse 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -0,0 +1,369 @@
+ """
+ Tool Metadata Generation Pipeline
+ Generates comprehensive metadata for a list of tools by extracting details from their configuration files.
+ """
+
+ def compose(arguments, tooluniverse, call_tool):
+     """
+     Main composition function for tool metadata generation.
+
+     Args:
+         arguments (dict): Input arguments containing a list of tool config JSONs, plus a tool_type_mappings dict for non-API tools (e.g., {'Databases': ['XMLTool']})
+         tooluniverse: ToolUniverse instance
+         call_tool: Function to call other tools
+
+     Returns:
+         list: List of tool metadata dictionaries (JSON-compatible)
+     """
+     import json
+     import warnings
+     from collections import Counter
+
+     def _parse_agent_output(output, tool_name="Unknown Tool"):
+         """Helper to parse varied agent outputs (JSON string, wrapped dict) into a dict."""
+         if isinstance(output, str):
+             try:
+                 return json.loads(output)
+             except json.JSONDecodeError:
+                 print(f"Failed to parse JSON string from {tool_name}; received: {output[:200]}")
+                 return {}  # Return an empty dict on failure to prevent a crash
+
+         if isinstance(output, dict) and 'success' in output and 'result' in output:
+             # Handle wrapped output like {'success': True, 'result': '{...}'}
+             inner_result = output.get('result')
+             if isinstance(inner_result, str) and inner_result.strip():
+                 try:
+                     return json.loads(inner_result)
+                 except json.JSONDecodeError:
+                     print(f"Failed to parse inner result JSON from {tool_name}; using empty metadata.")
+                     return {}
+             elif isinstance(inner_result, dict):
+                 return inner_result  # Result is already a dict
+             else:
+                 return {}  # No valid inner result
+
+         return {}
+
+     DEFAULT_TOOL_TYPE_MAPPINGS = {
+         "Embedding Store": ["EmbeddingDatabase"],
+         "Database": ["XMLTool", "DatasetTool"],
+         "Scientific Software Package": ["PackageTool"],
+         "AI Agent": ["AgenticTool"],
+         "ML Model": ["ADMETAITool", "AlphaFoldRESTTool", "boltz2_docking", "compute_depmap24q2_gene_correlations", "run_compass_prediction", "run_pinnacle_ppi_retrieval", "run_transcriptformer_embedding_retrieval", "get_abstract_from_patent_app_number", "get_claims_from_patent_app_number", "get_full_text_from_patent_app_number"],
+         "Human Expert Feedback": ["mcp_auto_loader_human_expert", "consult_human_expert", "get_expert_response", "get_expert_status", "list_pending_expert_requests", "submit_expert_response"],
+         "MCP": ["MCPAutoLoaderTool", "MCPClientTool", "MCPProxyTool"],
+         "Compositional Tool": ["ComposeTool"],
+         "Tool Finder Tool": ["ToolFinderEmbedding", "ToolFinderLLM", "ToolFinderKeyword"],
+         "Special Tool": ["Finish", "CallAgent"]
+     }
+
+     # Step 0: Parse inputs and set up variables
+     tool_configs = arguments.get('tool_configs', [])
+     tool_type_mappings = arguments.get('tool_type_mappings', {})
+     add_existing_tooluniverse_labels = arguments.get('add_existing_tooluniverse_labels', True)
+     max_new_tooluniverse_labels = arguments.get('max_new_tooluniverse_labels', 0)
+
+     # Merge tool type mappings with defaults, prioritizing user-provided mappings
+     for key, value in DEFAULT_TOOL_TYPE_MAPPINGS.items():
+         if key not in tool_type_mappings:
+             tool_type_mappings[key] = value
+     warnings.warn("Augmenting your provided tool_type_mappings with the default tool_type_mappings to ensure compatibility with existing ToolUniverse tools. The default tool_type_mappings are:\n" + json.dumps(DEFAULT_TOOL_TYPE_MAPPINGS, indent=4))
+
+     # Add existing ToolUniverse labels if specified
+     tool_labels_set = set()
+     if add_existing_tooluniverse_labels:
+         # Load existing standardized tool metadata (a list of dicts, each containing a 'tags' field).
+         # Use importlib.resources rather than absolute paths so this works inside the installed package.
+         try:
+             try:
+                 from importlib import resources as importlib_resources  # Py3.9+
+             except ImportError:  # pragma: no cover
+                 import importlib_resources  # type: ignore
+
+             # Access the JSON file inside the package (tooluniverse/website_data/v3_standardized_tags.json)
+             json_path = importlib_resources.files('tooluniverse.website_data').joinpath('v3_standardized_tags.json')
+             with json_path.open('r', encoding='utf-8') as f:
+                 existing_metadata_list = json.load(f)
+
+             if isinstance(existing_metadata_list, list):
+                 for item in existing_metadata_list:
+                     if isinstance(item, dict):
+                         tags = item.get('tags', [])
+                         if isinstance(tags, list):
+                             for tag in tags:
+                                 if isinstance(tag, str) and tag.strip():
+                                     tool_labels_set.add(tag.strip())
+         except Exception as e:  # Fail gracefully; downstream logic just proceeds without enrichment
+             print(f"Failed to load existing ToolUniverse labels: {e}")
+
+     if not tool_configs:
+         return []
+
+     # Step 1: Generate detailed metadata for each tool
+     all_tool_metadata = []
+     for tool_config in tool_configs:
+         tool_config_str = json.dumps(tool_config)
+         try:
+             metadata_params = {
+                 'tool_config': tool_config_str,
+                 'tool_type_mappings': tool_type_mappings
+             }
+             generated_metadata = {}
+             for _ in range(5):  # Retry up to 5 times
+                 raw_output = call_tool('ToolMetadataGenerator', metadata_params)
+                 generated_metadata = _parse_agent_output(raw_output, 'ToolMetadataGenerator')
+                 if generated_metadata:  # Stop once a non-empty result is returned
+                     break
+
+             # Attempt to enrich the tags using LabelGenerator
+             try:
+                 # Prepare inputs for LabelGenerator
+                 tool_name = tool_config.get('name') or generated_metadata.get('name') or ''
+                 tool_description = tool_config.get('description') or generated_metadata.get('description') or ''
+                 # The parameter schema may be nested under parameter -> properties
+                 param_properties = tool_config.get('parameter', {}).get('properties', {})
+
+                 # Convert the parameters to a compact JSON-like string representation
+                 def _stringify_params(props):
+                     parts = []
+                     for k, v in props.items():
+                         if isinstance(v, dict):
+                             type_val = v.get('type', 'unknown')
+                             desc_val = v.get('description', '')
+                             parts.append(f"\"{k}\": {{ 'type': '{type_val}', 'description': '{desc_val}' }}")
+                         else:
+                             parts.append(f"\"{k}\": " + repr(v))
+                     return '{' + ', '.join(parts) + '}'
+
+                 tool_parameters_str = _stringify_params(param_properties)
+                 category = tool_config.get('category') or tool_config.get('type') or generated_metadata.get('category') or ''
+
+                 label_params = {
+                     'tool_name': tool_name,
+                     'tool_description': tool_description,
+                     'tool_parameters': tool_parameters_str,
+                     'category': category,
+                     'existing_labels': json.dumps(list(tool_labels_set))
+                 }
+                 label_result = call_tool('LabelGenerator', label_params)
+                 label_result = _parse_agent_output(label_result, 'LabelGenerator')
+
+                 # label_result may be a dict or a JSON string; extract the labels list
+                 labels = []
+                 if isinstance(label_result, dict):
+                     labels = label_result.get('labels', [])
+                 # Replace the tags with the generated labels, if any
+                 if labels:
+                     generated_metadata['tags'] = labels
+             except Exception as tag_exc:
+                 print(f"Label generation failed for tool {tool_config.get('name', 'N/A')}: {tag_exc}")
+
+             all_tool_metadata.append(generated_metadata)
+         except Exception as e:
+             print(f"Failed to generate metadata for tool {tool_config.get('name', 'N/A')}: {e}")
+             # Append an error object so the output stays aligned with the input
+             all_tool_metadata.append({
+                 'error': f"Metadata generation failed for {tool_config.get('name', 'N/A')}",
+                 'details': str(e)
+             })
+
+     # Step 2: Validate against the schema template
+     validated_metadata = []
+     schema_template = {
+         "id": "",
+         "name": "",
+         "description": "",
+         "detailed_description": "",
+         "toolType": "api",
+         "tags": [],
+         "category": "",
+         "lab": "",
+         "source": "",
+         "version": "v1.0.0",
+         "reviewed": False,
+         "isValidated": False,
+         "usageStats": "0 uses",
+         "capabilities": [],
+         "limitations": [],
+         "parameters": {},
+         "inputSchema": {},
+         "exampleInput": {},
+         "apiEndpoints": []
+     }
+
+     for metadata in all_tool_metadata:
+         if 'error' in metadata:
+             validated_metadata.append(metadata)
+             continue
+
+         validated_item = {}
+         for key, default_value in schema_template.items():
+             value = metadata.get(key, default_value)
+             if not isinstance(value, type(default_value)):
+                 # Gracefully handle simple type mismatches, or reset to the default
+                 if isinstance(default_value, list) and not isinstance(value, list):
+                     value = []
+                 elif isinstance(default_value, dict) and not isinstance(value, dict):
+                     value = {}
+                 elif isinstance(default_value, str) and not isinstance(value, str):
+                     value = str(value) if value is not None else ""
+                 elif isinstance(default_value, bool) and not isinstance(value, bool):
+                     value = bool(value)
+                 else:
+                     value = default_value  # Fall back to the default if the type is complex/unexpected
+             validated_item[key] = value
+         validated_metadata.append(validated_item)
+
+     all_tool_metadata = validated_metadata
+
+     # Step 3: Standardize sources using ToolMetadataStandardizer
+     try:
+         source_list = []
+         for tool in all_tool_metadata:
+             if 'error' not in tool and tool.get('source'):
+                 source_list.append(tool.get('source'))
+
+         if source_list:
+             standardizer_params = {'metadata_list': list(set(source_list))}
+             standardized_sources_map = call_tool('ToolMetadataStandardizer', standardizer_params)
+             standardized_sources_map = _parse_agent_output(standardized_sources_map, 'ToolMetadataStandardizer')
+             print("Standardized sources mapping:", standardized_sources_map)
+
+             # Create a reverse map (raw source -> canonical source) for easy lookup
+             source_to_standard_map = {}
+             for standard_name, raw_names in standardized_sources_map.items():
+                 for raw_name in raw_names:
+                     source_to_standard_map[raw_name] = standard_name
+
+             # Update the source in each metadata object
+             for tool_metadata in all_tool_metadata:
+                 if 'error' not in tool_metadata:
+                     original_source = tool_metadata.get('source')
+                     if original_source in source_to_standard_map:
+                         tool_metadata['source'] = source_to_standard_map[original_source]
+     except Exception as e:
+         print(f"An error occurred during source standardization: {e}")
+
+     # Step 4: Standardize tags, with optional extra passes to meet the label limit
+     try:
+         all_raw_tags = []
+         for tool in all_tool_metadata:
+             if 'error' not in tool and isinstance(tool.get('tags'), list):
+                 all_raw_tags.extend(tool.get('tags', []))
+
+         # Filter out pre-existing labels before standardization
+         tags_to_standardize = [tag for tag in set(all_raw_tags) if tag not in tool_labels_set]
+
+         if max_new_tooluniverse_labels <= 0:
+             # If no new labels are allowed, skip standardization and simply drop the new tags
+             for tool_metadata in all_tool_metadata:
+                 if 'error' not in tool_metadata and isinstance(tool_metadata.get('tags'), list):
+                     original_tags = tool_metadata.get('tags', [])
+                     filtered_tags = [tag for tag in original_tags if tag in tool_labels_set]
+                     tool_metadata['tags'] = sorted(set(filtered_tags))
+             return all_tool_metadata  # Return early; no further processing is needed
+
+         tag_to_standard_map = {}
+         if tags_to_standardize:
+             # Iteratively standardize tags for up to 5 passes to meet the label limit
+             current_tags_to_standardize = list(set(tags_to_standardize))
+             # This map stores the final standardized version of each original raw tag
+             tag_to_standard_map = {tag: tag for tag in tags_to_standardize}
+
+             for i in range(5):  # Loop for up to 5 standardization passes
+                 num_tags = len(current_tags_to_standardize)
+
+                 # If the number of tags is within the limit, no more standardization is needed
+                 if max_new_tooluniverse_labels > 0 and num_tags <= max_new_tooluniverse_labels:
+                     print(f"Tag count ({num_tags}) is within the limit ({max_new_tooluniverse_labels}). Stopping standardization.")
+                     break
+
+                 print(f"Pass {i+1}: Standardizing {num_tags} tags.")
+
+                 # Set the limit for the standardizer tool; fall back to a high default
+                 # when max_new_tooluniverse_labels is not set
+                 limit = max_new_tooluniverse_labels if max_new_tooluniverse_labels > 0 else 150
+
+                 standardizer_params = {
+                     'metadata_list': current_tags_to_standardize,
+                     'limit': limit
+                 }
+
+                 print(f"Pass {i+1} input tags:", current_tags_to_standardize)
+
+                 # Call the standardizer tool and parse the output, with retries
+                 pass_output_map = {}
+                 for _ in range(5):  # Retry up to 5 times
+                     raw_output = call_tool('ToolMetadataStandardizer', standardizer_params)
+                     pass_output_map = _parse_agent_output(raw_output, 'ToolMetadataStandardizer')
+                     if pass_output_map:  # Stop once a non-empty result is returned
+                         break
+
+                 print(f"Pass {i+1} standardized tags mapping:", pass_output_map)
+
+                 # Create a reverse map for the current pass (input tag -> standardized tag)
+                 pass_reverse_map = {}
+                 for standard_tag, raw_tags_in_pass in pass_output_map.items():
+                     for raw_tag in raw_tags_in_pass:
+                         pass_reverse_map[raw_tag] = standard_tag
+
+                 # Chain the new standardization into the final mapping: for each original tag,
+                 # check whether its current standard form was re-mapped in this pass
+                 for original_tag, current_standard_tag in tag_to_standard_map.items():
+                     if current_standard_tag in pass_reverse_map:
+                         tag_to_standard_map[original_tag] = pass_reverse_map[current_standard_tag]
+
+                 # The tags for the next pass are the canonical keys produced by this pass
+                 current_tags_to_standardize = sorted(pass_output_map.keys())
+
+                 # An empty map means no further consolidation is possible
+                 if not current_tags_to_standardize:
+                     print("No further tag consolidation possible. Stopping.")
+                     break
+
+         # Update the tags in each metadata object using the final mapping. For each original
+         # tag, use its standardized version if available, otherwise keep the original; this
+         # correctly handles tags already in tool_labels_set, which were never standardized.
+         for tool_metadata in all_tool_metadata:
+             if 'error' not in tool_metadata and isinstance(tool_metadata.get('tags'), list):
+                 original_tags = tool_metadata.get('tags', [])
+                 standardized_tags = {tag_to_standard_map.get(tag, tag) for tag in original_tags}
+                 tool_metadata['tags'] = sorted(standardized_tags)
+
+     except Exception as e:
+         print(f"An error occurred during tag standardization: {e}")
+
+     # Step 5: Remove tags that occur only once across the entire dataset,
+     # but only for tags that are new (not pre-existing in ToolUniverse)
+     try:
+         # Flatten the list of all new tags from all tools, ignoring error entries
+         all_new_tags_flat = [
+             tag
+             for tool_metadata in all_tool_metadata
+             if 'error' not in tool_metadata and isinstance(tool_metadata.get('tags'), list)
+             for tag in tool_metadata.get('tags', [])
+             if tag not in tool_labels_set
+         ]
+
+         if all_new_tags_flat:
+             # Count the frequency of each new tag
+             new_tag_counts = Counter(all_new_tags_flat)
+
+             # Identify the new tags that appear more than once
+             new_tags_to_keep = {tag for tag, count in new_tag_counts.items() if count > 1}
+
+             # Filter the tags in each tool's metadata
+             for tool_metadata in all_tool_metadata:
+                 if 'error' not in tool_metadata and isinstance(tool_metadata.get('tags'), list):
+                     original_tags = tool_metadata.get('tags', [])
+                     # Keep all pre-existing tags, plus new tags that appear more than once
+                     filtered_tags = [
+                         tag for tag in original_tags
+                         if tag in tool_labels_set or tag in new_tags_to_keep
+                     ]
+                     tool_metadata['tags'] = sorted(set(filtered_tags))
+
+     except Exception as e:
+         print(f"An error occurred during single-occurrence tag removal: {e}")
+
+     return all_tool_metadata
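
The compose pipeline above is driven entirely through the `call_tool` callback, so its control flow can be exercised without a live model. A minimal sketch, assuming `compose` is importable from this module and using a hypothetical `stub_call_tool` that returns shape-correct outputs for the three agents defined later in this diff (all names and values below are made up):

```python
import json

def stub_call_tool(tool_name, params):
    """Hypothetical stand-in for the LLM-backed agents; returns fixed, shape-correct outputs."""
    if tool_name == 'ToolMetadataGenerator':
        return json.dumps({"name": "demo_tool", "description": "Example tool",
                           "source": "PubChem", "category": "RESTTool"})
    if tool_name == 'LabelGenerator':
        # Wrapped form, to exercise the {'success': ..., 'result': ...} branch
        return {"success": True,
                "result": json.dumps({"labels": ["cheminformatics", "compound-lookup"]})}
    if tool_name == 'ToolMetadataStandardizer':
        # Identity mapping: every raw string becomes its own canonical form
        return {value: [value] for value in params.get('metadata_list', [])}
    return {}

arguments = {
    "tool_configs": [{
        "name": "demo_tool",
        "type": "RESTTool",
        "description": "Example tool",
        "parameter": {"properties": {"query": {"type": "string",
                                               "description": "Search term"}}},
    }],
    "add_existing_tooluniverse_labels": False,  # skip the packaged tags file
    "max_new_tooluniverse_labels": 10,
}

metadata = compose(arguments, tooluniverse=None, call_tool=stub_call_tool)
# Note: with a single tool, Step 5 prunes its single-occurrence new tags,
# so metadata[0]['tags'] ends up empty in this sketch.
print(json.dumps(metadata, indent=2))
```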
@@ -1077,12 +1077,13 @@
  "type": "AgenticTool",
  "name": "LabelGenerator",
  "description": "Generates relevant keyword labels for tools based on their name, description, parameters, and category. Creates a comprehensive list of tags for tool discovery and categorization.",
- "prompt": "You are an expert in tool categorization and keyword generation. Your task is to generate relevant, descriptive labels/keywords for a tool that will help with discovery and organization.\n\n## TOOL INFORMATION\n- **Tool Name**: {tool_name}\n- **Description**: {tool_description}\n- **Parameters**: {tool_parameters}\n- **Category**: {category}\n\n## LABEL GENERATION INSTRUCTIONS\nAnalyze the tool information and generate a comprehensive list of relevant labels/keywords that:\n\n1. **Capture Tool Functionality**: What the tool does, its purpose\n2. **Describe Input/Output Types**: Data types, formats, domains\n3. **Indicate Use Cases**: When and where this tool would be useful\n4. **Reference Technical Domains**: Scientific fields, technologies, methodologies\n5. **Include Semantic Variations**: Synonyms, related terms, alternative descriptions\n\n## LABELING GUIDELINES\n- Generate 8-15 relevant labels\n- Use lowercase, hyphenated format (e.g., 'protein-analysis', 'molecular-weight')\n- Include both specific and general terms\n- Avoid overly generic terms like 'tool', 'utility', 'helper'\n- Include domain-specific terminology when applicable\n- Consider both technical and user-friendly terms\n\n## EXAMPLES\nFor a protein sequence analyzer:\n- Technical: ['protein-analysis', 'sequence-processing', 'bioinformatics', 'amino-acid-composition']\n- Functional: ['molecular-biology', 'protein-characterization', 'structural-analysis']\n- Use-case: ['research-tool', 'computational-biology', 'biochemistry']\n\nReturn a JSON object with the following structure:\n```json\n{\n \"labels\": [\"keyword1\", \"keyword2\", \"keyword3\", ...],\n \"rationale\": \"Brief explanation of label selection strategy\"\n}\n```",
+ "prompt": "You are an expert in tool categorization and keyword generation. Your task is to generate relevant, descriptive labels/keywords for a tool that will help with discovery and organization.\n\n## TOOL INFORMATION\n- **Tool Name**: {tool_name}\n- **Description**: {tool_description}\n- **Parameters**: {tool_parameters}\n- **Category**: {category}\n\n## EXISTING LABELS\n{existing_labels}\n\n## LABEL GENERATION INSTRUCTIONS\nAnalyze the tool information and generate a comprehensive list of relevant labels/keywords that:\n\n1. **Capture Tool Functionality**: What the tool does, its purpose\n2. **Describe Input/Output Types**: Data types, formats, domains\n3. **Indicate Use Cases**: When and where this tool would be useful\n4. **Reference Technical Domains**: Scientific fields, technologies, methodologies\n5. **Include Semantic Variations**: Synonyms, related terms, alternative descriptions\n\n## LABELING GUIDELINES\n- Generate 8-15 relevant labels\n- Use lowercase, hyphenated format (e.g., 'protein-analysis', 'molecular-weight')\n- Reuse existing labels (if provided) when the existing labels are a good fit. It is okay to generate new labels if the existing ones are not suitable, or if there is another label you think is fitting that is not in the existing labels set.\n- Include both specific and general terms\n- Avoid overly generic terms like 'tool', 'utility', 'helper'\n- Include domain-specific terminology when applicable\n- Consider both technical and user-friendly terms\n\n## EXAMPLES\nFor a protein sequence analyzer:\n- Technical: ['protein-analysis', 'sequence-processing', 'bioinformatics', 'amino-acid-composition']\n- Functional: ['molecular-biology', 'protein-characterization', 'structural-analysis']\n- Use-case: ['research-tool', 'computational-biology', 'biochemistry']\n\nReturn a JSON object with the following structure:\n```json\n{\n \"labels\": [\"keyword1\", \"keyword2\", \"keyword3\", ...],\n \"rationale\": \"Brief explanation of label selection strategy\"\n}\n```",
  "input_arguments": [
  "tool_name",
  "tool_description",
  "tool_parameters",
- "category"
+ "category",
+ "existing_labels"
  ],
  "parameter": {
  "type": "object",
@@ -1106,6 +1107,11 @@
  "type": "string",
  "description": "The general category or domain the tool belongs to",
  "required": true
+ },
+ "existing_labels": {
+ "type": "string",
+ "description": "JSON array string of existing labels to consider reusing (optional)",
+ "required": false
  }
  },
  "required": [
@@ -1124,33 +1130,142 @@
  }
  },
  {
- "type": "AgenticTool",
- "name": "call_agentic_human",
- "description": "Produces a concise, practical answer that emulates how a well-informed human would respond to the question.",
- "prompt": "You are not an AI. You are a knowledgeable human expert speaking naturally. Read the user's question carefully and respond as a thoughtful person would, giving a clear, direct, and context-aware answer. Do not mention AI, ChatGPT, or models. If the question involves uncertainty or opinion, share a reasoned perspective in a human tone.\n\nQuestion: {question}\n\nAnswer:",
- "input_arguments": [
- "question"
- ],
- "parameter": {
- "type": "object",
- "properties": {
- "question": {
- "type": "string",
- "description": "The user's question to be answered in a human-like manner.",
- "required": true
+ "type": "AgenticTool",
+ "name": "call_agentic_human",
+ "description": "Produces a concise, practical answer that emulates how a well-informed human would respond to the question.",
+ "prompt": "You are not an AI. You are a knowledgeable human expert speaking naturally. Read the user's question carefully and respond as a thoughtful person would, giving a clear, direct, and context-aware answer. Do not mention AI, ChatGPT, or models. If the question involves uncertainty or opinion, share a reasoned perspective in a human tone.\n\nQuestion: {question}\n\nAnswer:",
+ "input_arguments": [
+ "question"
+ ],
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "question": {
+ "type": "string",
+ "description": "The user's question to be answered in a human-like manner.",
+ "required": true
+ }
+ },
+ "required": [
+ "question"
+ ]
+ },
+ "configs": {
+ "api_type": "CHATGPT",
+ "model_id": "o4-mini-0416",
+ "temperature": 0.7,
+ "max_new_tokens": 1024,
+ "return_json": false
  }
+ },
+ {
+ "type": "AgenticTool",
+ "name": "ToolMetadataGenerator",
+ "description": "Generates a JSON structure with the metadata of a tool in ToolUniverse, given the JSON configuration of the tool.",
+ "prompt": "You are an expert in processing ToolUniverse tool configurations. Your task is to extract and generate key metadata from a given tool's JSON configuration and return it as a new, structured JSON object.\n\n**Input Tool Configuration:**\n```json\n{tool_config}\n```\n\n**Tool Type Mappings (for simplifying toolType):**\n```json\n{tool_type_mappings}\n```\n\n**Instructions:**\nFrom the input configuration, generate a new JSON object with the specified structure. All fields enclosed in '<','>' are placeholders for instructions; you should generate a specific value for the tool based on its configuration. Fields not in brackets should use the default values provided.\n\n**Output JSON Structure:**\n```json\n{\n \"id\": \"<generate a new uuid>\",\n \"name\": \"<extract from tool_config.name>\",\n \"description\": \"<extract from tool_config.description and slightly summarize it if it is too long>\",\n \"detailed_description\": \"<extract from tool_config.description>\",\n \"toolType\": \"<if tool_config.type or tool_config.name appears in tool_type_mappings dict in one of the lists (among the dict's values), extract the corresponding key and set it as the simplified toolType. otherwise, set toolType to be 'API' (the default)>\",\n \"tags\": [],\n \"category\": \"<extract from tool_config.type>\",\n \"lab\": \"Zitnik Lab\",\n \"source\": \"<extract the name of the database, package, model, or write 'Agentic'>\",\n \"version\": \"v1.0.0\",\n \"reviewed\": true,\n \"isValidated\": true,\n \"usageStats\": \"100+ uses\",\n \"capabilities\": [\n \"<list capabilities strictly derivable from tool_config>\"\n ],\n \"limitations\": [\n \"May require refinement\"\n ],\n \"parameters\": {<for each parameter key include an object with type and description>},\n \"inputSchema\": <echo tool_config.parameter exactly>,\n \"exampleInput\": <JSON object with example values for each parameter>,\n \"apiEndpoints\": [\n {\n \"method\": \"MCP\",\n \"url\": \"https://tooluniversemcpserver.onrender.com/mcp/\"\n }\n ]\n}\n```\n\nReturn ONLY the final JSON object with no extra commentary.",
+ "input_arguments": [
+ "tool_config",
+ "tool_type_mappings"
+ ],
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "tool_config": {
+ "type": "string",
+ "description": "JSON string of the tool configuration to extract metadata from",
+ "required": true
+ },
+ "tool_type_mappings": {
+ "type": "object",
+ "description": "A mapping from a simplified toolType to a list of tool_config.type that fall under the toolType (e.g., {'Databases': ['XMLTool']})",
+ "required": false
+ }
+ },
+ "required": [
+ "tool_config"
+ ]
  },
- "required": [
- "question"
- ]
+ "configs": {
+ "api_type": "CHATGPT",
+ "model_id": "o4-mini-0416",
+ "temperature": 0.7,
+ "max_new_tokens": 8192,
+ "return_json": true,
+ "return_metadata": false
+ }
  },
- "configs": {
- "api_type": "CHATGPT",
- "model_id": "o4-mini-0416",
- "temperature": 0.7,
- "max_new_tokens": 1024,
- "return_json": false
- }
+ {
+ "type": "AgenticTool",
+ "name": "ToolMetadataStandardizer",
+ "description": "Standardizes and groups semantically equivalent metadata strings (e.g., sources, tags) into canonical forms for consistent downstream usage.",
+ "prompt": "You are an expert in metadata normalization and canonicalization. Given a list of raw metadata strings (sources, tags, categories, etc.), produce a JSON object that maps a SINGLE canonical (standardized) string to the list of ALL raw variants from the input that correspond to that canonical form.\n\nINPUT LIST (raw values):\n{metadata_list}\n\nOPTIONAL LIMIT:\n{limit}\n\nTASK:\nReturn ONLY a JSON object (no markdown, no explanations) of the form:\n{\n \"canonical_value_1\": [\"variant_a\", \"variant_b\"],\n \"canonical_value_2\": [\"variant_c\"],\n ...\n}\n\n**LIMIT CONSTRAINT:**\nIf a `limit` is provided, you MUST group terms more aggressively to ensure the number of canonical keys in the output JSON does not exceed the limit. Every raw string must still be mapped to one of the canonical strings. **However, this aggressive grouping must be balanced. Avoid creating overly broad, uninformative categories (e.g., 'data', 'science', 'metadata'). The canonical labels must still clearly distinguish between different technical capabilities and scientific fields.**\n\n**STANDARDIZATION RULES (apply in order):**\n\n**Part 1: Grammatical & Syntactic Normalization**\n1. Trim whitespace; collapse internal repeated whitespace to a single space.\n2. Case fold (lowercase) for comparison, but canonical output SHOULD use a clean, title or widely-recognized uppercase style for well-known acronyms (retain ALLCAPS for <=5 letter well-known biomedical / data acronyms like NCBI, FDA, NIH, EMA, WHO, API). For general words use lowercase-hyphen style (e.g., \"gene-expression\").\n3. Remove surrounding quotes and trailing punctuation (periods, commas, semicolons).\n4. Replace underscores, spaces, and consecutive separators with a single hyphen (e.g., 'Gene Expression', 'gene_expression' -> 'gene-expression').\n5. Treat hyphen and space variants as equivalent (protein-analysis == protein analysis).\n6. Singular vs plural: treat plural forms as the same (e.g., \"dataset\" and \"datasets\"). Use singular in canonical unless plural is the widely accepted form (e.g., 'omics').\n7. Common stop punctuation (&, /, :) removed unless they encode a standard acronym combination. For constructs like 'R&D' keep as 'R-and-d'.\n8. Strip leading articles (the, a, an) unless part of proper noun (e.g., 'The Cancer Genome Atlas' -> keep).\n9. Collapse obvious expansions to standard acronyms when unambiguous (\"national center for biotechnology information\" -> NCBI, \"food and drug administration\" -> FDA).\n10. If a term is already a concise, recognized proper noun or database name (e.g., 'DrugBank', 'ChEMBL', 'PubChem'), keep its conventional casing as canonical and group all variants to it.\n\n**Part 2: Semantic & Hierarchical Grouping (MOST IMPORTANT)**\n*After applying grammatical normalization, perform the following semantic groupings to create more general, reusable labels.*\n11. **Generalize Specific Terms:** This is the most critical rule. Collapse specific sub-topics into a broader, more general parent category. The goal is to make labels applicable across multiple tools. For example, group 'bioinformatics-ontology' and 'bioinformatics-library-overview' under the single canonical label 'bioinformatics'. **Crucially, do not over-generalize to the point of losing meaning. A category like 'bioinformatics' is good, but a category like 'science' is too broad and uninformative.**\n12. **Hierarchy Collapse:** If terms represent a clear parent-child relationship (e.g., 'genomics' and 'gene-expression-analysis'), group them under the more general parent term ('genomics').\n13. **Synonym & Function Grouping:** Group clear synonyms or terms describing the same function (e.g., 'visualization', 'plotting', 'charting') under a single canonical term (e.g., 'data-visualization').\n14. **Prioritize Broad Concepts:** When choosing a canonical key for a semantic group, always select the most general and widely understood term. For example, prefer 'protein-analysis' over 'protein-folding-simulation'.\n\n**Part 3: Final Output Formatting**\n15. Always include the original raw variants EXACTLY as they appeared in the input list (before any normalization) inside the variant arrays (deduplicate within each list, preserving original order of first appearance).\n16. The canonical key MUST be a clean, user-presentable string (no surrounding whitespace, no trailing punctuation).\n17. Every input value must appear in exactly one array in the output.\n\n**CANONICAL KEY SELECTION HEURISTICS (when multiple variants map to same group):**\n- Prefer the most general, high-level concept (e.g., 'bioinformatics' over 'sequence-alignment').\n- Prefer widely recognized product / database / organization name with correct branding (DrugBank, PubChem, UniProt, Ensembl).\n- Else prefer the shortest unambiguous normalized form.\n- Else use hyphenated lowercase normalized form.\n\nOUTPUT REQUIREMENTS:\n- Pure JSON object.\n- Keys: canonical strings.\n- Values: arrays of raw variants (length >=1).\n- Do NOT return commentary, explanations, or fields other than the mapping.\n\nIf input list is empty, return {}.",
+ "input_arguments": [
+ "metadata_list",
+ "limit"
+ ],
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "metadata_list": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of raw metadata strings (e.g., sources, tags) to standardize and group.",
+ "required": true
+ },
+ "limit": {
+ "type": "integer",
+ "description": "If provided, the maximum number of canonical strings to return. The LLM will group terms more aggressively to meet this limit, ensuring all raw strings are mapped.",
+ "required": false
+ }
+ },
+ "required": [
+ "metadata_list"
+ ]
+ },
+ "configs": {
+ "api_type": "CHATGPT",
+ "model_id": "o4-mini-0416",
+ "temperature": 0.7,
+ "max_new_tokens": 13192,
+ "return_json": true
+ }
+ },
+ {
+ "type": "AgenticTool",
+ "name": "ToolRelationshipDetector",
+ "description": "Analyzes a primary tool against a list of other tools to identify meaningful, directional data flow compatibilities for scientific workflows. Returns a list of compatible pairs with direction and rationale.",
+ "prompt": "You are an expert in tool composition and scientific workflow design. Your task is to determine the directional data flow compatibility between a primary tool (Tool A) and each tool in a provided list of other tools. Directional compatibility means that the output of one tool can be meaningfully and frequently used as an input to another tool as part of a logical scientific discovery process.\n\n**Your Task:**\n1. For each tool in the list of 'Other Tools', independently evaluate its pairwise relationship with Tool A.\n2. Identify only the pairs where the output of one tool is **frequently and logically** used as input to the other and could feasibly be part of a scientific workflow.\n3. For each such meaningful pair found, determine the data flow direction (`A->B`, `B->A`, or `both`) and provide a 10-15 word rationale for your choice referencing the inputs and outputs of the tools.\n\n**Primary Tool (Tool A):**\n{tool_a}\n\n**List of Other Tools:**\n{other_tools}\n\n**Output Format:**\nReturn ONLY a valid JSON object containing a single key, `relationships`, which holds a list of JSON objects. Each object in the list represents a meaningful relationship. If no meaningful relationships are found, return an empty list.\n\n```json\n{\n \"relationships\": [\n {\n \"tool_b_name\": \"<Name of the tool from the list>\",\n \"direction\": \"<'A->B'|'B->A'|'both'>\",\n \"rationale\": \"<10-15 word rationale>\"\n }\n ]\n}\n```",
+ "input_arguments": [
+ "tool_a",
+ "other_tools"
+ ],
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "tool_a": {
+ "type": "string",
+ "description": "JSON string for the primary tool configuration (Tool A).",
+ "required": true
+ },
+ "other_tools": {
+ "type": "string",
+ "description": "JSON string of a list of other tool configurations to compare against Tool A.",
+ "required": true
+ }
+ },
+ "required": [
+ "tool_a",
+ "other_tools"
+ ]
+ },
+ "configs": {
+ "api_type": "CHATGPT",
+ "model_id": "o4-mini-0416",
+ "temperature": 0.2,
+ "max_new_tokens": 8192,
+ "return_json": true
+ }
  }
-
- ]
+ ]
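
The ToolMetadataStandardizer contract (per its prompt) is canonical key → list of raw variants, with every raw input appearing in exactly one variant list; the compose script inverts this mapping before rewriting sources and tags. A sketch of consuming such a result, with a fabricated example mapping:

```python
# Example standardizer output (fabricated for illustration only)
standardized = {
    "gene-expression": ["Gene Expression", "gene_expression", "gene-expressions"],
    "NCBI": ["national center for biotechnology information", "NCBI"],
}

# Invert to raw -> canonical, mirroring source_to_standard_map in the compose script
raw_to_canonical = {raw: canonical
                    for canonical, raws in standardized.items()
                    for raw in raws}

# Sanity-check the "every input appears exactly once" contract
inputs = ["Gene Expression", "gene_expression", "gene-expressions",
          "national center for biotechnology information", "NCBI"]
assert sorted(raw_to_canonical) == sorted(inputs)
print(raw_to_canonical["gene_expression"])  # -> gene-expression
```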