PyPI - tooluniverse - Versions diffs - 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

tooluniverse 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tooluniverse might be problematic. Click here for more details.

Files changed (20) hide show

tooluniverse/data/alphafold_tools.json CHANGED Viewed

@@ -1,85 +1,227 @@
 [
   {
-    "name": "alphafold_get_prediction_by_uniprot_id",
-    "description": "Retrieve AlphaFold-predicted 3D protein structure metadata and download links for a given UniProt accession ID.",
+    "name": "alphafold_get_prediction",
+    "description": "Retrieve full AlphaFold 3D structure predictions for a given protein. Input must be a UniProt accession (e.g., 'P69905'), UniProt entry name (e.g., 'HBA_HUMAN'), or CRC64 checksum. Returns residue-level metadata including sequence, per-residue confidence scores (pLDDT), and structure download links (PDB, CIF, PAE). If you do not know the accession, first call `uniprot_search` to resolve it from a protein/gene name, or `UniProt_get_entry_by_accession` if you already have the accession and want UniProt details. For a quick overview, use `alphafold_get_summary`. For mutation/variant impact, see `alphafold_get_annotations`.",
     "type": "AlphaFoldRESTTool",
     "parameter": {
       "type": "object",
       "properties": {
-        "uniprot_id": {
+        "qualifier": {
           "type": "string",
-          "description": "UniProt accession ID (e.g., 'P69905' for hemoglobin alpha)."
+          "description": "Protein identifier: UniProt accession (e.g., 'P69905'), entry name (e.g., 'HBA_HUMAN'), or CRC64 checksum."
+        },
+        "sequence_checksum": {
+          "type": "string",
+          "description": "Optional CRC64 checksum of the UniProt sequence."
         }
       },
-      "required": ["uniprot_id"]
+      "required": ["qualifier"]
     },
     "fields": {
-      "endpoint": "/prediction/{uniprot_id}",
+      "endpoint": "/prediction/{qualifier}",
       "return_format": "JSON"
     },
     "return_schema": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "toolUsed": { "type": "string" },
+          "providerId": { "type": "string" },
+          "entityType": { "type": "string" },
+          "isUniProt": { "type": "boolean" },
+          "modelEntityId": { "type": "string" },
+          "modelCreatedDate": { "type": "string" },
+          "sequenceVersionDate": { "type": "string" },
+          "globalMetricValue": { "type": "number" },
+          "fractionPlddtVeryLow": { "type": "number" },
+          "fractionPlddtLow": { "type": "number" },
+          "fractionPlddtConfident": { "type": "number" },
+          "fractionPlddtVeryHigh": { "type": "number" },
+          "latestVersion": { "type": "integer" },
+          "allVersions": { "type": "array", "items": { "type": "integer" } },
+          "sequence": { "type": "string" },
+          "sequenceStart": { "type": "integer" },
+          "sequenceEnd": { "type": "integer" },
+          "sequenceChecksum": { "type": "string" },
+          "isUniProtReviewed": { "type": "boolean" },
+          "gene": { "type": "string" },
+          "uniprotAccession": { "type": "string" },
+          "uniprotId": { "type": "string" },
+          "uniprotDescription": { "type": "string" },
+          "taxId": { "type": "integer" },
+          "organismScientificName": { "type": "string" },
+          "isUniProtReferenceProteome": { "type": "boolean" },
+          "organismCommonNames": { "type": "array", "items": { "type": "string" } },
+          "organismSynonyms": { "type": "array", "items": { "type": "string" } },
+          "geneSynonyms": { "type": "array", "items": { "type": "string" } },
+          "proteinFullNames": { "type": "array", "items": { "type": "string" } },
+          "proteinShortNames": { "type": "array", "items": { "type": "string" } },
+          "complexName": { "type": "string" },
+          "stoichiometry": { "type": "number" },
+          "ipTM": { "type": "number" },
+          "ipSAE": { "type": "number" },
+          "keywords": { "type": "array", "items": { "type": "string" } },
+          "taxonomyLineage": { "type": "array", "items": { "type": "string" } },
+          "functions": { "type": "array", "items": { "type": "string" } },
+          "alternativeNames": { "type": "array", "items": { "type": "string" } },
+          "catalyticActivities": { "type": "array", "items": { "type": "string" } },
+          "bcifUrl": { "type": "string" },
+          "cifUrl": { "type": "string" },
+          "pdbUrl": { "type": "string" },
+          "paeImageUrl": { "type": "string" },
+          "paeDocUrl": { "type": "string" },
+          "amAnnotationsUrl": { "type": "string" },
+          "amAnnotationsHg19Url": { "type": "string" },
+          "amAnnotationsHg38Url": { "type": "string" },
+          "entryId": { "type": "string" },
+          "isReviewed": { "type": "boolean" },
+          "isReferenceProteome": { "type": "boolean" },
+          "uniprotStart": { "type": "integer" },
+          "uniprotEnd": { "type": "integer" },
+          "uniprotSequence": { "type": "string" }
+        }
+      }
+    }
+  },
+  {
+    "name": "alphafold_get_summary",
+    "description": "Retrieve summary details of AlphaFold 3D models for a given protein. Input must be a UniProt accession, entry name, or CRC64 checksum. Returns lightweight information such as sequence length, coverage, confidence scores, experimental method, resolution, oligomeric state, and structural entities. If you only know the protein/gene name, first use `uniprot_search` to find the accession. For full residue-level 3D predictions with downloadable coordinates, call `alphafold_get_prediction`. For curated variants, see `UniProt_get_disease_variants_by_accession`; for predicted mutation effects, use `alphafold_get_annotations`.",
+    "type": "AlphaFoldRESTTool",
+    "parameter": {
       "type": "object",
-      "description": "AlphaFold predicted protein structure metadata.",
       "properties": {
-        "uniprotAccession": {
-          "type": "string",
-          "description": "UniProt accession ID"
-        },
-        "uniprotDescription": {
-          "type": "string",
-          "description": "Protein description"
-        },
-        "organismScientificName": {
-          "type": "string",
-          "description": "Scientific name of the source organism"
-        },
-        "gene": {
-          "type": "string",
-          "description": "Gene name if available"
-        },
-        "modelCreatedDate": {
-          "type": "string",
-          "description": "Date this AlphaFold model was created"
-        },
-        "globalMetricValue": {
-          "type": "number",
-          "description": "Global pLDDT confidence score"
-        },
-        "fractionPlddtVeryLow": {
-          "type": "number",
-          "description": "Fraction of residues with very low pLDDT (<50)"
-        },
-        "fractionPlddtLow": {
-          "type": "number",
-          "description": "Fraction of residues with low pLDDT (50–70)"
-        },
-        "fractionPlddtConfident": {
-          "type": "number",
-          "description": "Fraction of residues with confident pLDDT (70–90)"
-        },
-        "fractionPlddtVeryHigh": {
-          "type": "number",
-          "description": "Fraction of residues with very high pLDDT (>90)"
-        },
-        "pdbUrl": {
-          "type": "string",
-          "description": "Download link for PDB file"
-        },
-        "cifUrl": {
-          "type": "string",
-          "description": "Download link for CIF file"
-        },
-        "bcifUrl": {
+        "qualifier": {
           "type": "string",
-          "description": "Download link for BCIF file"
+          "description": "Protein identifier: UniProt accession, entry name, or CRC64 checksum."
+        }
+      },
+      "required": ["qualifier"]
+    },
+    "fields": {
+      "endpoint": "/uniprot/summary/{qualifier}.json",
+      "return_format": "JSON"
+    },
+    "return_schema": {
+      "type": "object",
+      "properties": {
+        "uniprot_entry": {
+          "type": "object",
+          "properties": {
+            "ac": { "type": "string" },
+            "id": { "type": "string" },
+            "uniprot_checksum": { "type": "string" },
+            "sequence_length": { "type": "integer" },
+            "segment_start": { "type": "integer" },
+            "segment_end": { "type": "integer" }
+          }
         },
-        "paeImageUrl": {
+        "structures": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "summary": {
+                "type": "object",
+                "properties": {
+                  "model_identifier": { "type": "string" },
+                  "model_category": { "type": "string" },
+                  "model_url": { "type": "string" },
+                  "model_format": { "type": "string" },
+                  "model_type": { "type": "string" },
+                  "model_page_url": { "type": "string" },
+                  "provider": { "type": "string" },
+                  "number_of_conformers": { "type": "integer" },
+                  "ensemble_sample_url": { "type": "string" },
+                  "ensemble_sample_format": { "type": "string" },
+                  "created": { "type": "string" },
+                  "sequence_identity": { "type": "number" },
+                  "uniprot_start": { "type": "integer" },
+                  "uniprot_end": { "type": "integer" },
+                  "coverage": { "type": "number" },
+                  "experimental_method": { "type": "string" },
+                  "resolution": { "type": "number" },
+                  "confidence_type": { "type": "string" },
+                  "confidence_version": { "type": "string" },
+                  "confidence_avg_local_score": { "type": "number" },
+                  "oligomeric_state": { "type": "string" },
+                  "preferred_assembly_id": { "type": "string" },
+                  "entities": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "entity_type": { "type": "string" },
+                        "entity_poly_type": { "type": "string" },
+                        "identifier": { "type": "string" },
+                        "identifier_category": { "type": "string" },
+                        "description": { "type": "string" },
+                        "chain_ids": { "type": "array", "items": { "type": "string" } }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  },
+  {
+    "name": "alphafold_get_annotations",
+    "description": "Retrieve AlphaFold variant annotations (e.g., missense mutations) for a given UniProt accession (e.g., 'P69905'). Input must be a UniProt accession, entry name, or CRC64 checksum, along with an annotation type (currently only 'MUTAGEN'). Use this tool to explore predicted pathogenicity or functional effects of substitutions. If you only have a protein/gene name, resolve it with `uniprot_search`. For experimentally curated variants, use `UniProt_get_disease_variants_by_accession`. To view the full 3D structure, call `alphafold_get_prediction`; for overall model metadata, use `alphafold_get_summary`.",
+    "type": "AlphaFoldRESTTool",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "qualifier": {
           "type": "string",
-          "description": "Predicted aligned error plot image URL"
+          "description": "Protein identifier: UniProt accession, entry name, or CRC64 checksum."
         },
-        "paeDocUrl": {
+        "type": {
           "type": "string",
-          "description": "Predicted aligned error JSON URL"
+          "description": "Annotation type (currently only 'MUTAGEN' is supported).",
+          "enum": ["MUTAGEN"]
+        }
+      },
+      "required": ["qualifier", "type"]
+    },
+    "fields": {
+      "endpoint": "/annotations/{qualifier}.json",
+      "return_format": "JSON"
+    },
+    "return_schema": {
+      "type": "object",
+      "properties": {
+        "accession": { "type": "string" },
+        "id": { "type": "string" },
+        "sequence": { "type": "string" },
+        "annotation": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "type": { "type": "string" },
+              "description": { "type": "string" },
+              "source_name": { "type": "string" },
+              "source_url": { "type": "string" },
+              "evidence": { "type": "string" },
+              "residues": { "type": "array", "items": { "type": "integer" } },
+              "regions": {
+                "type": "array",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "start": { "type": "integer" },
+                    "end": { "type": "integer" },
+                    "annotation_value": { "type": "string" },
+                    "unit": { "type": "string" }
+                  }
+                }
+              }
+            }
+          }
         }
       }
     }

tooluniverse/data/compose_tools.json CHANGED Viewed

@@ -199,4 +199,67 @@
     "composition_file": "biomarker_discovery.py",
     "composition_function": "compose"
   }
+  ,
+  {
+    "type": "ComposeTool",
+    "name": "ToolMetadataGenerationPipeline",
+    "description": "Generates standardized metadata for a batch of ToolUniverse tool configurations by calling ToolMetadataGenerator, LabelGenerator, and ToolMetadataStandardizer for sources and tags.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "tool_configs": {
+          "type": "array",
+          "description": "List of raw tool configuration JSON objects to extract and standardize metadata for",
+          "items": {"type": "object"}
+        },
+        "tool_type_mappings": {
+          "type": "object",
+          "description": "Mapping of simplified toolType (keys) to lists of tool 'type' values belonging to each simplified category (e.g., {'Databases': ['XMLTool']})",
+          "default": {}
+        },
+        "add_existing_tooluniverse_labels": {
+          "type": "boolean",
+          "description": "Whether to include labels from existing ToolUniverse tools when labeling the metadata configs of the new tools. It is strongly recommended that this is set to true to minimize the number of new labels created and the possibility of redundant labels.",
+          "default": true
+        },
+        "max_new_tooluniverse_labels": {
+          "type": "integer",
+          "description": "The maximum number of new ToolUniverse labels to use in the metadata configs of the new tools. The existing ToolUniverse labels will be used first, and then new labels will be created as needed up to this limit. If the limit is reached, the least relevant new labels will be discarded. Please try to use as few new labels as possible to avoid excessive labels.",
+          "default": 0
+        }
+      },
+      "required": ["tool_configs"]
+    },
+    "auto_load_dependencies": true,
+    "fail_on_missing_tools": false,
+    "required_tools": [
+      "ToolMetadataGenerator",
+      "LabelGenerator",
+      "ToolMetadataStandardizer"
+    ],
+    "composition_file": "tool_metadata_generator.py",
+    "composition_function": "compose"
+  },
+  {
+    "type": "ComposeTool",
+    "name": "ToolGraphGenerationPipeline",
+    "description": "Generates a directed tool relationship graph among provided tool configs using ToolRelationshipDetector to infer data-flow compatibility.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "tool_configs": {"type": "array", "description": "List of tool configuration objects", "items": {"type": "object"}},
+        "max_tools": {"type": "integer", "description": "Optional max number of tools to process (debug)"},
+        "output_path": {"type": "string", "description": "Path for output graph JSON", "default": "./tool_relationship_graph.json"},
+        "save_intermediate_every": {"type": "integer", "description": "Checkpoint every N processed pairs", "default": 5000}
+      },
+      "required": ["tool_configs"]
+    },
+    "auto_load_dependencies": true,
+    "fail_on_missing_tools": false,
+    "required_tools": [
+      "ToolRelationshipDetector"
+    ],
+    "composition_file": "tool_graph_generation.py",
+    "composition_function": "compose"
+  }
 ]

tooluniverse/data/special_tools.json CHANGED Viewed

@@ -1,5 +1,6 @@
 [
   {
+    "type": "SpecialTool",
     "name": "Finish",
     "description": "Indicate the end of multi-step reasoning.",
     "parameter": {
@@ -8,6 +9,7 @@
     }
   },
   {
+    "type": "SpecialTool",
     "name": "CallAgent",
     "description": "Give a solution plan to the agent and let it solve the problem. Solution plan should reflect a distinct method, approach, or viewpoint to solve the given question. Call these function multiple times, and each solution plan should start with different aspects of the question, for example, genes, phenotypes, diseases, or drugs, etc. The CallAgent will achieve the task based on the plan, so only give the plan instead of unverified information.",
     "parameter": {

tooluniverse/test/test_alphafold_tool.py CHANGED Viewed

@@ -1,46 +1,54 @@
 import json
-from typing import Any, Dict, List
 import os
+from typing import Any, Dict, List
 from tooluniverse import ToolUniverse
-# Load expected schema fields dynamically from the JSON definition
+# Load all tool schemas from JSON
 schema_path = os.path.join(
     os.path.dirname(__file__), "..", "data", "alphafold_tools.json"
 )
 with open(schema_path) as f:
-    schema = json.load(f)[0]["return_schema"]["properties"]
+    tools_json = json.load(f)
+schemas = {tool["name"]: tool["return_schema"] for tool in tools_json}
 tooluni = ToolUniverse()
 tooluni.load_tools()
-# Test cases: 3 valid, 1 invalid UniProt ID, and 1 missing parameter
 test_queries: List[Dict[str, Any]] = [
+    # Hemoglobin subunit alpha (valid)
     {
-        "name": "alphafold_get_prediction_by_uniprot_id",
-        "arguments": {"uniprot_id": "Q14596"},
+        "name": "alphafold_get_prediction",
+        "arguments": {"qualifier": "P69905"},
     },
+    # Invalid
     {
-        "name": "alphafold_get_prediction_by_uniprot_id",
-        "arguments": {"uniprot_id": "Q9BUR4"},
+        "name": "alphafold_get_prediction",
+        "arguments": {"qualifier": "XXX123"},
     },
+    # Missing param
     {
-        "name": "alphafold_get_prediction_by_uniprot_id",
-        "arguments": {"uniprot_id": "Q8W3K0"},
+        "name": "alphafold_get_prediction",
+        "arguments": {},
     },
+    # Summary: valid
     {
-        "name": "alphafold_get_prediction_by_uniprot_id",
-        "arguments": {"uniprot_id": "XXX123"},
-    },  # invalid
+        "name": "alphafold_get_summary",
+        "arguments": {"qualifier": "P69905"},
+    },
+    # Annotations (valid + invalid type)
     {
-        "name": "alphafold_get_prediction_by_uniprot_id",
-        "arguments": {},
-    },  # missing param
+        "name": "alphafold_get_annotations",
+        "arguments": {"qualifier": "P69905", "type": "MUTAGEN"},
+    },
+    {
+        "name": "alphafold_get_annotations",
+        "arguments": {"qualifier": "P69905", "type": "INVALID"},
+    },
 ]
 for idx, query in enumerate(test_queries, 1):
-    uid = query["arguments"].get("uniprot_id")
-    label = f"UniProt ID: {uid}" if uid else "No UniProt ID"
-    print(f"\n[{idx}] Running {query['name']} with {label}")
+    print(f"\n[{idx}] Running {query['name']} with {query['arguments']}")
     result = tooluni.run(query)
     # Handle errors
@@ -51,21 +59,50 @@ for idx, query in enumerate(test_queries, 1):
         continue
     # Handle success
-    data = result.get("data", [])
+    data = result.get("data")
     if not data:
         print("No data returned.")
         continue
-    first = data[0]
-    print("SUCCESS")
-    print(f"   {first.get('uniprotDescription')} ({first.get('uniprotAccession')})")
-    print(f"   Organism: {first.get('organismScientificName')}")
-    print(f"   Avg pLDDT: {first.get('globalMetricValue')}")
-    print(f"   Structure (PDB): {first.get('pdbUrl')}")
+    # Schema validation (check only top-level keys)
+    schema = schemas[query["name"]]
+    expected_keys = schema.get("properties", {}).keys()
+    # Handle list vs dict results
+    if isinstance(data, list) and data:
+        record = data[0]
+    elif isinstance(data, dict):
+        record = data
+    else:
+        record = {}
-    # Schema validation
-    missing = [k for k in schema.keys() if k not in first]
+    missing = [k for k in expected_keys if k not in record]
     if missing:
         print(f"   INVALID Missing expected fields: {missing}")
     else:
-        print("   All expected schema fields present")
+        print("   SUCCESS All expected schema fields present")
+    # Show highlights depending on tool
+    if query["name"] == "alphafold_get_prediction":
+        if "uniprotDescription" in record:
+            print(
+                f"   {record.get('uniprotDescription')} ({record.get('uniprotAccession')})"
+            )
+            print(f"   Organism: {record.get('organismScientificName')}")
+            print(f"   Avg pLDDT: {record.get('globalMetricValue')}")
+    elif query["name"] == "alphafold_get_summary":
+        entry = record.get("uniprot_entry", {})
+        structures = record.get("structures", [])
+        print(f"   UniProt AC: {entry.get('ac')}, ID: {entry.get('id')}")
+        print(f"   Sequence length: {entry.get('sequence_length')}")
+        print(f"   Structures returned: {len(structures)}")
+    elif query["name"] == "alphafold_get_annotations":
+        annotations = record.get("annotation", [])
+        print(f"   Accession: {record.get('accession')}")
+        print(f"   Total annotations: {len(annotations)}")
+        if annotations:
+            first_ann = annotations[0]
+            print(f"   First annotation type: {first_ann.get('type')}")
+            print(f"   First annotation description: {first_ann.get('description')}")

tooluniverse 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

Potentially problematic release.

tooluniverse 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl