tooluniverse 1.0.11.1__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- tooluniverse/alphafold_tool.py +47 -7
- tooluniverse/base_tool.py +9 -1
- tooluniverse/build_optimizer.py +115 -22
- tooluniverse/data/alphafold_tools.json +7 -12
- tooluniverse/data/encode_tools.json +139 -0
- tooluniverse/data/gbif_tools.json +152 -0
- tooluniverse/data/gdc_tools.json +116 -0
- tooluniverse/data/gtex_tools.json +116 -0
- tooluniverse/data/icgc_tools.json +0 -0
- tooluniverse/data/mgnify_tools.json +121 -0
- tooluniverse/data/obis_tools.json +122 -0
- tooluniverse/data/optimizer_tools.json +275 -0
- tooluniverse/data/rnacentral_tools.json +99 -0
- tooluniverse/data/smolagent_tools.json +206 -0
- tooluniverse/data/uniprot_tools.json +13 -5
- tooluniverse/data/wikipathways_tools.json +106 -0
- tooluniverse/default_config.py +12 -0
- tooluniverse/encode_tool.py +245 -0
- tooluniverse/execute_function.py +185 -17
- tooluniverse/gbif_tool.py +166 -0
- tooluniverse/gdc_tool.py +175 -0
- tooluniverse/generate_tools.py +121 -9
- tooluniverse/gtex_tool.py +168 -0
- tooluniverse/mgnify_tool.py +181 -0
- tooluniverse/obis_tool.py +185 -0
- tooluniverse/pypi_package_inspector_tool.py +3 -2
- tooluniverse/python_executor_tool.py +43 -13
- tooluniverse/rnacentral_tool.py +124 -0
- tooluniverse/smcp.py +17 -25
- tooluniverse/smcp_server.py +1 -1
- tooluniverse/smolagent_tool.py +555 -0
- tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
- tooluniverse/tools/ENCODE_list_files.py +59 -0
- tooluniverse/tools/ENCODE_search_experiments.py +67 -0
- tooluniverse/tools/GBIF_search_occurrences.py +67 -0
- tooluniverse/tools/GBIF_search_species.py +55 -0
- tooluniverse/tools/GDC_list_files.py +55 -0
- tooluniverse/tools/GDC_search_cases.py +55 -0
- tooluniverse/tools/GTEx_get_expression_summary.py +49 -0
- tooluniverse/tools/GTEx_query_eqtl.py +59 -0
- tooluniverse/tools/MGnify_list_analyses.py +52 -0
- tooluniverse/tools/MGnify_search_studies.py +55 -0
- tooluniverse/tools/OBIS_search_occurrences.py +59 -0
- tooluniverse/tools/OBIS_search_taxa.py +52 -0
- tooluniverse/tools/RNAcentral_get_by_accession.py +46 -0
- tooluniverse/tools/RNAcentral_search.py +52 -0
- tooluniverse/tools/TestCaseGenerator.py +46 -0
- tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
- tooluniverse/tools/ToolDiscover.py +4 -0
- tooluniverse/tools/UniProt_search.py +14 -6
- tooluniverse/tools/WikiPathways_get_pathway.py +52 -0
- tooluniverse/tools/WikiPathways_search.py +52 -0
- tooluniverse/tools/__init__.py +43 -1
- tooluniverse/tools/advanced_literature_search_agent.py +46 -0
- tooluniverse/tools/alphafold_get_annotations.py +4 -10
- tooluniverse/tools/download_binary_file.py +3 -6
- tooluniverse/tools/open_deep_research_agent.py +46 -0
- tooluniverse/uniprot_tool.py +51 -4
- tooluniverse/wikipathways_tool.py +122 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/METADATA +3 -1
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/RECORD +65 -24
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/top_level.txt +0 -0
tooluniverse/alphafold_tool.py
CHANGED

@@ -11,7 +11,7 @@ ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"
 class AlphaFoldRESTTool(BaseTool):
     """
     AlphaFold Protein Structure Database API tool.
-    Generic wrapper for AlphaFold API endpoints
+    Generic wrapper for AlphaFold API endpoints from alphafold_tools.json.
     """

     def __init__(self, tool_config):
@@ -22,6 +22,7 @@ class AlphaFoldRESTTool(BaseTool):
         self.endpoint_template: str = fields["endpoint"]
         self.required: List[str] = parameter.get("required", [])
         self.output_format: str = fields.get("return_format", "JSON")
+        self.auto_query_params: Dict[str, Any] = fields.get("auto_query_params", {})

     def _build_url(self, arguments: Dict[str, Any]) -> str | Dict[str, Any]:
         # Example: endpoint_template = "/annotations/{qualifier}.json"
@@ -40,14 +41,18 @@ class AlphaFoldRESTTool(BaseTool):
         # Now url_path = "/annotations/P69905.json"

         # Treat all remaining args as query parameters
-        # "type" wasn
+        # "type" wasn't a placeholder, so it becomes a query param
         query_args = {k: v for k, v in arguments.items() if k not in used}
+
+        # Add auto_query_params from config (e.g., type=MUTAGEN)
+        query_args.update(self.auto_query_params)
+
         if query_args:
             from urllib.parse import urlencode

             url_path += "?" + urlencode(query_args)

-        # Final
+        # Final example: annotations/P69905.json?type=MUTAGEN
         return ALPHAFOLD_BASE_URL + url_path

     def _make_request(self, url: str) -> Dict[str, Any]:
@@ -62,9 +67,37 @@ class AlphaFoldRESTTool(BaseTool):
                 },
             )
         except Exception as e:
-            return {
+            return {
+                "error": "Request to AlphaFold API failed",
+                "detail": str(e),
+            }

         if resp.status_code == 404:
+            # Try to provide more context about 404 errors
+            # Check if protein exists in AlphaFold DB
+            try:
+                qualifier_match = re.search(r"/annotations/([^/]+)\.json", url)
+                if qualifier_match:
+                    accession = qualifier_match.group(1)
+                    base = ALPHAFOLD_BASE_URL
+                    check_url = f"{base}/uniprot/summary/{accession}.json"
+                    check_resp = requests.get(check_url, timeout=10)
+                    if check_resp.status_code == 200:
+                        return {
+                            "error": "No MUTAGEN annotations available",
+                            "reason": (
+                                "Protein exists in AlphaFold DB but "
+                                "has no MUTAGEN annotations"
+                            ),
+                            "endpoint": url,
+                        }
+                    else:
+                        return {
+                            "error": "Protein not found in AlphaFold DB",
+                            "endpoint": url,
+                        }
+            except Exception:
+                pass  # Fall through to generic error
             return {"error": "Not found", "endpoint": url}
         if resp.status_code != 200:
             return {
@@ -98,9 +131,13 @@ class AlphaFoldRESTTool(BaseTool):
         if self.output_format.upper() == "JSON":
             try:
                 data = resp.json()
-                if not data:
+                if not data or (isinstance(data, dict) and not data):
                     return {
-                        "error": "
+                        "error": "No MUTAGEN annotations available",
+                        "reason": (
+                            "Protein exists in AlphaFold DB but "
+                            "has no MUTAGEN annotations from UniProt"
+                        ),
                         "endpoint": url,
                         "query": arguments,
                     }
@@ -124,4 +161,7 @@ class AlphaFoldRESTTool(BaseTool):
         }

         # Fallback for non-JSON output
-        return {
+        return {
+            "data": resp.text,
+            "metadata": {"endpoint": url, "query": arguments},
+        }
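In practice, the new auto_query_params field lets the JSON config pin query parameters that used to be caller-supplied. A minimal standalone sketch of the URL composition (a simplified free function for illustration, not the actual method):

    from urllib.parse import urlencode

    ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"

    def build_url(endpoint_template, arguments, auto_query_params):
        # Substitute {placeholders}; leftover arguments become query parameters
        used = set()
        url_path = endpoint_template
        for key, value in arguments.items():
            placeholder = "{" + key + "}"
            if placeholder in url_path:
                url_path = url_path.replace(placeholder, str(value))
                used.add(key)
        query_args = {k: v for k, v in arguments.items() if k not in used}
        query_args.update(auto_query_params)  # e.g. {"type": "MUTAGEN"} from the config
        if query_args:
            url_path += "?" + urlencode(query_args)
        return ALPHAFOLD_BASE_URL + url_path

    # build_url("/annotations/{qualifier}.json", {"qualifier": "P69905"}, {"type": "MUTAGEN"})
    # -> "https://alphafold.ebi.ac.uk/api/annotations/P69905.json?type=MUTAGEN"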
tooluniverse/base_tool.py
CHANGED

@@ -183,7 +183,15 @@ class BaseTool:
         try:
             import jsonschema

-            jsonschema.validate(arguments, schema)
+            # Filter out internal control parameters before validation
+            # Only filter known internal parameters, not all underscore-prefixed params
+            # to allow optional streaming parameter _tooluniverse_stream
+            internal_params = {"ctx", "_tooluniverse_stream"}
+            filtered_arguments = {
+                k: v for k, v in arguments.items() if k not in internal_params
+            }
+
+            jsonschema.validate(filtered_arguments, schema)
             return None
         except jsonschema.ValidationError as e:
             return ToolValidationError(
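The point of the filtering is that strict schemas reject unknown keys. A self-contained sketch, assuming a schema with additionalProperties set to false (the schema and arguments here are invented for illustration):

    import jsonschema

    schema = {
        "type": "object",
        "properties": {"qualifier": {"type": "string"}},
        "required": ["qualifier"],
        "additionalProperties": False,
    }
    arguments = {"qualifier": "P69905", "_tooluniverse_stream": True, "ctx": None}

    # Without filtering, additionalProperties: False would reject the control keys
    internal_params = {"ctx", "_tooluniverse_stream"}
    filtered = {k: v for k, v in arguments.items() if k not in internal_params}
    jsonschema.validate(filtered, schema)  # passes; validating `arguments` would raise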
tooluniverse/build_optimizer.py
CHANGED

@@ -6,15 +6,52 @@ from pathlib import Path
 from typing import Dict, Any, Set, Tuple


-def
-    """
+def _normalize_value(value: Any) -> Any:
+    """Recursively normalize values for consistent hashing."""
+    if isinstance(value, dict):
+        # Sort dictionary keys and normalize values
+        return {k: _normalize_value(v) for k, v in sorted(value.items())}
+    elif isinstance(value, list):
+        # Normalize list elements
+        return [_normalize_value(item) for item in value]
+    elif isinstance(value, (str, int, float, bool)) or value is None:
+        return value
+    else:
+        # Convert other types to string representation for hashing
+        return str(value)
+
+
+def calculate_tool_hash(tool_config: Dict[str, Any], verbose: bool = False) -> str:
+    """Calculate a hash for tool configuration to detect changes.
+
+    Args:
+        tool_config: Tool configuration dictionary
+        verbose: If True, print excluded fields (for debugging)
+
+    Returns:
+        MD5 hash string of the normalized configuration
+    """
+    # Fields to exclude from hash calculation (metadata/timestamp fields)
+    excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
+
     # Create a normalized version of the config for hashing
     normalized_config = {}
-
-        if key not in ["timestamp", "last_updated", "created_at"]:
-            normalized_config[key] = value
+    excluded_values = []

-
+    for key, value in sorted(tool_config.items()):
+        if key not in excluded_fields:
+            # Recursively normalize nested structures
+            normalized_config[key] = _normalize_value(value)
+        elif verbose:
+            excluded_values.append(key)
+
+    if verbose and excluded_values:
+        print(f"  Excluded fields from hash: {', '.join(excluded_values)}")
+
+    # Use consistent JSON serialization with sorted keys
+    config_str = json.dumps(
+        normalized_config, sort_keys=True, separators=(",", ":"), ensure_ascii=False
+    )
     return hashlib.md5(config_str.encode("utf-8")).hexdigest()

@@ -59,29 +96,85 @@ def cleanup_orphaned_files(tools_dir: Path, current_tool_names: Set[str]) -> int
     return cleaned_count


+def _compare_configs(old_config: Dict[str, Any], new_config: Dict[str, Any]) -> list:
+    """Compare two configs and return list of changed field paths."""
+    changes = []
+
+    all_keys = set(old_config.keys()) | set(new_config.keys())
+    excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
+
+    for key in all_keys:
+        if key in excluded_fields:
+            continue
+
+        old_val = old_config.get(key)
+        new_val = new_config.get(key)
+
+        if old_val != new_val:
+            changes.append(key)
+
+    return changes
+
+
 def get_changed_tools(
-    current_tools: Dict[str, Any],
-
-
+    current_tools: Dict[str, Any],
+    metadata_file: Path,
+    force_regenerate: bool = False,
+    verbose: bool = False,
+) -> Tuple[list, list, list, Dict[str, list]]:
+    """Get lists of new, changed, and unchanged tools.
+
+    Args:
+        current_tools: Dictionary of current tool configurations
+        metadata_file: Path to metadata file storing previous hashes
+        force_regenerate: If True, mark all tools as changed
+        verbose: If True, provide detailed change information
+
+    Returns:
+        Tuple of (new_tools, changed_tools, unchanged_tools, change_details)
+        where change_details maps tool_name -> list of changed field names
+    """
     old_metadata = load_metadata(metadata_file)
     new_metadata = {}
     new_tools = []
     changed_tools = []
     unchanged_tools = []
-
-
-
-
-
-
-
-
-
-
-
-
+    change_details: Dict[str, list] = {}
+
+    if force_regenerate:
+        print("🔄 Force regeneration enabled - all tools will be regenerated")
+        for tool_name, tool_config in current_tools.items():
+            current_hash = calculate_tool_hash(tool_config, verbose=verbose)
+            new_metadata[tool_name] = current_hash
+            if tool_name in old_metadata:
+                changed_tools.append(tool_name)
+                change_details[tool_name] = ["force_regenerate"]
+            else:
+                new_tools.append(tool_name)
+    else:
+        for tool_name, tool_config in current_tools.items():
+            current_hash = calculate_tool_hash(tool_config, verbose=verbose)
+            new_metadata[tool_name] = current_hash
+
+            old_hash = old_metadata.get(tool_name)
+            if old_hash is None:
+                new_tools.append(tool_name)
+                if verbose:
+                    print(f"  ✨ New tool detected: {tool_name}")
+            elif old_hash != current_hash:
+                changed_tools.append(tool_name)
+                # Try to identify which fields changed (if we have the old config)
+                # Note: We only have hashes, so we can't do detailed field comparison
+                # This would require storing full configs, which we avoid for size reasons
+                change_details[tool_name] = ["hash_mismatch"]
+                if verbose:
+                    print(
+                        f"  🔄 Tool changed: {tool_name} (hash: {old_hash[:8]}... -> {current_hash[:8]}...)"
+                    )
+            else:
+                unchanged_tools.append(tool_name)

     # Save updated metadata
     save_metadata(new_metadata, metadata_file)

-    return new_tools, changed_tools, unchanged_tools
+    return new_tools, changed_tools, unchanged_tools, change_details
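The effect of the normalization is that cosmetic metadata no longer invalidates the regeneration cache. A compact sketch of the scheme (tool_hash is a simplified stand-in for calculate_tool_hash, without the recursive nested-value normalization; the configs are invented):

    import hashlib
    import json

    EXCLUDED = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}

    def tool_hash(config):
        normalized = {k: v for k, v in sorted(config.items()) if k not in EXCLUDED}
        payload = json.dumps(normalized, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
        return hashlib.md5(payload.encode("utf-8")).hexdigest()

    a = {"name": "UniProt_search", "timestamp": "2024-01-01"}
    b = {"name": "UniProt_search", "timestamp": "2025-01-01"}
    assert tool_hash(a) == tool_hash(b)              # timestamp churn: no regeneration
    assert tool_hash(a) != tool_hash({"name": "X"})  # real config change: regenerate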
tooluniverse/data/alphafold_tools.json
CHANGED

@@ -387,31 +387,26 @@
     },
     {
       "name": "alphafold_get_annotations",
-      "description": "Retrieve AlphaFold
+      "description": "Retrieve AlphaFold MUTAGEN annotations for a given UniProt accession. Returns experimental mutagenesis data mapped onto protein structures from UniProt. The qualifier must be a UniProt ACCESSION (e.g., 'P69905'). Note: Not all proteins have MUTAGEN annotations available in the database.",
       "type": "AlphaFoldRESTTool",
       "parameter": {
         "type": "object",
         "properties": {
           "qualifier": {
             "type": "string",
-            "description": "
-          },
-          "type": {
-            "type": "string",
-            "description": "Annotation type (currently only 'MUTAGEN' is supported).",
-            "enum": [
-              "MUTAGEN"
-            ]
+            "description": "UniProt ACCESSION (e.g., 'P69905'). Must be an accession number, not an entry name."
           }
         },
         "required": [
-          "qualifier",
-          "type"
+          "qualifier"
         ]
       },
       "fields": {
         "endpoint": "/annotations/{qualifier}.json",
-        "return_format": "JSON"
+        "return_format": "JSON",
+        "auto_query_params": {
+          "type": "MUTAGEN"
+        }
       },
       "return_schema": {
         "type": "object",
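With auto_query_params in place, the tool issues a request equivalent to the following (endpoint and pinned parameter taken from the diff above):

    import requests

    ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"

    # The caller now passes only `qualifier`; the config pins type=MUTAGEN
    resp = requests.get(
        f"{ALPHAFOLD_BASE_URL}/annotations/P69905.json",
        params={"type": "MUTAGEN"},
        timeout=10,
    )
    print(resp.status_code, resp.json() if resp.ok else resp.text)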
tooluniverse/data/encode_tools.json
ADDED

@@ -0,0 +1,139 @@
[
  {
    "name": "ENCODE_search_experiments",
    "type": "ENCODESearchTool",
    "description": "Search ENCODE functional genomics experiments (e.g., ChIP-seq, ATAC-seq) by assay/target/organism/status. Use to discover datasets and access experiment-level metadata.",
    "parameter": {
      "type": "object",
      "properties": {
        "assay_title": {
          "type": "string",
          "description": "Assay name filter (e.g., 'ChIP-seq', 'ATAC-seq')."
        },
        "target": {
          "type": "string",
          "description": "Target filter (e.g., 'CTCF')."
        },
        "organism": {
          "type": "string",
          "description": "Organism filter (e.g., 'Homo sapiens', 'Mus musculus')."
        },
        "status": {
          "type": "string",
          "default": "released",
          "description": "Record status filter (default 'released')."
        },
        "limit": {
          "type": "integer",
          "default": 10,
          "minimum": 1,
          "maximum": 100,
          "description": "Max number of results (1–100)."
        }
      }
    },
    "fields": {
      "endpoint": "https://www.encodeproject.org/search/",
      "format": "json"
    },
    "return_schema": {
      "type": "object",
      "description": "ENCODE experiments search response",
      "properties": {
        "status": {"type": "string"},
        "data": {
          "type": "object",
          "properties": {
            "total": {"type": "integer"},
            "@graph": {
              "type": "array",
              "items": {
                "type": "object",
                "properties": {
                  "accession": {"type": "string"},
                  "assay_title": {"type": "string"},
                  "target": {"type": "object"},
                  "organism": {"type": "string"},
                  "status": {"type": "string"}
                }
              }
            }
          }
        },
        "url": {"type": "string"}
      }
    },
    "test_examples": [
      {"assay_title": "ChIP-seq", "limit": 1},
      {"assay_title": "ATAC-seq", "limit": 1}
    ],
    "label": ["ENCODE", "Experiment", "Search"],
    "metadata": {
      "tags": ["functional-genomics", "chip-seq", "atac-seq"],
      "estimated_execution_time": "< 3 seconds"
    }
  },
  {
    "name": "ENCODE_list_files",
    "type": "ENCODEFilesTool",
    "description": "List ENCODE files with filters (file_format, output_type, assay). Use to programmatically retrieve downloadable artifact metadata (FASTQ, BAM, bigWig, peaks).",
    "parameter": {
      "type": "object",
      "properties": {
        "file_type": {
          "type": "string",
          "description": "File type filter (e.g., 'fastq', 'bam', 'bigWig')."
        },
        "assay_title": {
          "type": "string",
          "description": "Assay filter (e.g., 'ChIP-seq')."
        },
        "limit": {
          "type": "integer",
          "default": 10,
          "minimum": 1,
          "maximum": 100,
          "description": "Max number of results (1–100)."
        }
      }
    },
    "fields": {
      "endpoint": "https://www.encodeproject.org/search/",
      "format": "json"
    },
    "return_schema": {
      "type": "object",
      "description": "ENCODE files search response",
      "properties": {
        "status": {"type": "string"},
        "data": {
          "type": "object",
          "properties": {
            "total": {"type": "integer"},
            "@graph": {
              "type": "array",
              "items": {
                "type": "object",
                "properties": {
                  "accession": {"type": "string"},
                  "file_format": {"type": "string"},
                  "output_type": {"type": "string"},
                  "file_type": {"type": "string"}
                }
              }
            }
          }
        },
        "url": {"type": "string"}
      }
    },
    "test_examples": [
      {"file_type": "fastq", "limit": 1}
    ],
    "label": ["ENCODE", "File", "Search"],
    "metadata": {
      "tags": ["downloads", "artifacts", "metadata"],
      "estimated_execution_time": "< 3 seconds"
    }
  }
]
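For orientation, a request along these lines is roughly what ENCODE_search_experiments would send; the type=Experiment filter and the exact config-to-query mapping are assumptions about the wrapper, not taken from this diff:

    import requests

    params = {
        "type": "Experiment",   # assumed: wrapper scopes the search to experiment objects
        "assay_title": "ChIP-seq",
        "status": "released",
        "format": "json",
        "limit": 1,
    }
    resp = requests.get("https://www.encodeproject.org/search/", params=params, timeout=10)
    data = resp.json()
    print(data.get("total"), [hit.get("accession") for hit in data.get("@graph", [])])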
tooluniverse/data/gbif_tools.json
ADDED

@@ -0,0 +1,152 @@
[
  {
    "name": "GBIF_search_species",
    "type": "GBIFTool",
    "description": "Find taxa by keyword (scientific/common names) in GBIF. Use to resolve organism names to stable taxon keys (rank, lineage) for downstream biodiversity/occurrence queries.",
    "parameter": {
      "type": "object",
      "properties": {
        "query": {
          "type": "string",
          "description": "Search string for species/taxa (supports scientific/common names), e.g., 'Homo', 'Atlantic cod'."
        },
        "limit": {
          "type": "integer",
          "default": 10,
          "minimum": 1,
          "maximum": 300,
          "description": "Maximum number of results to return (1–300)."
        },
        "offset": {
          "type": "integer",
          "default": 0,
          "minimum": 0,
          "description": "Result offset for pagination (0-based)."
        }
      },
      "required": ["query"]
    },
    "fields": {
      "endpoint": "https://api.gbif.org/v1/species/search",
      "format": "json"
    },
    "return_schema": {
      "type": "object",
      "description": "GBIF species search response",
      "properties": {
        "status": {"type": "string"},
        "data": {
          "type": "object",
          "properties": {
            "count": {"type": "integer"},
            "results": {
              "type": "array",
              "items": {
                "type": "object",
                "properties": {
                  "key": {"type": "integer", "description": "taxonKey"},
                  "scientificName": {"type": "string"},
                  "rank": {"type": "string"},
                  "kingdom": {"type": "string"},
                  "phylum": {"type": "string"},
                  "class": {"type": "string"},
                  "order": {"type": "string"},
                  "family": {"type": "string"},
                  "genus": {"type": "string"}
                }
              }
            }
          }
        },
        "url": {"type": "string"}
      }
    },
    "test_examples": [
      {"query": "Homo", "limit": 1},
      {"query": "Gadus", "limit": 1}
    ],
    "label": ["GBIF", "Taxonomy", "Search"],
    "metadata": {
      "tags": ["biodiversity", "taxonomy", "species", "search"],
      "estimated_execution_time": "< 2 seconds"
    }
  },
  {
    "name": "GBIF_search_occurrences",
    "type": "GBIFOccurrenceTool",
    "description": "Retrieve species occurrence records from GBIF with optional filters (taxonKey, country, coordinates). Use for distribution mapping, presence-only modeling, and sampling context.",
    "parameter": {
      "type": "object",
      "properties": {
        "taxonKey": {
          "type": "integer",
          "description": "GBIF taxon key to filter occurrences by a specific taxon (from species search)."
        },
        "country": {
          "type": "string",
          "description": "ISO 3166-1 alpha-2 country code filter (e.g., 'US', 'CN')."
        },
        "hasCoordinate": {
          "type": "boolean",
          "default": true,
          "description": "Only return records with valid latitude/longitude coordinates when true."
        },
        "limit": {
          "type": "integer",
          "default": 10,
          "minimum": 1,
          "maximum": 300,
          "description": "Maximum number of results to return (1–300)."
        },
        "offset": {
          "type": "integer",
          "default": 0,
          "minimum": 0,
          "description": "Result offset for pagination (0-based)."
        }
      }
    },
    "fields": {
      "endpoint": "https://api.gbif.org/v1/occurrence/search",
      "format": "json"
    },
    "return_schema": {
      "type": "object",
      "description": "GBIF occurrence search response",
      "properties": {
        "status": {"type": "string"},
        "data": {
          "type": "object",
          "properties": {
            "count": {"type": "integer"},
            "results": {
              "type": "array",
              "items": {
                "type": "object",
                "properties": {
                  "key": {"type": "integer"},
                  "speciesKey": {"type": "integer"},
                  "scientificName": {"type": "string"},
                  "decimalLatitude": {"type": "number"},
                  "decimalLongitude": {"type": "number"},
                  "eventDate": {"type": "string"},
                  "countryCode": {"type": "string"}
                }
              }
            }
          }
        },
        "url": {"type": "string"}
      }
    },
    "test_examples": [
      {"hasCoordinate": true, "limit": 1},
      {"country": "US", "limit": 1}
    ],
    "label": ["GBIF", "Occurrence", "Geospatial"],
    "metadata": {
      "tags": ["biodiversity", "occurrence", "distribution", "geospatial"],
      "estimated_execution_time": "< 3 seconds"
    }
  }
]
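The two GBIF tools are meant to chain: resolve a name to a taxonKey, then filter occurrences by it. A minimal sketch against the public GBIF API (note the raw species endpoint takes q, to which the tool's query parameter presumably maps; 'Gadus morhua' is just an example):

    import requests

    # Step 1: resolve a scientific name to a GBIF taxonKey
    sp = requests.get(
        "https://api.gbif.org/v1/species/search",
        params={"q": "Gadus morhua", "limit": 1},
        timeout=10,
    ).json()
    taxon_key = sp["results"][0]["key"]

    # Step 2: fetch georeferenced occurrence records for that taxon
    occ = requests.get(
        "https://api.gbif.org/v1/occurrence/search",
        params={"taxonKey": taxon_key, "hasCoordinate": "true", "limit": 1},
        timeout=10,
    ).json()
    print(occ["count"], occ["results"][0].get("countryCode"))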