tooluniverse 1.0.11.2__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/build_optimizer.py +115 -22
- tooluniverse/data/encode_tools.json +139 -0
- tooluniverse/data/gbif_tools.json +152 -0
- tooluniverse/data/gdc_tools.json +116 -0
- tooluniverse/data/gtex_tools.json +116 -0
- tooluniverse/data/icgc_tools.json +0 -0
- tooluniverse/data/mgnify_tools.json +121 -0
- tooluniverse/data/obis_tools.json +122 -0
- tooluniverse/data/optimizer_tools.json +275 -0
- tooluniverse/data/rnacentral_tools.json +99 -0
- tooluniverse/data/smolagent_tools.json +206 -0
- tooluniverse/data/wikipathways_tools.json +106 -0
- tooluniverse/default_config.py +12 -0
- tooluniverse/encode_tool.py +245 -0
- tooluniverse/execute_function.py +46 -8
- tooluniverse/gbif_tool.py +166 -0
- tooluniverse/gdc_tool.py +175 -0
- tooluniverse/generate_tools.py +121 -9
- tooluniverse/gtex_tool.py +168 -0
- tooluniverse/mgnify_tool.py +181 -0
- tooluniverse/obis_tool.py +185 -0
- tooluniverse/pypi_package_inspector_tool.py +3 -2
- tooluniverse/rnacentral_tool.py +124 -0
- tooluniverse/smcp_server.py +1 -1
- tooluniverse/smolagent_tool.py +555 -0
- tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
- tooluniverse/tools/ENCODE_list_files.py +59 -0
- tooluniverse/tools/ENCODE_search_experiments.py +67 -0
- tooluniverse/tools/GBIF_search_occurrences.py +67 -0
- tooluniverse/tools/GBIF_search_species.py +55 -0
- tooluniverse/tools/GDC_list_files.py +55 -0
- tooluniverse/tools/GDC_search_cases.py +55 -0
- tooluniverse/tools/GTEx_get_expression_summary.py +49 -0
- tooluniverse/tools/GTEx_query_eqtl.py +59 -0
- tooluniverse/tools/MGnify_list_analyses.py +52 -0
- tooluniverse/tools/MGnify_search_studies.py +55 -0
- tooluniverse/tools/OBIS_search_occurrences.py +59 -0
- tooluniverse/tools/OBIS_search_taxa.py +52 -0
- tooluniverse/tools/RNAcentral_get_by_accession.py +46 -0
- tooluniverse/tools/RNAcentral_search.py +52 -0
- tooluniverse/tools/TestCaseGenerator.py +46 -0
- tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
- tooluniverse/tools/ToolDiscover.py +4 -0
- tooluniverse/tools/UniProt_search.py +17 -44
- tooluniverse/tools/WikiPathways_get_pathway.py +52 -0
- tooluniverse/tools/WikiPathways_search.py +52 -0
- tooluniverse/tools/__init__.py +43 -1
- tooluniverse/tools/advanced_literature_search_agent.py +46 -0
- tooluniverse/tools/alphafold_get_annotations.py +4 -10
- tooluniverse/tools/download_binary_file.py +3 -6
- tooluniverse/tools/open_deep_research_agent.py +46 -0
- tooluniverse/wikipathways_tool.py +122 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/METADATA +3 -1
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/RECORD +58 -17
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/top_level.txt +0 -0
tooluniverse/build_optimizer.py
CHANGED
|
@@ -6,15 +6,52 @@ from pathlib import Path
|
|
|
6
6
|
from typing import Dict, Any, Set, Tuple
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
def
|
|
10
|
-
"""
|
|
9
|
+
def _normalize_value(value: Any) -> Any:
|
|
10
|
+
"""Recursively normalize values for consistent hashing."""
|
|
11
|
+
if isinstance(value, dict):
|
|
12
|
+
# Sort dictionary keys and normalize values
|
|
13
|
+
return {k: _normalize_value(v) for k, v in sorted(value.items())}
|
|
14
|
+
elif isinstance(value, list):
|
|
15
|
+
# Normalize list elements
|
|
16
|
+
return [_normalize_value(item) for item in value]
|
|
17
|
+
elif isinstance(value, (str, int, float, bool)) or value is None:
|
|
18
|
+
return value
|
|
19
|
+
else:
|
|
20
|
+
# Convert other types to string representation for hashing
|
|
21
|
+
return str(value)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def calculate_tool_hash(tool_config: Dict[str, Any], verbose: bool = False) -> str:
|
|
25
|
+
"""Calculate a hash for tool configuration to detect changes.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
tool_config: Tool configuration dictionary
|
|
29
|
+
verbose: If True, print excluded fields (for debugging)
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
MD5 hash string of the normalized configuration
|
|
33
|
+
"""
|
|
34
|
+
# Fields to exclude from hash calculation (metadata/timestamp fields)
|
|
35
|
+
excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
|
|
36
|
+
|
|
11
37
|
# Create a normalized version of the config for hashing
|
|
12
38
|
normalized_config = {}
|
|
13
|
-
|
|
14
|
-
if key not in ["timestamp", "last_updated", "created_at"]:
|
|
15
|
-
normalized_config[key] = value
|
|
39
|
+
excluded_values = []
|
|
16
40
|
|
|
17
|
-
|
|
41
|
+
for key, value in sorted(tool_config.items()):
|
|
42
|
+
if key not in excluded_fields:
|
|
43
|
+
# Recursively normalize nested structures
|
|
44
|
+
normalized_config[key] = _normalize_value(value)
|
|
45
|
+
elif verbose:
|
|
46
|
+
excluded_values.append(key)
|
|
47
|
+
|
|
48
|
+
if verbose and excluded_values:
|
|
49
|
+
print(f" Excluded fields from hash: {', '.join(excluded_values)}")
|
|
50
|
+
|
|
51
|
+
# Use consistent JSON serialization with sorted keys
|
|
52
|
+
config_str = json.dumps(
|
|
53
|
+
normalized_config, sort_keys=True, separators=(",", ":"), ensure_ascii=False
|
|
54
|
+
)
|
|
18
55
|
return hashlib.md5(config_str.encode("utf-8")).hexdigest()
|
|
19
56
|
|
|
20
57
|
|
|
@@ -59,29 +96,85 @@ def cleanup_orphaned_files(tools_dir: Path, current_tool_names: Set[str]) -> int
|
|
|
59
96
|
return cleaned_count
|
|
60
97
|
|
|
61
98
|
|
|
99
|
+
def _compare_configs(old_config: Dict[str, Any], new_config: Dict[str, Any]) -> list:
|
|
100
|
+
"""Compare two configs and return list of changed field paths."""
|
|
101
|
+
changes = []
|
|
102
|
+
|
|
103
|
+
all_keys = set(old_config.keys()) | set(new_config.keys())
|
|
104
|
+
excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
|
|
105
|
+
|
|
106
|
+
for key in all_keys:
|
|
107
|
+
if key in excluded_fields:
|
|
108
|
+
continue
|
|
109
|
+
|
|
110
|
+
old_val = old_config.get(key)
|
|
111
|
+
new_val = new_config.get(key)
|
|
112
|
+
|
|
113
|
+
if old_val != new_val:
|
|
114
|
+
changes.append(key)
|
|
115
|
+
|
|
116
|
+
return changes
|
|
117
|
+
|
|
118
|
+
|
|
62
119
|
def get_changed_tools(
|
|
63
|
-
current_tools: Dict[str, Any],
|
|
64
|
-
|
|
65
|
-
|
|
120
|
+
current_tools: Dict[str, Any],
|
|
121
|
+
metadata_file: Path,
|
|
122
|
+
force_regenerate: bool = False,
|
|
123
|
+
verbose: bool = False,
|
|
124
|
+
) -> Tuple[list, list, list, Dict[str, list]]:
|
|
125
|
+
"""Get lists of new, changed, and unchanged tools.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
current_tools: Dictionary of current tool configurations
|
|
129
|
+
metadata_file: Path to metadata file storing previous hashes
|
|
130
|
+
force_regenerate: If True, mark all tools as changed
|
|
131
|
+
verbose: If True, provide detailed change information
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
Tuple of (new_tools, changed_tools, unchanged_tools, change_details)
|
|
135
|
+
where change_details maps tool_name -> list of changed field names
|
|
136
|
+
"""
|
|
66
137
|
old_metadata = load_metadata(metadata_file)
|
|
67
138
|
new_metadata = {}
|
|
68
139
|
new_tools = []
|
|
69
140
|
changed_tools = []
|
|
70
141
|
unchanged_tools = []
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
142
|
+
change_details: Dict[str, list] = {}
|
|
143
|
+
|
|
144
|
+
if force_regenerate:
|
|
145
|
+
print("🔄 Force regeneration enabled - all tools will be regenerated")
|
|
146
|
+
for tool_name, tool_config in current_tools.items():
|
|
147
|
+
current_hash = calculate_tool_hash(tool_config, verbose=verbose)
|
|
148
|
+
new_metadata[tool_name] = current_hash
|
|
149
|
+
if tool_name in old_metadata:
|
|
150
|
+
changed_tools.append(tool_name)
|
|
151
|
+
change_details[tool_name] = ["force_regenerate"]
|
|
152
|
+
else:
|
|
153
|
+
new_tools.append(tool_name)
|
|
154
|
+
else:
|
|
155
|
+
for tool_name, tool_config in current_tools.items():
|
|
156
|
+
current_hash = calculate_tool_hash(tool_config, verbose=verbose)
|
|
157
|
+
new_metadata[tool_name] = current_hash
|
|
158
|
+
|
|
159
|
+
old_hash = old_metadata.get(tool_name)
|
|
160
|
+
if old_hash is None:
|
|
161
|
+
new_tools.append(tool_name)
|
|
162
|
+
if verbose:
|
|
163
|
+
print(f" ✨ New tool detected: {tool_name}")
|
|
164
|
+
elif old_hash != current_hash:
|
|
165
|
+
changed_tools.append(tool_name)
|
|
166
|
+
# Try to identify which fields changed (if we have the old config)
|
|
167
|
+
# Note: We only have hashes, so we can't do detailed field comparison
|
|
168
|
+
# This would require storing full configs, which we avoid for size reasons
|
|
169
|
+
change_details[tool_name] = ["hash_mismatch"]
|
|
170
|
+
if verbose:
|
|
171
|
+
print(
|
|
172
|
+
f" 🔄 Tool changed: {tool_name} (hash: {old_hash[:8]}... -> {current_hash[:8]}...)"
|
|
173
|
+
)
|
|
174
|
+
else:
|
|
175
|
+
unchanged_tools.append(tool_name)
|
|
83
176
|
|
|
84
177
|
# Save updated metadata
|
|
85
178
|
save_metadata(new_metadata, metadata_file)
|
|
86
179
|
|
|
87
|
-
return new_tools, changed_tools, unchanged_tools
|
|
180
|
+
return new_tools, changed_tools, unchanged_tools, change_details
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "ENCODE_search_experiments",
|
|
4
|
+
"type": "ENCODESearchTool",
|
|
5
|
+
"description": "Search ENCODE functional genomics experiments (e.g., ChIP-seq, ATAC-seq) by assay/target/organism/status. Use to discover datasets and access experiment-level metadata.",
|
|
6
|
+
"parameter": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"assay_title": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "Assay name filter (e.g., 'ChIP-seq', 'ATAC-seq')."
|
|
12
|
+
},
|
|
13
|
+
"target": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"description": "Target filter (e.g., 'CTCF')."
|
|
16
|
+
},
|
|
17
|
+
"organism": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "Organism filter (e.g., 'Homo sapiens', 'Mus musculus')."
|
|
20
|
+
},
|
|
21
|
+
"status": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"default": "released",
|
|
24
|
+
"description": "Record status filter (default 'released')."
|
|
25
|
+
},
|
|
26
|
+
"limit": {
|
|
27
|
+
"type": "integer",
|
|
28
|
+
"default": 10,
|
|
29
|
+
"minimum": 1,
|
|
30
|
+
"maximum": 100,
|
|
31
|
+
"description": "Max number of results (1–100)."
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
"fields": {
|
|
36
|
+
"endpoint": "https://www.encodeproject.org/search/",
|
|
37
|
+
"format": "json"
|
|
38
|
+
},
|
|
39
|
+
"return_schema": {
|
|
40
|
+
"type": "object",
|
|
41
|
+
"description": "ENCODE experiments search response",
|
|
42
|
+
"properties": {
|
|
43
|
+
"status": {"type": "string"},
|
|
44
|
+
"data": {
|
|
45
|
+
"type": "object",
|
|
46
|
+
"properties": {
|
|
47
|
+
"total": {"type": "integer"},
|
|
48
|
+
"@graph": {
|
|
49
|
+
"type": "array",
|
|
50
|
+
"items": {
|
|
51
|
+
"type": "object",
|
|
52
|
+
"properties": {
|
|
53
|
+
"accession": {"type": "string"},
|
|
54
|
+
"assay_title": {"type": "string"},
|
|
55
|
+
"target": {"type": "object"},
|
|
56
|
+
"organism": {"type": "string"},
|
|
57
|
+
"status": {"type": "string"}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
"url": {"type": "string"}
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"test_examples": [
|
|
67
|
+
{"assay_title": "ChIP-seq", "limit": 1},
|
|
68
|
+
{"assay_title": "ATAC-seq", "limit": 1}
|
|
69
|
+
],
|
|
70
|
+
"label": ["ENCODE", "Experiment", "Search"],
|
|
71
|
+
"metadata": {
|
|
72
|
+
"tags": ["functional-genomics", "chip-seq", "atac-seq"],
|
|
73
|
+
"estimated_execution_time": "< 3 seconds"
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"name": "ENCODE_list_files",
|
|
78
|
+
"type": "ENCODEFilesTool",
|
|
79
|
+
"description": "List ENCODE files with filters (file_format, output_type, assay). Use to programmatically retrieve downloadable artifact metadata (FASTQ, BAM, bigWig, peaks).",
|
|
80
|
+
"parameter": {
|
|
81
|
+
"type": "object",
|
|
82
|
+
"properties": {
|
|
83
|
+
"file_type": {
|
|
84
|
+
"type": "string",
|
|
85
|
+
"description": "File type filter (e.g., 'fastq', 'bam', 'bigWig')."
|
|
86
|
+
},
|
|
87
|
+
"assay_title": {
|
|
88
|
+
"type": "string",
|
|
89
|
+
"description": "Assay filter (e.g., 'ChIP-seq')."
|
|
90
|
+
},
|
|
91
|
+
"limit": {
|
|
92
|
+
"type": "integer",
|
|
93
|
+
"default": 10,
|
|
94
|
+
"minimum": 1,
|
|
95
|
+
"maximum": 100,
|
|
96
|
+
"description": "Max number of results (1–100)."
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
"fields": {
|
|
101
|
+
"endpoint": "https://www.encodeproject.org/search/",
|
|
102
|
+
"format": "json"
|
|
103
|
+
},
|
|
104
|
+
"return_schema": {
|
|
105
|
+
"type": "object",
|
|
106
|
+
"description": "ENCODE files search response",
|
|
107
|
+
"properties": {
|
|
108
|
+
"status": {"type": "string"},
|
|
109
|
+
"data": {
|
|
110
|
+
"type": "object",
|
|
111
|
+
"properties": {
|
|
112
|
+
"total": {"type": "integer"},
|
|
113
|
+
"@graph": {
|
|
114
|
+
"type": "array",
|
|
115
|
+
"items": {
|
|
116
|
+
"type": "object",
|
|
117
|
+
"properties": {
|
|
118
|
+
"accession": {"type": "string"},
|
|
119
|
+
"file_format": {"type": "string"},
|
|
120
|
+
"output_type": {"type": "string"},
|
|
121
|
+
"file_type": {"type": "string"}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
"url": {"type": "string"}
|
|
128
|
+
}
|
|
129
|
+
},
|
|
130
|
+
"test_examples": [
|
|
131
|
+
{"file_type": "fastq", "limit": 1}
|
|
132
|
+
],
|
|
133
|
+
"label": ["ENCODE", "File", "Search"],
|
|
134
|
+
"metadata": {
|
|
135
|
+
"tags": ["downloads", "artifacts", "metadata"],
|
|
136
|
+
"estimated_execution_time": "< 3 seconds"
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
]
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "GBIF_search_species",
|
|
4
|
+
"type": "GBIFTool",
|
|
5
|
+
"description": "Find taxa by keyword (scientific/common names) in GBIF. Use to resolve organism names to stable taxon keys (rank, lineage) for downstream biodiversity/occurrence queries.",
|
|
6
|
+
"parameter": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"query": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "Search string for species/taxa (supports scientific/common names), e.g., 'Homo', 'Atlantic cod'."
|
|
12
|
+
},
|
|
13
|
+
"limit": {
|
|
14
|
+
"type": "integer",
|
|
15
|
+
"default": 10,
|
|
16
|
+
"minimum": 1,
|
|
17
|
+
"maximum": 300,
|
|
18
|
+
"description": "Maximum number of results to return (1–300)."
|
|
19
|
+
},
|
|
20
|
+
"offset": {
|
|
21
|
+
"type": "integer",
|
|
22
|
+
"default": 0,
|
|
23
|
+
"minimum": 0,
|
|
24
|
+
"description": "Result offset for pagination (0-based)."
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"required": ["query"]
|
|
28
|
+
},
|
|
29
|
+
"fields": {
|
|
30
|
+
"endpoint": "https://api.gbif.org/v1/species/search",
|
|
31
|
+
"format": "json"
|
|
32
|
+
},
|
|
33
|
+
"return_schema": {
|
|
34
|
+
"type": "object",
|
|
35
|
+
"description": "GBIF species search response",
|
|
36
|
+
"properties": {
|
|
37
|
+
"status": {"type": "string"},
|
|
38
|
+
"data": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"properties": {
|
|
41
|
+
"count": {"type": "integer"},
|
|
42
|
+
"results": {
|
|
43
|
+
"type": "array",
|
|
44
|
+
"items": {
|
|
45
|
+
"type": "object",
|
|
46
|
+
"properties": {
|
|
47
|
+
"key": {"type": "integer", "description": "taxonKey"},
|
|
48
|
+
"scientificName": {"type": "string"},
|
|
49
|
+
"rank": {"type": "string"},
|
|
50
|
+
"kingdom": {"type": "string"},
|
|
51
|
+
"phylum": {"type": "string"},
|
|
52
|
+
"class": {"type": "string"},
|
|
53
|
+
"order": {"type": "string"},
|
|
54
|
+
"family": {"type": "string"},
|
|
55
|
+
"genus": {"type": "string"}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"url": {"type": "string"}
|
|
62
|
+
}
|
|
63
|
+
},
|
|
64
|
+
"test_examples": [
|
|
65
|
+
{"query": "Homo", "limit": 1},
|
|
66
|
+
{"query": "Gadus", "limit": 1}
|
|
67
|
+
],
|
|
68
|
+
"label": ["GBIF", "Taxonomy", "Search"],
|
|
69
|
+
"metadata": {
|
|
70
|
+
"tags": ["biodiversity", "taxonomy", "species", "search"],
|
|
71
|
+
"estimated_execution_time": "< 2 seconds"
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
"name": "GBIF_search_occurrences",
|
|
76
|
+
"type": "GBIFOccurrenceTool",
|
|
77
|
+
"description": "Retrieve species occurrence records from GBIF with optional filters (taxonKey, country, coordinates). Use for distribution mapping, presence-only modeling, and sampling context.",
|
|
78
|
+
"parameter": {
|
|
79
|
+
"type": "object",
|
|
80
|
+
"properties": {
|
|
81
|
+
"taxonKey": {
|
|
82
|
+
"type": "integer",
|
|
83
|
+
"description": "GBIF taxon key to filter occurrences by a specific taxon (from species search)."
|
|
84
|
+
},
|
|
85
|
+
"country": {
|
|
86
|
+
"type": "string",
|
|
87
|
+
"description": "ISO 3166-1 alpha-2 country code filter (e.g., 'US', 'CN')."
|
|
88
|
+
},
|
|
89
|
+
"hasCoordinate": {
|
|
90
|
+
"type": "boolean",
|
|
91
|
+
"default": true,
|
|
92
|
+
"description": "Only return records with valid latitude/longitude coordinates when true."
|
|
93
|
+
},
|
|
94
|
+
"limit": {
|
|
95
|
+
"type": "integer",
|
|
96
|
+
"default": 10,
|
|
97
|
+
"minimum": 1,
|
|
98
|
+
"maximum": 300,
|
|
99
|
+
"description": "Maximum number of results to return (1–300)."
|
|
100
|
+
},
|
|
101
|
+
"offset": {
|
|
102
|
+
"type": "integer",
|
|
103
|
+
"default": 0,
|
|
104
|
+
"minimum": 0,
|
|
105
|
+
"description": "Result offset for pagination (0-based)."
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
},
|
|
109
|
+
"fields": {
|
|
110
|
+
"endpoint": "https://api.gbif.org/v1/occurrence/search",
|
|
111
|
+
"format": "json"
|
|
112
|
+
},
|
|
113
|
+
"return_schema": {
|
|
114
|
+
"type": "object",
|
|
115
|
+
"description": "GBIF occurrence search response",
|
|
116
|
+
"properties": {
|
|
117
|
+
"status": {"type": "string"},
|
|
118
|
+
"data": {
|
|
119
|
+
"type": "object",
|
|
120
|
+
"properties": {
|
|
121
|
+
"count": {"type": "integer"},
|
|
122
|
+
"results": {
|
|
123
|
+
"type": "array",
|
|
124
|
+
"items": {
|
|
125
|
+
"type": "object",
|
|
126
|
+
"properties": {
|
|
127
|
+
"key": {"type": "integer"},
|
|
128
|
+
"speciesKey": {"type": "integer"},
|
|
129
|
+
"scientificName": {"type": "string"},
|
|
130
|
+
"decimalLatitude": {"type": "number"},
|
|
131
|
+
"decimalLongitude": {"type": "number"},
|
|
132
|
+
"eventDate": {"type": "string"},
|
|
133
|
+
"countryCode": {"type": "string"}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
},
|
|
139
|
+
"url": {"type": "string"}
|
|
140
|
+
}
|
|
141
|
+
},
|
|
142
|
+
"test_examples": [
|
|
143
|
+
{"hasCoordinate": true, "limit": 1},
|
|
144
|
+
{"country": "US", "limit": 1}
|
|
145
|
+
],
|
|
146
|
+
"label": ["GBIF", "Occurrence", "Geospatial"],
|
|
147
|
+
"metadata": {
|
|
148
|
+
"tags": ["biodiversity", "occurrence", "distribution", "geospatial"],
|
|
149
|
+
"estimated_execution_time": "< 3 seconds"
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
]
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "GDC_search_cases",
|
|
4
|
+
"type": "GDCCasesTool",
|
|
5
|
+
"description": "Search cancer cohort cases in NCI GDC by project and filters. Use to retrieve case-level metadata for cohort construction and downstream file queries.",
|
|
6
|
+
"parameter": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"project_id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "GDC project identifier (e.g., 'TCGA-BRCA')."
|
|
12
|
+
},
|
|
13
|
+
"size": {
|
|
14
|
+
"type": "integer",
|
|
15
|
+
"default": 10,
|
|
16
|
+
"minimum": 1,
|
|
17
|
+
"maximum": 100,
|
|
18
|
+
"description": "Number of results (1–100)."
|
|
19
|
+
},
|
|
20
|
+
"offset": {
|
|
21
|
+
"type": "integer",
|
|
22
|
+
"default": 0,
|
|
23
|
+
"minimum": 0,
|
|
24
|
+
"description": "Offset for pagination (0-based)."
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"fields": {
|
|
29
|
+
"endpoint": "https://api.gdc.cancer.gov/cases",
|
|
30
|
+
"format": "json"
|
|
31
|
+
},
|
|
32
|
+
"return_schema": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"description": "GDC cases response",
|
|
35
|
+
"properties": {
|
|
36
|
+
"status": {"type": "string"},
|
|
37
|
+
"data": {
|
|
38
|
+
"type": "object",
|
|
39
|
+
"properties": {
|
|
40
|
+
"hits": {
|
|
41
|
+
"type": "array",
|
|
42
|
+
"items": {"type": "object"}
|
|
43
|
+
},
|
|
44
|
+
"pagination": {"type": "object"}
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"url": {"type": "string"}
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
"test_examples": [
|
|
51
|
+
{"project_id": "TCGA-BRCA", "size": 1}
|
|
52
|
+
],
|
|
53
|
+
"label": ["GDC", "Cases", "Oncogenomics"],
|
|
54
|
+
"metadata": {
|
|
55
|
+
"tags": ["oncogenomics", "cohort", "cases"],
|
|
56
|
+
"estimated_execution_time": "< 3 seconds"
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"name": "GDC_list_files",
|
|
61
|
+
"type": "GDCFilesTool",
|
|
62
|
+
"description": "List GDC files filtered by data_type and other fields. Use to identify downloadable artifacts (e.g., expression quantification) for analysis pipelines.",
|
|
63
|
+
"parameter": {
|
|
64
|
+
"type": "object",
|
|
65
|
+
"properties": {
|
|
66
|
+
"data_type": {
|
|
67
|
+
"type": "string",
|
|
68
|
+
"description": "Data type filter (e.g., 'Gene Expression Quantification')."
|
|
69
|
+
},
|
|
70
|
+
"size": {
|
|
71
|
+
"type": "integer",
|
|
72
|
+
"default": 10,
|
|
73
|
+
"minimum": 1,
|
|
74
|
+
"maximum": 100,
|
|
75
|
+
"description": "Number of results (1–100)."
|
|
76
|
+
},
|
|
77
|
+
"offset": {
|
|
78
|
+
"type": "integer",
|
|
79
|
+
"default": 0,
|
|
80
|
+
"minimum": 0,
|
|
81
|
+
"description": "Offset for pagination (0-based)."
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
"fields": {
|
|
86
|
+
"endpoint": "https://api.gdc.cancer.gov/files",
|
|
87
|
+
"format": "json"
|
|
88
|
+
},
|
|
89
|
+
"return_schema": {
|
|
90
|
+
"type": "object",
|
|
91
|
+
"description": "GDC files response",
|
|
92
|
+
"properties": {
|
|
93
|
+
"status": {"type": "string"},
|
|
94
|
+
"data": {
|
|
95
|
+
"type": "object",
|
|
96
|
+
"properties": {
|
|
97
|
+
"hits": {
|
|
98
|
+
"type": "array",
|
|
99
|
+
"items": {"type": "object"}
|
|
100
|
+
},
|
|
101
|
+
"pagination": {"type": "object"}
|
|
102
|
+
}
|
|
103
|
+
},
|
|
104
|
+
"url": {"type": "string"}
|
|
105
|
+
}
|
|
106
|
+
},
|
|
107
|
+
"test_examples": [
|
|
108
|
+
{"data_type": "Gene Expression Quantification", "size": 1}
|
|
109
|
+
],
|
|
110
|
+
"label": ["GDC", "Files", "Oncogenomics"],
|
|
111
|
+
"metadata": {
|
|
112
|
+
"tags": ["downloads", "files", "expression"],
|
|
113
|
+
"estimated_execution_time": "< 3 seconds"
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
]
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "GTEx_get_expression_summary",
|
|
4
|
+
"type": "GTExExpressionTool",
|
|
5
|
+
"description": "Summarize tissue-specific expression (e.g., median TPM) for a gene across GTEx tissues. Use to profile baseline expression patterns for targets/biomarkers.",
|
|
6
|
+
"parameter": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"ensembl_gene_id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "Ensembl gene identifier (e.g., 'ENSG00000141510' for TP53)."
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"required": ["ensembl_gene_id"]
|
|
15
|
+
},
|
|
16
|
+
"fields": {
|
|
17
|
+
"endpoint": "https://gtexportal.org/api/v2/expression/geneExpression",
|
|
18
|
+
"format": "json"
|
|
19
|
+
},
|
|
20
|
+
"return_schema": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"description": "GTEx expression summary response",
|
|
23
|
+
"properties": {
|
|
24
|
+
"status": {"type": "string"},
|
|
25
|
+
"data": {
|
|
26
|
+
"type": "object",
|
|
27
|
+
"properties": {
|
|
28
|
+
"geneExpression": {
|
|
29
|
+
"type": "array",
|
|
30
|
+
"items": {
|
|
31
|
+
"type": "object",
|
|
32
|
+
"properties": {
|
|
33
|
+
"tissueSiteDetailId": {"type": "string"},
|
|
34
|
+
"median": {"type": "number"}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"url": {"type": "string"}
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
"test_examples": [
|
|
44
|
+
{"ensembl_gene_id": "ENSG00000141510"}
|
|
45
|
+
],
|
|
46
|
+
"label": ["GTEx", "Expression", "Summary"],
|
|
47
|
+
"metadata": {
|
|
48
|
+
"tags": ["expression", "tissue", "baseline"],
|
|
49
|
+
"estimated_execution_time": "< 2 seconds"
|
|
50
|
+
}
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"name": "GTEx_query_eqtl",
|
|
54
|
+
"type": "GTExEQTLTool",
|
|
55
|
+
"description": "Query GTEx single-tissue eQTL associations for a gene. Use to identify regulatory variants (variantId, pValue, slope) relevant to expression regulation.",
|
|
56
|
+
"parameter": {
|
|
57
|
+
"type": "object",
|
|
58
|
+
"properties": {
|
|
59
|
+
"ensembl_gene_id": {
|
|
60
|
+
"type": "string",
|
|
61
|
+
"description": "Ensembl gene identifier (e.g., 'ENSG00000141510')."
|
|
62
|
+
},
|
|
63
|
+
"page": {
|
|
64
|
+
"type": "integer",
|
|
65
|
+
"default": 1,
|
|
66
|
+
"minimum": 1,
|
|
67
|
+
"description": "Page number (1-based)."
|
|
68
|
+
},
|
|
69
|
+
"size": {
|
|
70
|
+
"type": "integer",
|
|
71
|
+
"default": 10,
|
|
72
|
+
"minimum": 1,
|
|
73
|
+
"maximum": 100,
|
|
74
|
+
"description": "Number of records per page (1–100)."
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
"required": ["ensembl_gene_id"]
|
|
78
|
+
},
|
|
79
|
+
"fields": {
|
|
80
|
+
"endpoint": "https://gtexportal.org/api/v2/association/singleTissueEqtl",
|
|
81
|
+
"format": "json"
|
|
82
|
+
},
|
|
83
|
+
"return_schema": {
|
|
84
|
+
"type": "object",
|
|
85
|
+
"description": "GTEx eQTL query response",
|
|
86
|
+
"properties": {
|
|
87
|
+
"status": {"type": "string"},
|
|
88
|
+
"data": {
|
|
89
|
+
"type": "object",
|
|
90
|
+
"properties": {
|
|
91
|
+
"singleTissueEqtl": {
|
|
92
|
+
"type": "array",
|
|
93
|
+
"items": {
|
|
94
|
+
"type": "object",
|
|
95
|
+
"properties": {
|
|
96
|
+
"variantId": {"type": "string"},
|
|
97
|
+
"pValue": {"type": "number"},
|
|
98
|
+
"slope": {"type": "number"}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
},
|
|
104
|
+
"url": {"type": "string"}
|
|
105
|
+
}
|
|
106
|
+
},
|
|
107
|
+
"test_examples": [
|
|
108
|
+
{"ensembl_gene_id": "ENSG00000141510", "page": 1, "size": 5}
|
|
109
|
+
],
|
|
110
|
+
"label": ["GTEx", "eQTL", "Association"],
|
|
111
|
+
"metadata": {
|
|
112
|
+
"tags": ["eqtl", "variant", "regulation"],
|
|
113
|
+
"estimated_execution_time": "< 3 seconds"
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
]
|
|
File without changes
|