tooluniverse 1.0.11.1__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/alphafold_tool.py +47 -7
- tooluniverse/base_tool.py +9 -1
- tooluniverse/build_optimizer.py +115 -22
- tooluniverse/data/alphafold_tools.json +7 -12
- tooluniverse/data/encode_tools.json +139 -0
- tooluniverse/data/gbif_tools.json +152 -0
- tooluniverse/data/gdc_tools.json +116 -0
- tooluniverse/data/gtex_tools.json +116 -0
- tooluniverse/data/icgc_tools.json +0 -0
- tooluniverse/data/mgnify_tools.json +121 -0
- tooluniverse/data/obis_tools.json +122 -0
- tooluniverse/data/optimizer_tools.json +275 -0
- tooluniverse/data/rnacentral_tools.json +99 -0
- tooluniverse/data/smolagent_tools.json +206 -0
- tooluniverse/data/uniprot_tools.json +13 -5
- tooluniverse/data/wikipathways_tools.json +106 -0
- tooluniverse/default_config.py +12 -0
- tooluniverse/encode_tool.py +245 -0
- tooluniverse/execute_function.py +185 -17
- tooluniverse/gbif_tool.py +166 -0
- tooluniverse/gdc_tool.py +175 -0
- tooluniverse/generate_tools.py +121 -9
- tooluniverse/gtex_tool.py +168 -0
- tooluniverse/mgnify_tool.py +181 -0
- tooluniverse/obis_tool.py +185 -0
- tooluniverse/pypi_package_inspector_tool.py +3 -2
- tooluniverse/python_executor_tool.py +43 -13
- tooluniverse/rnacentral_tool.py +124 -0
- tooluniverse/smcp.py +17 -25
- tooluniverse/smcp_server.py +1 -1
- tooluniverse/smolagent_tool.py +555 -0
- tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
- tooluniverse/tools/ENCODE_list_files.py +59 -0
- tooluniverse/tools/ENCODE_search_experiments.py +67 -0
- tooluniverse/tools/GBIF_search_occurrences.py +67 -0
- tooluniverse/tools/GBIF_search_species.py +55 -0
- tooluniverse/tools/GDC_list_files.py +55 -0
- tooluniverse/tools/GDC_search_cases.py +55 -0
- tooluniverse/tools/GTEx_get_expression_summary.py +49 -0
- tooluniverse/tools/GTEx_query_eqtl.py +59 -0
- tooluniverse/tools/MGnify_list_analyses.py +52 -0
- tooluniverse/tools/MGnify_search_studies.py +55 -0
- tooluniverse/tools/OBIS_search_occurrences.py +59 -0
- tooluniverse/tools/OBIS_search_taxa.py +52 -0
- tooluniverse/tools/RNAcentral_get_by_accession.py +46 -0
- tooluniverse/tools/RNAcentral_search.py +52 -0
- tooluniverse/tools/TestCaseGenerator.py +46 -0
- tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
- tooluniverse/tools/ToolDiscover.py +4 -0
- tooluniverse/tools/UniProt_search.py +14 -6
- tooluniverse/tools/WikiPathways_get_pathway.py +52 -0
- tooluniverse/tools/WikiPathways_search.py +52 -0
- tooluniverse/tools/__init__.py +43 -1
- tooluniverse/tools/advanced_literature_search_agent.py +46 -0
- tooluniverse/tools/alphafold_get_annotations.py +4 -10
- tooluniverse/tools/download_binary_file.py +3 -6
- tooluniverse/tools/open_deep_research_agent.py +46 -0
- tooluniverse/uniprot_tool.py +51 -4
- tooluniverse/wikipathways_tool.py +122 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/METADATA +3 -1
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/RECORD +65 -24
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/top_level.txt +0 -0
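The new *_tool.py modules in this release all follow the same pattern, visible in the hunks below: a plain class with a dict-in/dict-out run(arguments) method, registered through register_tool together with its JSON parameter schema and settings. A minimal sketch of driving one of them directly (GBIFTool and its defaults come from the gbif_tool.py hunk; instantiating the class outside the tool registry is an assumption, not the documented entry point):

# Hypothetical direct call, bypassing the ToolUniverse registry.
from tooluniverse.gbif_tool import GBIFTool

tool = GBIFTool()  # empty config falls back to api.gbif.org/v1 with a 30 s timeout
result = tool.run({"query": "Homo", "limit": 5})
print(result["success"], result["endpoint"])  # True species/search (when the request succeeds)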
@@ -0,0 +1,166 @@
+import json
+from typing import Any, Dict
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen
+
+from tooluniverse.tool_registry import register_tool
+
+
+def _http_get(
+    url: str,
+    headers: Dict[str, str] | None = None,
+    timeout: int = 30,
+) -> Dict[str, Any]:
+    req = Request(url, headers=headers or {})
+    with urlopen(req, timeout=timeout) as resp:
+        data = resp.read()
+    try:
+        return json.loads(data.decode("utf-8", errors="ignore"))
+    except Exception:
+        return {"raw": data.decode("utf-8", errors="ignore")}
+
+
+@register_tool(
+    "GBIFTool",
+    config={
+        "name": "GBIF_search_species",
+        "type": "GBIFTool",
+        "description": "Search species via GBIF species/search",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Query keyword, e.g., Homo",
+                },
+                "limit": {
+                    "type": "integer",
+                    "default": 10,
+                    "minimum": 1,
+                    "maximum": 300,
+                },
+                "offset": {
+                    "type": "integer",
+                    "default": 0,
+                    "minimum": 0,
+                },
+            },
+            "required": ["query"],
+        },
+        "settings": {
+            "base_url": "https://api.gbif.org/v1",
+            "timeout": 30,
+        },
+    },
+)
+class GBIFTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def run(self, arguments: Dict[str, Any]):
+        base = self.tool_config.get("settings", {}).get(
+            "base_url", "https://api.gbif.org/v1"
+        )
+        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
+        query_text = arguments.get("query")
+        limit = int(arguments.get("limit", 10))
+        offset = int(arguments.get("offset", 0))
+
+        query = {"q": query_text, "limit": limit, "offset": offset}
+        url = f"{base}/species/search?{urlencode(query)}"
+        try:
+            data = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            return {
+                "source": "GBIF",
+                "endpoint": "species/search",
+                "query": query,
+                "data": data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "GBIF",
+                "endpoint": "species/search",
+                "success": False,
+            }
+
+
+@register_tool(
+    "GBIFOccurrenceTool",
+    config={
+        "name": "GBIF_search_occurrences",
+        "type": "GBIFOccurrenceTool",
+        "description": "Search occurrences via GBIF occurrence/search",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "taxonKey": {
+                    "type": "integer",
+                    "description": "GBIF taxonKey filter",
+                },
+                "country": {
+                    "type": "string",
+                    "description": "Country code, e.g., US",
+                },
+                "hasCoordinate": {"type": "boolean", "default": True},
+                "limit": {
+                    "type": "integer",
+                    "default": 10,
+                    "minimum": 1,
+                    "maximum": 300,
+                },
+                "offset": {
+                    "type": "integer",
+                    "default": 0,
+                    "minimum": 0,
+                },
+            },
+        },
+        "settings": {
+            "base_url": "https://api.gbif.org/v1",
+            "timeout": 30,
+        },
+    },
+)
+class GBIFOccurrenceTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def run(self, arguments: Dict[str, Any]):
+        base = self.tool_config.get("settings", {}).get(
+            "base_url", "https://api.gbif.org/v1"
+        )
+        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
+
+        query = {}
+        for key in ("taxonKey", "country", "hasCoordinate", "limit", "offset"):
+            if key in arguments and arguments[key] is not None:
+                query[key] = arguments[key]
+
+        if "limit" not in query:
+            query["limit"] = 10
+        if "offset" not in query:
+            query["offset"] = 0
+
+        url = f"{base}/occurrence/search?{urlencode(query)}"
+        try:
+            data = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            return {
+                "source": "GBIF",
+                "endpoint": "occurrence/search",
+                "query": query,
+                "data": data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "GBIF",
+                "endpoint": "occurrence/search",
+                "success": False,
+            }
tooluniverse/gdc_tool.py
ADDED
@@ -0,0 +1,175 @@
+import json
+from typing import Any, Dict
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen
+
+from tooluniverse.tool_registry import register_tool
+
+
+def _http_get(
+    url: str,
+    headers: Dict[str, str] | None = None,
+    timeout: int = 30,
+) -> Dict[str, Any]:
+    req = Request(url, headers=headers or {})
+    with urlopen(req, timeout=timeout) as resp:
+        data = resp.read()
+    try:
+        return json.loads(data.decode("utf-8", errors="ignore"))
+    except Exception:
+        return {"raw": data.decode("utf-8", errors="ignore")}
+
+
+@register_tool(
+    "GDCCasesTool",
+    config={
+        "name": "GDC_search_cases",
+        "type": "GDCCasesTool",
+        "description": "Search NCI GDC cases via /cases",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "project_id": {
+                    "type": "string",
+                    "description": "GDC project identifier (e.g., 'TCGA-BRCA')",
+                },
+                "size": {
+                    "type": "integer",
+                    "default": 10,
+                    "minimum": 1,
+                    "maximum": 100,
+                    "description": "Number of results (1–100)",
+                },
+                "offset": {
+                    "type": "integer",
+                    "default": 0,
+                    "minimum": 0,
+                    "description": "Offset for pagination (0-based)",
+                },
+            },
+        },
+        "settings": {"base_url": "https://api.gdc.cancer.gov", "timeout": 30},
+    },
+)
+class GDCCasesTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def run(self, arguments: Dict[str, Any]):
+        base = self.tool_config.get("settings", {}).get(
+            "base_url", "https://api.gdc.cancer.gov"
+        )
+        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
+
+        query: Dict[str, Any] = {}
+        if arguments.get("project_id"):
+            # Build filters JSON for project_id
+            filters = {
+                "op": "=",
+                "content": {
+                    "field": "projects.project_id",
+                    "value": [arguments["project_id"]],
+                },
+            }
+            query["filters"] = json.dumps(filters)
+        if arguments.get("size") is not None:
+            query["size"] = int(arguments["size"])
+        if arguments.get("offset") is not None:
+            query["from"] = int(arguments["offset"])
+
+        url = f"{base}/cases?{urlencode(query)}"
+        try:
+            data = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            return {
+                "source": "GDC",
+                "endpoint": "cases",
+                "query": query,
+                "data": data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "GDC",
+                "endpoint": "cases",
+                "success": False,
+            }
+
+
+@register_tool(
+    "GDCFilesTool",
+    config={
+        "name": "GDC_list_files",
+        "type": "GDCFilesTool",
+        "description": "List NCI GDC files via /files with optional data_type filter",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "data_type": {
+                    "type": "string",
+                    "description": "Data type filter (e.g., 'Gene Expression Quantification')",
+                },
+                "size": {
+                    "type": "integer",
+                    "default": 10,
+                    "minimum": 1,
+                    "maximum": 100,
+                    "description": "Number of results (1–100)",
+                },
+                "offset": {
+                    "type": "integer",
+                    "default": 0,
+                    "minimum": 0,
+                    "description": "Offset for pagination (0-based)",
+                },
+            },
+        },
+        "settings": {"base_url": "https://api.gdc.cancer.gov", "timeout": 30},
+    },
+)
+class GDCFilesTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def run(self, arguments: Dict[str, Any]):
+        base = self.tool_config.get("settings", {}).get(
+            "base_url", "https://api.gdc.cancer.gov"
+        )
+        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
+
+        query: Dict[str, Any] = {}
+        if arguments.get("data_type"):
+            filters = {
+                "op": "=",
+                "content": {
+                    "field": "files.data_type",
+                    "value": [arguments["data_type"]],
+                },
+            }
+            query["filters"] = json.dumps(filters)
+        if arguments.get("size") is not None:
+            query["size"] = int(arguments["size"])
+        if arguments.get("offset") is not None:
+            query["from"] = int(arguments["offset"])
+
+        url = f"{base}/files?{urlencode(query)}"
+        try:
+            data = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            return {
+                "source": "GDC",
+                "endpoint": "files",
+                "query": query,
+                "data": data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "GDC",
+                "endpoint": "files",
+                "success": False,
+            }
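Both GDC tools serialize their filter as the GDC API's JSON filter document and pass it URL-encoded; note that the tools' offset argument maps to the API's from parameter. A sketch of the query string GDC_search_cases builds for project_id="TCGA-BRCA", size=10, offset=0 (values illustrative):

import json
from urllib.parse import urlencode

filters = {
    "op": "=",
    "content": {"field": "projects.project_id", "value": ["TCGA-BRCA"]},
}
query = {"filters": json.dumps(filters), "size": 10, "from": 0}
print(f"https://api.gdc.cancer.gov/cases?{urlencode(query)}")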
tooluniverse/generate_tools.py
CHANGED
@@ -5,7 +5,7 @@ import os
 import shutil
 import subprocess
 from pathlib import Path
-from typing import Dict, Any, Optional, List
+from typing import Dict, Any, Optional, List, Tuple


 def json_type_to_python(json_type: str) -> str:
@@ -20,6 +20,55 @@ def json_type_to_python(json_type: str) -> str:
     }.get(json_type, "Any")


+def validate_generated_code(
+    tool_name: str, tool_config: Dict[str, Any], generated_file: Path
+) -> Tuple[bool, list]:
+    """Validate that generated code matches the tool configuration.
+
+    Args:
+        tool_name: Name of the tool
+        tool_config: Original tool configuration
+        generated_file: Path to the generated Python file
+
+    Returns:
+        Tuple of (is_valid, list_of_issues)
+    """
+    issues = []
+
+    if not generated_file.exists():
+        return False, [f"Generated file does not exist: {generated_file}"]
+
+    try:
+        content = generated_file.read_text(encoding="utf-8")
+
+        # Check that function name matches tool name
+        if f"def {tool_name}(" not in content:
+            issues.append(f"Function definition not found for {tool_name}")
+
+        # Check that all required parameters are present
+        schema = tool_config.get("parameter", {}) or {}
+        properties = schema.get("properties", {}) or {}
+        required = schema.get("required", []) or []
+
+        for param_name in required:
+            # Check if parameter appears in function signature
+            if f"{param_name}:" not in content:
+                issues.append(
+                    f"Required parameter '{param_name}' missing from function signature"
+                )
+
+        # Check that all parameters in config appear in generated code
+        for param_name in properties.keys():
+            # Parameter should appear either in signature or in kwargs
+            if f'"{param_name}"' not in content and f"{param_name}:" not in content:
+                issues.append(f"Parameter '{param_name}' missing from generated code")
+
+    except Exception as e:
+        issues.append(f"Error reading generated file: {e}")
+
+    return len(issues) == 0, issues
+
+
 def generate_tool_file(
     tool_name: str,
     tool_config: Dict[str, Any],
@@ -403,11 +452,18 @@ def _format_files(paths: List[str]) -> None:
     pass


-def main(format_enabled: Optional[bool] = None) -> None:
+def main(
+    format_enabled: Optional[bool] = None,
+    force_regenerate: bool = False,
+    verbose: bool = False,
+) -> None:
     """Generate tools and format the generated files if enabled.

-
-
+    Args:
+        format_enabled: If None, decide based on TOOLUNIVERSE_SKIP_FORMAT env var
+            (skip when set to "1").
+        force_regenerate: If True, regenerate all tools regardless of changes
+        verbose: If True, print detailed change information
     """
     from tooluniverse import ToolUniverse
     from .build_optimizer import cleanup_orphaned_files, get_changed_tools
@@ -428,23 +484,64 @@ def main(format_enabled: Optional[bool] = None) -> None:

     # Check for changes
     metadata_file = output / ".tool_metadata.json"
-
-
+    # Allow override via environment variable or function parameter
+    force_regenerate = force_regenerate or (
+        os.getenv("TOOLUNIVERSE_FORCE_REGENERATE") == "1"
+    )
+    verbose = verbose or (os.getenv("TOOLUNIVERSE_VERBOSE") == "1")
+
+    new_tools, changed_tools, unchanged_tools, change_details = get_changed_tools(
+        tu.all_tool_dict,
+        metadata_file,
+        force_regenerate=force_regenerate,
+        verbose=verbose,
     )

     generated_paths: List[str] = []

     # Generate only changed tools if there are changes
     if new_tools or changed_tools:
-
+        total_changed = len(new_tools + changed_tools)
+        print(f"🔄 Generating {total_changed} changed tools...")
+        if new_tools:
+            print(f"   ✨ {len(new_tools)} new tools")
+        if changed_tools:
+            print(f"   🔄 {len(changed_tools)} modified tools")
+            if (
+                verbose and len(changed_tools) <= 20
+            ):  # Only show details for reasonable number
+                for tool_name in changed_tools[:20]:
+                    print(f"      - {tool_name}")
+                if len(changed_tools) > 20:
+                    print(f"      ... and {len(changed_tools) - 20} more")
+
+        validation_errors = []
         for i, (tool_name, tool_config) in enumerate(tu.all_tool_dict.items(), 1):
             if tool_name in new_tools or tool_name in changed_tools:
                 path = generate_tool_file(tool_name, tool_config, output)
                 generated_paths.append(str(path))
+
+                # Validate generated code matches configuration
+                is_valid, issues = validate_generated_code(tool_name, tool_config, path)
+                if not is_valid:
+                    validation_errors.extend([(tool_name, issue) for issue in issues])
+                    if verbose:
+                        print(f"   ⚠️  Validation issues for {tool_name}:")
+                        for issue in issues:
+                            print(f"      - {issue}")
+
             if i % 50 == 0:
-                print(f"    Processed {i} tools...")
+                print(f"    Processed {i}/{len(tu.all_tool_dict)} tools...")
+
+        if validation_errors:
+            print(f"\n⚠️  Found {len(validation_errors)} validation issue(s):")
+            for tool_name, issue in validation_errors[:10]:  # Show first 10
+                print(f"   - {tool_name}: {issue}")
+            if len(validation_errors) > 10:
+                print(f"   ... and {len(validation_errors) - 10} more issues")
     else:
         print("✨ No changes detected, skipping tool generation")
+        print(f"   📊 Status: {len(unchanged_tools)} tools unchanged")

     # Always regenerate __init__.py to include all tools
     init_path = generate_init(list(tu.all_tool_dict.keys()), output)
@@ -477,5 +574,20 @@ if __name__ == "__main__":
         action="store_true",
         help="Do not run formatters on generated files",
     )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Force regeneration of all tools regardless of changes",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Print detailed change information",
+    )
     args = parser.parse_args()
-    main(format_enabled=not args.no_format)
+    main(
+        format_enabled=not args.no_format,
+        force_regenerate=args.force,
+        verbose=args.verbose,
+    )
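The new validate_generated_code check is purely textual: it scans the generated file for def <tool_name>( and for each parameter name. A self-contained sketch of exercising it, assuming tooluniverse 1.0.12 is installed; the stub file and config below are made up for illustration:

import tempfile
from pathlib import Path

from tooluniverse.generate_tools import validate_generated_code

config = {
    "parameter": {
        "properties": {"query": {"type": "string"}},
        "required": ["query"],
    }
}
with tempfile.TemporaryDirectory() as tmp:
    stub = Path(tmp) / "My_tool.py"
    stub.write_text("def My_tool(query: str):\n    return query\n", encoding="utf-8")
    # Both checks pass: the file contains "def My_tool(" and "query:"
    print(validate_generated_code("My_tool", config, stub))  # (True, [])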
tooluniverse/gtex_tool.py
ADDED
@@ -0,0 +1,168 @@
+import json
+from typing import Any, Dict
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen
+
+from tooluniverse.tool_registry import register_tool
+
+
+def _http_get(
+    url: str,
+    headers: Dict[str, str] | None = None,
+    timeout: int = 30,
+) -> Dict[str, Any]:
+    req = Request(url, headers=headers or {})
+    with urlopen(req, timeout=timeout) as resp:
+        data = resp.read()
+    try:
+        return json.loads(data.decode("utf-8", errors="ignore"))
+    except Exception:
+        return {"raw": data.decode("utf-8", errors="ignore")}
+
+
+@register_tool(
+    "GTExExpressionTool",
+    config={
+        "name": "GTEx_get_expression_summary",
+        "type": "GTExExpressionTool",
+        "description": "Get GTEx expression summary for a gene via /expression/geneExpression",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "ensembl_gene_id": {
+                    "type": "string",
+                    "description": "Ensembl gene ID, e.g., ENSG00000141510",
+                }
+            },
+            "required": ["ensembl_gene_id"],
+        },
+        "settings": {"base_url": "https://gtexportal.org/api/v2", "timeout": 30},
+    },
+)
+class GTExExpressionTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def run(self, arguments: Dict[str, Any]):
+        base = self.tool_config.get("settings", {}).get(
+            "base_url", "https://gtexportal.org/api/v2"
+        )
+        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
+
+        query = {"gencodeId": arguments.get("ensembl_gene_id")}
+        url = f"{base}/expression/geneExpression?{urlencode(query)}"
+        try:
+            api_response = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            # Wrap API response to match schema: data.geneExpression should be array
+            # API returns {"data": [...], "paging_info": {...}}
+            # Schema expects {"data": {"geneExpression": [...]}}
+            if isinstance(api_response, dict) and "data" in api_response:
+                wrapped_data = {"geneExpression": api_response.get("data", [])}
+            else:
+                # Fallback if response format is unexpected
+                wrapped_data = {
+                    "geneExpression": (
+                        api_response if isinstance(api_response, list) else []
+                    )
+                }
+
+            return {
+                "source": "GTEx",
+                "endpoint": "expression/geneExpression",
+                "query": query,
+                "data": wrapped_data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "GTEx",
+                "endpoint": "expression/geneExpression",
+                "success": False,
+            }
+
+
+@register_tool(
+    "GTExEQTLTool",
+    config={
+        "name": "GTEx_query_eqtl",
+        "type": "GTExEQTLTool",
+        "description": "Query GTEx single-tissue eQTL via /association/singleTissueEqtl",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "ensembl_gene_id": {
+                    "type": "string",
+                    "description": "Ensembl gene ID, e.g., ENSG00000141510",
+                },
+                "page": {
+                    "type": "integer",
+                    "default": 1,
+                    "minimum": 1,
+                    "description": "Page number (1-based)",
+                },
+                "size": {
+                    "type": "integer",
+                    "default": 10,
+                    "minimum": 1,
+                    "maximum": 100,
+                    "description": "Page size (1–100)",
+                },
+            },
+            "required": ["ensembl_gene_id"],
+        },
+        "settings": {"base_url": "https://gtexportal.org/api/v2", "timeout": 30},
+    },
+)
+class GTExEQTLTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def run(self, arguments: Dict[str, Any]):
+        base = self.tool_config.get("settings", {}).get(
+            "base_url", "https://gtexportal.org/api/v2"
+        )
+        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
+
+        query: Dict[str, Any] = {
+            "gencodeId": arguments.get("ensembl_gene_id"),
+        }
+        if "page" in arguments:
+            query["page"] = int(arguments["page"])
+        if "size" in arguments:
+            query["pageSize"] = int(arguments["size"])
+
+        url = f"{base}/association/singleTissueEqtl?{urlencode(query)}"
+        try:
+            api_response = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            # Wrap API response to match schema: data.singleTissueEqtl should be array
+            # API returns {"data": [...], "paging_info": {...}}
+            # Schema expects {"data": {"singleTissueEqtl": [...]}}
+            if isinstance(api_response, dict) and "data" in api_response:
+                wrapped_data = {"singleTissueEqtl": api_response.get("data", [])}
+            else:
+                # Fallback if response format is unexpected
+                wrapped_data = {
+                    "singleTissueEqtl": (
+                        api_response if isinstance(api_response, list) else []
+                    )
+                }
+
+            return {
+                "source": "GTEx",
+                "endpoint": "association/singleTissueEqtl",
+                "query": query,
+                "data": wrapped_data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "GTEx",
+                "endpoint": "association/singleTissueEqtl",
+                "success": False,
+            }
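Unlike the GBIF and GDC tools, the GTEx tools re-wrap the portal's response so that the returned data field holds a named array rather than the raw list. A standalone sketch of that reshaping; the payload mimics the {"data": [...], "paging_info": {...}} shape the hunk's comments describe, and the record itself is made up:

api_response = {"data": [{"snpId": "rs12345", "pValue": 1e-8}], "paging_info": {"page": 1}}
if isinstance(api_response, dict) and "data" in api_response:
    wrapped_data = {"singleTissueEqtl": api_response.get("data", [])}
else:
    wrapped_data = {"singleTissueEqtl": api_response if isinstance(api_response, list) else []}
print(wrapped_data)  # {'singleTissueEqtl': [{'snpId': 'rs12345', 'pValue': 1e-08}]}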