tooluniverse 1.0.11.2__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/build_optimizer.py +115 -22
- tooluniverse/data/encode_tools.json +139 -0
- tooluniverse/data/gbif_tools.json +152 -0
- tooluniverse/data/gdc_tools.json +116 -0
- tooluniverse/data/gtex_tools.json +116 -0
- tooluniverse/data/icgc_tools.json +0 -0
- tooluniverse/data/mgnify_tools.json +121 -0
- tooluniverse/data/obis_tools.json +122 -0
- tooluniverse/data/optimizer_tools.json +275 -0
- tooluniverse/data/rnacentral_tools.json +99 -0
- tooluniverse/data/smolagent_tools.json +206 -0
- tooluniverse/data/wikipathways_tools.json +106 -0
- tooluniverse/default_config.py +12 -0
- tooluniverse/encode_tool.py +245 -0
- tooluniverse/execute_function.py +46 -8
- tooluniverse/gbif_tool.py +166 -0
- tooluniverse/gdc_tool.py +175 -0
- tooluniverse/generate_tools.py +121 -9
- tooluniverse/gtex_tool.py +168 -0
- tooluniverse/mgnify_tool.py +181 -0
- tooluniverse/obis_tool.py +185 -0
- tooluniverse/pypi_package_inspector_tool.py +3 -2
- tooluniverse/rnacentral_tool.py +124 -0
- tooluniverse/smcp_server.py +1 -1
- tooluniverse/smolagent_tool.py +555 -0
- tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
- tooluniverse/tools/ENCODE_list_files.py +59 -0
- tooluniverse/tools/ENCODE_search_experiments.py +67 -0
- tooluniverse/tools/GBIF_search_occurrences.py +67 -0
- tooluniverse/tools/GBIF_search_species.py +55 -0
- tooluniverse/tools/GDC_list_files.py +55 -0
- tooluniverse/tools/GDC_search_cases.py +55 -0
- tooluniverse/tools/GTEx_get_expression_summary.py +49 -0
- tooluniverse/tools/GTEx_query_eqtl.py +59 -0
- tooluniverse/tools/MGnify_list_analyses.py +52 -0
- tooluniverse/tools/MGnify_search_studies.py +55 -0
- tooluniverse/tools/OBIS_search_occurrences.py +59 -0
- tooluniverse/tools/OBIS_search_taxa.py +52 -0
- tooluniverse/tools/RNAcentral_get_by_accession.py +46 -0
- tooluniverse/tools/RNAcentral_search.py +52 -0
- tooluniverse/tools/TestCaseGenerator.py +46 -0
- tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
- tooluniverse/tools/ToolDiscover.py +4 -0
- tooluniverse/tools/UniProt_search.py +17 -44
- tooluniverse/tools/WikiPathways_get_pathway.py +52 -0
- tooluniverse/tools/WikiPathways_search.py +52 -0
- tooluniverse/tools/__init__.py +43 -1
- tooluniverse/tools/advanced_literature_search_agent.py +46 -0
- tooluniverse/tools/alphafold_get_annotations.py +4 -10
- tooluniverse/tools/download_binary_file.py +3 -6
- tooluniverse/tools/open_deep_research_agent.py +46 -0
- tooluniverse/wikipathways_tool.py +122 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/METADATA +3 -1
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/RECORD +58 -17
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/top_level.txt +0 -0
tooluniverse/execute_function.py
CHANGED
@@ -1404,8 +1404,12 @@ class ToolUniverse:
         # Validate tools have required fields
         valid_tools = []
         for tool in tools_in_file:
+            # Validate that tool is a dict, has "name" field, and name is a string
             if isinstance(tool, dict) and "name" in tool:
-                valid_tools.append(tool)
+                name_value = tool["name"]
+                # Ensure name is a string (not a dict/object) - this filters out schema files
+                if isinstance(name_value, str):
+                    valid_tools.append(tool)

         return valid_tools

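Context for the change above: a JSON Schema definition file can also carry a top-level "name" key, but its value is an object rather than a string, which is exactly what the new isinstance check screens out. A minimal sketch of the rule (the sample entries are illustrative, not from the package):

def is_valid_tool(tool):
    # Mirrors the new check: a tool config is a dict whose "name" is a string
    return isinstance(tool, dict) and isinstance(tool.get("name"), str)

tool_entry = {"name": "GBIF_search_species", "type": "GBIFTool"}
schema_entry = {"name": {"type": "string", "description": "Tool name"}}
print(is_valid_tool(tool_entry))    # True
print(is_valid_tool(schema_entry))  # False: schema definitions are skipped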
@@ -1428,7 +1432,13 @@ class ToolUniverse:
         for _category, file_path in self.tool_files.items():
             tools_in_category = self._read_tools_from_file(file_path)
             all_tools.extend(tools_in_category)
-
+            # Only add string names to the set (filter out any non-string names as extra safety)
+            tool_names = [
+                tool["name"]
+                for tool in tools_in_category
+                if isinstance(tool.get("name"), str)
+            ]
+            all_tool_names.update(tool_names)

         # Also include remote tools
         try:
@@ -1441,7 +1451,13 @@ class ToolUniverse:
                 remote_tools = self._read_tools_from_file(fpath)
                 if remote_tools:
                     all_tools.extend(remote_tools)
-
+                    # Only add string names to the set (filter out any non-string names as extra safety)
+                    tool_names = [
+                        tool["name"]
+                        for tool in remote_tools
+                        if isinstance(tool.get("name"), str)
+                    ]
+                    all_tool_names.update(tool_names)
         except Exception as e:
             warning(f"Warning: Failed to scan remote tools directory: {e}")

@@ -1465,11 +1481,17 @@ class ToolUniverse:
             warning(f"Warning: Data directory not found: {data_dir}")
             return all_tools, all_tool_names

-        # Recursively find all JSON files
+        # Recursively find all JSON files, excluding schema files
        json_files = []
         for root, _dirs, files in os.walk(data_dir):
+            # Skip schemas directory (contains JSON schema definition files, not tool configs)
+            if "schemas" in root:
+                continue
             for file in files:
                 if file.lower().endswith(".json"):
+                    # Skip files with "schema" in the name
+                    if "schema" in file.lower():
+                        continue
                     json_files.append(os.path.join(root, file))

         self.logger.debug(f"Found {len(json_files)} JSON files to scan")
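The same release also keeps the data-directory walk away from schema files entirely. A small sketch of the resulting traversal rule (the layout below is hypothetical, for illustration only):

import os

# Hypothetical layout:
#   data/gbif_tools.json          -> scanned
#   data/schemas/tool.schema.json -> skipped (path contains "schemas")
#   data/my_schema.json           -> skipped (file name contains "schema")
for root, _dirs, files in os.walk("data"):
    if "schemas" in root:
        continue
    for file in files:
        if file.lower().endswith(".json") and "schema" not in file.lower():
            print(os.path.join(root, file))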
@@ -1479,7 +1501,13 @@ class ToolUniverse:
             tools_in_file = self._read_tools_from_file(json_file)
             if tools_in_file:
                 all_tools.extend(tools_in_file)
-
+                # Only add string names to the set (filter out any non-string names as extra safety)
+                tool_names = [
+                    tool["name"]
+                    for tool in tools_in_file
+                    if isinstance(tool.get("name"), str)
+                ]
+                all_tool_names.update(tool_names)
                 self.logger.debug(f"Loaded {len(tools_in_file)} tools from {json_file}")

         self.logger.info(
@@ -1868,7 +1896,10 @@ class ToolUniverse:
                 continue

             tool_instance = self._ensure_tool_instance(job)
-            if
+            if (
+                not tool_instance
+                or not getattr(tool_instance, "supports_caching", lambda: True)()
+            ):
                 continue

             cache_key = tool_instance.get_cache_key(job.arguments or {})
@@ -2087,7 +2118,10 @@ class ToolUniverse:

         if cache_enabled:
             tool_instance = self._get_tool_instance(function_name, cache=True)
-            if
+            if (
+                tool_instance
+                and getattr(tool_instance, "supports_caching", lambda: True)()
+            ):
                 cache_namespace = tool_instance.get_cache_namespace()
                 cache_version = tool_instance.get_cache_version()
                 cache_key = self._make_cache_key(function_name, arguments)
@@ -2211,7 +2245,11 @@ class ToolUniverse:
             )

         # Cache result if enabled
-        if
+        if (
+            cache_enabled
+            and tool_instance
+            and getattr(tool_instance, "supports_caching", lambda: True)()
+        ):
             if cache_key is None:
                 cache_key = self._make_cache_key(function_name, arguments)
             if cache_namespace is None:
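The three hunks above all gate the cache through getattr(tool_instance, "supports_caching", lambda: True)(), so caching is opt-out: a tool that defines supports_caching() returning False is skipped, and tools without the method keep the old always-cache behavior. A minimal sketch (the class below is hypothetical, not from the package):

class LiveQueryTool:
    # Hypothetical tool whose responses should never be cached
    def supports_caching(self) -> bool:
        return False

tool = LiveQueryTool()
# Same expression as in execute_function.py: defaults to True when
# the tool does not define supports_caching().
print(getattr(tool, "supports_caching", lambda: True)())      # False
print(getattr(object(), "supports_caching", lambda: True)())  # True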
tooluniverse/gbif_tool.py
ADDED
@@ -0,0 +1,166 @@
import json
from typing import Any, Dict
from urllib.parse import urlencode
from urllib.request import Request, urlopen

from tooluniverse.tool_registry import register_tool


def _http_get(
    url: str,
    headers: Dict[str, str] | None = None,
    timeout: int = 30,
) -> Dict[str, Any]:
    req = Request(url, headers=headers or {})
    with urlopen(req, timeout=timeout) as resp:
        data = resp.read()
    try:
        return json.loads(data.decode("utf-8", errors="ignore"))
    except Exception:
        return {"raw": data.decode("utf-8", errors="ignore")}


@register_tool(
    "GBIFTool",
    config={
        "name": "GBIF_search_species",
        "type": "GBIFTool",
        "description": "Search species via GBIF species/search",
        "parameter": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Query keyword, e.g., Homo",
                },
                "limit": {
                    "type": "integer",
                    "default": 10,
                    "minimum": 1,
                    "maximum": 300,
                },
                "offset": {
                    "type": "integer",
                    "default": 0,
                    "minimum": 0,
                },
            },
            "required": ["query"],
        },
        "settings": {
            "base_url": "https://api.gbif.org/v1",
            "timeout": 30,
        },
    },
)
class GBIFTool:
    def __init__(self, tool_config=None):
        self.tool_config = tool_config or {}

    def run(self, arguments: Dict[str, Any]):
        base = self.tool_config.get("settings", {}).get(
            "base_url", "https://api.gbif.org/v1"
        )
        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))
        query_text = arguments.get("query")
        limit = int(arguments.get("limit", 10))
        offset = int(arguments.get("offset", 0))

        query = {"q": query_text, "limit": limit, "offset": offset}
        url = f"{base}/species/search?{urlencode(query)}"
        try:
            data = _http_get(
                url, headers={"Accept": "application/json"}, timeout=timeout
            )
            return {
                "source": "GBIF",
                "endpoint": "species/search",
                "query": query,
                "data": data,
                "success": True,
            }
        except Exception as e:
            return {
                "error": str(e),
                "source": "GBIF",
                "endpoint": "species/search",
                "success": False,
            }


@register_tool(
    "GBIFOccurrenceTool",
    config={
        "name": "GBIF_search_occurrences",
        "type": "GBIFOccurrenceTool",
        "description": "Search occurrences via GBIF occurrence/search",
        "parameter": {
            "type": "object",
            "properties": {
                "taxonKey": {
                    "type": "integer",
                    "description": "GBIF taxonKey filter",
                },
                "country": {
                    "type": "string",
                    "description": "Country code, e.g., US",
                },
                "hasCoordinate": {"type": "boolean", "default": True},
                "limit": {
                    "type": "integer",
                    "default": 10,
                    "minimum": 1,
                    "maximum": 300,
                },
                "offset": {
                    "type": "integer",
                    "default": 0,
                    "minimum": 0,
                },
            },
        },
        "settings": {
            "base_url": "https://api.gbif.org/v1",
            "timeout": 30,
        },
    },
)
class GBIFOccurrenceTool:
    def __init__(self, tool_config=None):
        self.tool_config = tool_config or {}

    def run(self, arguments: Dict[str, Any]):
        base = self.tool_config.get("settings", {}).get(
            "base_url", "https://api.gbif.org/v1"
        )
        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))

        query = {}
        for key in ("taxonKey", "country", "hasCoordinate", "limit", "offset"):
            if key in arguments and arguments[key] is not None:
                query[key] = arguments[key]

        if "limit" not in query:
            query["limit"] = 10
        if "offset" not in query:
            query["offset"] = 0

        url = f"{base}/occurrence/search?{urlencode(query)}"
        try:
            data = _http_get(
                url, headers={"Accept": "application/json"}, timeout=timeout
            )
            return {
                "source": "GBIF",
                "endpoint": "occurrence/search",
                "query": query,
                "data": data,
                "success": True,
            }
        except Exception as e:
            return {
                "error": str(e),
                "source": "GBIF",
                "endpoint": "occurrence/search",
                "success": False,
            }
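For reference, the new tool can be exercised directly; a sketch with illustrative arguments (GBIF's species/search responses list matches under a "results" key):

tool = GBIFTool()
result = tool.run({"query": "Puma concolor", "limit": 5})
if result["success"]:
    for record in result["data"].get("results", []):
        print(record.get("scientificName"))
else:
    print("GBIF request failed:", result["error"])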
tooluniverse/gdc_tool.py
ADDED
@@ -0,0 +1,175 @@
import json
from typing import Any, Dict
from urllib.parse import urlencode
from urllib.request import Request, urlopen

from tooluniverse.tool_registry import register_tool


def _http_get(
    url: str,
    headers: Dict[str, str] | None = None,
    timeout: int = 30,
) -> Dict[str, Any]:
    req = Request(url, headers=headers or {})
    with urlopen(req, timeout=timeout) as resp:
        data = resp.read()
    try:
        return json.loads(data.decode("utf-8", errors="ignore"))
    except Exception:
        return {"raw": data.decode("utf-8", errors="ignore")}


@register_tool(
    "GDCCasesTool",
    config={
        "name": "GDC_search_cases",
        "type": "GDCCasesTool",
        "description": "Search NCI GDC cases via /cases",
        "parameter": {
            "type": "object",
            "properties": {
                "project_id": {
                    "type": "string",
                    "description": "GDC project identifier (e.g., 'TCGA-BRCA')",
                },
                "size": {
                    "type": "integer",
                    "default": 10,
                    "minimum": 1,
                    "maximum": 100,
                    "description": "Number of results (1–100)",
                },
                "offset": {
                    "type": "integer",
                    "default": 0,
                    "minimum": 0,
                    "description": "Offset for pagination (0-based)",
                },
            },
        },
        "settings": {"base_url": "https://api.gdc.cancer.gov", "timeout": 30},
    },
)
class GDCCasesTool:
    def __init__(self, tool_config=None):
        self.tool_config = tool_config or {}

    def run(self, arguments: Dict[str, Any]):
        base = self.tool_config.get("settings", {}).get(
            "base_url", "https://api.gdc.cancer.gov"
        )
        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))

        query: Dict[str, Any] = {}
        if arguments.get("project_id"):
            # Build filters JSON for project_id
            filters = {
                "op": "=",
                "content": {
                    "field": "projects.project_id",
                    "value": [arguments["project_id"]],
                },
            }
            query["filters"] = json.dumps(filters)
        if arguments.get("size") is not None:
            query["size"] = int(arguments["size"])
        if arguments.get("offset") is not None:
            query["from"] = int(arguments["offset"])

        url = f"{base}/cases?{urlencode(query)}"
        try:
            data = _http_get(
                url, headers={"Accept": "application/json"}, timeout=timeout
            )
            return {
                "source": "GDC",
                "endpoint": "cases",
                "query": query,
                "data": data,
                "success": True,
            }
        except Exception as e:
            return {
                "error": str(e),
                "source": "GDC",
                "endpoint": "cases",
                "success": False,
            }


@register_tool(
    "GDCFilesTool",
    config={
        "name": "GDC_list_files",
        "type": "GDCFilesTool",
        "description": "List NCI GDC files via /files with optional data_type filter",
        "parameter": {
            "type": "object",
            "properties": {
                "data_type": {
                    "type": "string",
                    "description": "Data type filter (e.g., 'Gene Expression Quantification')",
                },
                "size": {
                    "type": "integer",
                    "default": 10,
                    "minimum": 1,
                    "maximum": 100,
                    "description": "Number of results (1–100)",
                },
                "offset": {
                    "type": "integer",
                    "default": 0,
                    "minimum": 0,
                    "description": "Offset for pagination (0-based)",
                },
            },
        },
        "settings": {"base_url": "https://api.gdc.cancer.gov", "timeout": 30},
    },
)
class GDCFilesTool:
    def __init__(self, tool_config=None):
        self.tool_config = tool_config or {}

    def run(self, arguments: Dict[str, Any]):
        base = self.tool_config.get("settings", {}).get(
            "base_url", "https://api.gdc.cancer.gov"
        )
        timeout = int(self.tool_config.get("settings", {}).get("timeout", 30))

        query: Dict[str, Any] = {}
        if arguments.get("data_type"):
            filters = {
                "op": "=",
                "content": {
                    "field": "files.data_type",
                    "value": [arguments["data_type"]],
                },
            }
            query["filters"] = json.dumps(filters)
        if arguments.get("size") is not None:
            query["size"] = int(arguments["size"])
        if arguments.get("offset") is not None:
            query["from"] = int(arguments["offset"])

        url = f"{base}/files?{urlencode(query)}"
        try:
            data = _http_get(
                url, headers={"Accept": "application/json"}, timeout=timeout
            )
            return {
                "source": "GDC",
                "endpoint": "files",
                "query": query,
                "data": data,
                "success": True,
            }
        except Exception as e:
            return {
                "error": str(e),
                "source": "GDC",
                "endpoint": "files",
                "success": False,
            }
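For reference, the filters value these tools serialize follows the GDC API's JSON filter syntax; a sketch of the query string GDC_search_cases builds for a project filter (values illustrative):

import json
from urllib.parse import urlencode

filters = {
    "op": "=",
    "content": {"field": "projects.project_id", "value": ["TCGA-BRCA"]},
}
query = {"filters": json.dumps(filters), "size": 5, "from": 0}
# The filters JSON is percent-encoded into the URL, e.g.
# https://api.gdc.cancer.gov/cases?filters=%7B%22op%22%3A...&size=5&from=0
print(f"https://api.gdc.cancer.gov/cases?{urlencode(query)}")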
tooluniverse/generate_tools.py
CHANGED
@@ -5,7 +5,7 @@ import os
 import shutil
 import subprocess
 from pathlib import Path
-from typing import Dict, Any, Optional, List
+from typing import Dict, Any, Optional, List, Tuple


 def json_type_to_python(json_type: str) -> str:
@@ -20,6 +20,55 @@ def json_type_to_python(json_type: str) -> str:
     }.get(json_type, "Any")


+def validate_generated_code(
+    tool_name: str, tool_config: Dict[str, Any], generated_file: Path
+) -> Tuple[bool, list]:
+    """Validate that generated code matches the tool configuration.
+
+    Args:
+        tool_name: Name of the tool
+        tool_config: Original tool configuration
+        generated_file: Path to the generated Python file
+
+    Returns:
+        Tuple of (is_valid, list_of_issues)
+    """
+    issues = []
+
+    if not generated_file.exists():
+        return False, [f"Generated file does not exist: {generated_file}"]
+
+    try:
+        content = generated_file.read_text(encoding="utf-8")
+
+        # Check that function name matches tool name
+        if f"def {tool_name}(" not in content:
+            issues.append(f"Function definition not found for {tool_name}")
+
+        # Check that all required parameters are present
+        schema = tool_config.get("parameter", {}) or {}
+        properties = schema.get("properties", {}) or {}
+        required = schema.get("required", []) or []
+
+        for param_name in required:
+            # Check if parameter appears in function signature
+            if f"{param_name}:" not in content:
+                issues.append(
+                    f"Required parameter '{param_name}' missing from function signature"
+                )
+
+        # Check that all parameters in config appear in generated code
+        for param_name in properties.keys():
+            # Parameter should appear either in signature or in kwargs
+            if f'"{param_name}"' not in content and f"{param_name}:" not in content:
+                issues.append(f"Parameter '{param_name}' missing from generated code")
+
+    except Exception as e:
+        issues.append(f"Error reading generated file: {e}")
+
+    return len(issues) == 0, issues
+
+
 def generate_tool_file(
     tool_name: str,
     tool_config: Dict[str, Any],
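A sketch of calling the new validator on its own (the config and path below are illustrative):

from pathlib import Path

config = {
    "parameter": {
        "properties": {"query": {"type": "string"}},
        "required": ["query"],
    }
}
ok, issues = validate_generated_code(
    "GBIF_search_species",
    config,
    Path("tooluniverse/tools/GBIF_search_species.py"),
)
for issue in issues:
    print("validation issue:", issue)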
@@ -403,11 +452,18 @@ def _format_files(paths: List[str]) -> None:
         pass


-def main(format_enabled: Optional[bool] = None) -> None:
+def main(
+    format_enabled: Optional[bool] = None,
+    force_regenerate: bool = False,
+    verbose: bool = False,
+) -> None:
     """Generate tools and format the generated files if enabled.

-
-
+    Args:
+        format_enabled: If None, decide based on TOOLUNIVERSE_SKIP_FORMAT env var
+            (skip when set to "1").
+        force_regenerate: If True, regenerate all tools regardless of changes
+        verbose: If True, print detailed change information
     """
     from tooluniverse import ToolUniverse
     from .build_optimizer import cleanup_orphaned_files, get_changed_tools
@@ -428,23 +484,64 @@ def main(format_enabled: Optional[bool] = None) -> None:

     # Check for changes
     metadata_file = output / ".tool_metadata.json"
-
-
+    # Allow override via environment variable or function parameter
+    force_regenerate = force_regenerate or (
+        os.getenv("TOOLUNIVERSE_FORCE_REGENERATE") == "1"
+    )
+    verbose = verbose or (os.getenv("TOOLUNIVERSE_VERBOSE") == "1")
+
+    new_tools, changed_tools, unchanged_tools, change_details = get_changed_tools(
+        tu.all_tool_dict,
+        metadata_file,
+        force_regenerate=force_regenerate,
+        verbose=verbose,
     )

     generated_paths: List[str] = []

     # Generate only changed tools if there are changes
     if new_tools or changed_tools:
-
+        total_changed = len(new_tools + changed_tools)
+        print(f"🔄 Generating {total_changed} changed tools...")
+        if new_tools:
+            print(f" ✨ {len(new_tools)} new tools")
+        if changed_tools:
+            print(f" 🔄 {len(changed_tools)} modified tools")
+            if (
+                verbose and len(changed_tools) <= 20
+            ):  # Only show details for reasonable number
+                for tool_name in changed_tools[:20]:
+                    print(f" - {tool_name}")
+                if len(changed_tools) > 20:
+                    print(f" ... and {len(changed_tools) - 20} more")
+
+        validation_errors = []
         for i, (tool_name, tool_config) in enumerate(tu.all_tool_dict.items(), 1):
             if tool_name in new_tools or tool_name in changed_tools:
                 path = generate_tool_file(tool_name, tool_config, output)
                 generated_paths.append(str(path))
+
+                # Validate generated code matches configuration
+                is_valid, issues = validate_generated_code(tool_name, tool_config, path)
+                if not is_valid:
+                    validation_errors.extend([(tool_name, issue) for issue in issues])
+                    if verbose:
+                        print(f" ⚠️ Validation issues for {tool_name}:")
+                        for issue in issues:
+                            print(f" - {issue}")
+
             if i % 50 == 0:
-                print(f"  Processed {i} tools...")
+                print(f"  Processed {i}/{len(tu.all_tool_dict)} tools...")
+
+        if validation_errors:
+            print(f"\n⚠️ Found {len(validation_errors)} validation issue(s):")
+            for tool_name, issue in validation_errors[:10]:  # Show first 10
+                print(f" - {tool_name}: {issue}")
+            if len(validation_errors) > 10:
+                print(f" ... and {len(validation_errors) - 10} more issues")
     else:
         print("✨ No changes detected, skipping tool generation")
+        print(f" 📊 Status: {len(unchanged_tools)} tools unchanged")

     # Always regenerate __init__.py to include all tools
     init_path = generate_init(list(tu.all_tool_dict.keys()), output)
@@ -477,5 +574,20 @@ if __name__ == "__main__":
         action="store_true",
         help="Do not run formatters on generated files",
     )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Force regeneration of all tools regardless of changes",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Print detailed change information",
+    )
     args = parser.parse_args()
-    main(format_enabled=not args.no_format)
+    main(
+        format_enabled=not args.no_format,
+        force_regenerate=args.force,
+        verbose=args.verbose,
+    )
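For reference, the new knobs can be driven either from the CLI flags added above or programmatically; a sketch (import path assumed from the file layout):

from tooluniverse.generate_tools import main

# Equivalent to passing --force --verbose on the command line;
# TOOLUNIVERSE_FORCE_REGENERATE=1 and TOOLUNIVERSE_VERBOSE=1 have the same effect.
main(format_enabled=True, force_regenerate=True, verbose=True)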