tooluniverse 1.0.11.2__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic.
- tooluniverse/build_optimizer.py +115 -22
- tooluniverse/data/encode_tools.json +139 -0
- tooluniverse/data/gbif_tools.json +152 -0
- tooluniverse/data/gdc_tools.json +116 -0
- tooluniverse/data/gtex_tools.json +116 -0
- tooluniverse/data/icgc_tools.json +0 -0
- tooluniverse/data/mgnify_tools.json +121 -0
- tooluniverse/data/obis_tools.json +122 -0
- tooluniverse/data/optimizer_tools.json +275 -0
- tooluniverse/data/rnacentral_tools.json +99 -0
- tooluniverse/data/smolagent_tools.json +206 -0
- tooluniverse/data/wikipathways_tools.json +106 -0
- tooluniverse/default_config.py +12 -0
- tooluniverse/encode_tool.py +245 -0
- tooluniverse/execute_function.py +46 -8
- tooluniverse/gbif_tool.py +166 -0
- tooluniverse/gdc_tool.py +175 -0
- tooluniverse/generate_tools.py +121 -9
- tooluniverse/gtex_tool.py +168 -0
- tooluniverse/mgnify_tool.py +181 -0
- tooluniverse/obis_tool.py +185 -0
- tooluniverse/pypi_package_inspector_tool.py +3 -2
- tooluniverse/rnacentral_tool.py +124 -0
- tooluniverse/smcp_server.py +1 -1
- tooluniverse/smolagent_tool.py +555 -0
- tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
- tooluniverse/tools/ENCODE_list_files.py +59 -0
- tooluniverse/tools/ENCODE_search_experiments.py +67 -0
- tooluniverse/tools/GBIF_search_occurrences.py +67 -0
- tooluniverse/tools/GBIF_search_species.py +55 -0
- tooluniverse/tools/GDC_list_files.py +55 -0
- tooluniverse/tools/GDC_search_cases.py +55 -0
- tooluniverse/tools/GTEx_get_expression_summary.py +49 -0
- tooluniverse/tools/GTEx_query_eqtl.py +59 -0
- tooluniverse/tools/MGnify_list_analyses.py +52 -0
- tooluniverse/tools/MGnify_search_studies.py +55 -0
- tooluniverse/tools/OBIS_search_occurrences.py +59 -0
- tooluniverse/tools/OBIS_search_taxa.py +52 -0
- tooluniverse/tools/RNAcentral_get_by_accession.py +46 -0
- tooluniverse/tools/RNAcentral_search.py +52 -0
- tooluniverse/tools/TestCaseGenerator.py +46 -0
- tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
- tooluniverse/tools/ToolDiscover.py +4 -0
- tooluniverse/tools/UniProt_search.py +17 -44
- tooluniverse/tools/WikiPathways_get_pathway.py +52 -0
- tooluniverse/tools/WikiPathways_search.py +52 -0
- tooluniverse/tools/__init__.py +43 -1
- tooluniverse/tools/advanced_literature_search_agent.py +46 -0
- tooluniverse/tools/alphafold_get_annotations.py +4 -10
- tooluniverse/tools/download_binary_file.py +3 -6
- tooluniverse/tools/open_deep_research_agent.py +46 -0
- tooluniverse/wikipathways_tool.py +122 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/METADATA +3 -1
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/RECORD +58 -17
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.11.2.dist-info → tooluniverse-1.0.12.dist-info}/top_level.txt +0 -0

tooluniverse/data/smolagent_tools.json
ADDED
@@ -0,0 +1,206 @@
+[
+  {
+    "type": "SmolAgentTool",
+    "name": "advanced_literature_search_agent",
+    "description": "Advanced multi-agent literature search system. Required pipeline: (1) query_planner must produce a structured plan and immediately dispatch each sub-query to multi_database_searcher; (2) multi_database_searcher must call ToolUniverse literature tools (PubMed_search_articles, EuropePMC_search_articles, SemanticScholar_search_papers, openalex_literature_search, ArXiv_search_papers, BioRxiv_search_preprints, MedRxiv_search_preprints, Crossref_search_works, DBLP_search_publications, DOAJ_search_articles, CORE_search_papers, PMC_search_papers) and return raw results; (3) result_analyzer must deduplicate and score results; (4) literature_synthesizer must generate a structured markdown report (Executive Summary, Key Findings, Trends, Methods, Top Papers with rationale, Gaps, References). Do not skip any stage; do not answer directly without calling tools.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Research query or topic to search in academic literature. The agent will automatically determine search strategy, database selection, filters, and result limits based on the query content and research domain."
+        }
+      },
+      "required": ["query"]
+    },
+    "settings": {
+      "agent_type": "ManagedAgent",
+      "available_tools": [],
+      "model": {
+        "provider": "AzureOpenAIModel",
+        "model_id": "gpt-5",
+        "api_key": "env:AZURE_OPENAI_API_KEY",
+        "azure_endpoint": "https://azure-ai.hms.edu",
+        "api_version": "2024-10-21"
+      },
+      "agent_init_params": {
+        "max_steps": 50,
+        "stream_outputs": true,
+        "verbosity_level": 1,
+        "planning_interval": 2,
+        "max_execution_time": 600
+      },
+      "sub_agents": [
+        {
+          "name": "query_planner",
+          "description": "Strategic query planning agent that analyzes intent, decomposes into prioritized sub-queries, and generates optimized search terms and target databases. After outputting the plan, immediately invoke multi_database_searcher with the sub-queries (no summaries). Output: JSON plan and explicit call instruction for multi_database_searcher.",
+          "agent_type": "CodeAgent",
+          "available_tools": [],
+          "model": {
+            "provider": "AzureOpenAIModel",
+            "model_id": "gpt-5",
+            "api_key": "env:AZURE_OPENAI_API_KEY",
+            "azure_endpoint": "https://azure-ai.hms.edu",
+            "api_version": "2024-10-21"
+          },
+          "agent_init_params": {
+            "add_base_tools": true,
+            "additional_authorized_imports": ["json", "datetime", "collections"],
+            "max_steps": 10,
+            "stream_outputs": true,
+            "verbosity_level": 1
+          }
+        },
+        {
+          "name": "multi_database_searcher",
+          "description": "Multi-database parallel search specialist. Must call the following ToolUniverse tools for each sub-query (as applicable): PubMed_search_articles, EuropePMC_search_articles, SemanticScholar_search_papers, openalex_literature_search, ArXiv_search_papers, BioRxiv_search_preprints, MedRxiv_search_preprints, Crossref_search_works, DBLP_search_publications, DOAJ_search_articles, CORE_search_papers, PMC_search_papers. Adapt queries to each API and return structured JSON with raw items (title, abstract, authors, doi, year, venue, citations, url). Do not summarize.",
+          "agent_type": "CodeAgent",
+          "available_tools": [
+            "PubMed_search_articles",
+            "EuropePMC_search_articles",
+            "SemanticScholar_search_papers",
+            "openalex_literature_search",
+            "ArXiv_search_papers",
+            "BioRxiv_search_preprints",
+            "MedRxiv_search_preprints",
+            "Crossref_search_works",
+            "DBLP_search_publications",
+            "DOAJ_search_articles",
+            "CORE_search_papers",
+            "PMC_search_papers"
+          ],
+          "model": {
+            "provider": "AzureOpenAIModel",
+            "model_id": "gpt-5",
+            "api_key": "env:AZURE_OPENAI_API_KEY",
+            "azure_endpoint": "https://azure-ai.hms.edu",
+            "api_version": "2024-10-21"
+          },
+          "agent_init_params": {
+            "add_base_tools": true,
+            "additional_authorized_imports": ["json", "concurrent.futures", "datetime", "urllib.parse", "re"],
+            "max_steps": 25,
+            "stream_outputs": true,
+            "verbosity_level": 1,
+            "max_tool_threads": 5
+          }
+        },
+        {
+          "name": "result_analyzer",
+          "description": "Intelligent result analysis agent. Input: multi_database_searcher raw results. Steps: deduplicate (DOI, normalized title similarity, author matching), compute composite relevance score (keyword match, normalized citations, venue impact, recency, cross-source frequency), filter low-quality (<0.3), rank and cluster by themes, identify high-impact and recent breakthroughs. Output: ranked, deduplicated list with scores, themes, and quality flags. Then instruct literature_synthesizer to produce the final report.",
+          "agent_type": "CodeAgent",
+          "available_tools": [],
+          "model": {
+            "provider": "AzureOpenAIModel",
+            "model_id": "gpt-5",
+            "api_key": "env:AZURE_OPENAI_API_KEY",
+            "azure_endpoint": "https://azure-ai.hms.edu",
+            "api_version": "2024-10-21"
+          },
+          "agent_init_params": {
+            "add_base_tools": true,
+            "additional_authorized_imports": ["json", "collections", "re", "difflib", "datetime", "math"],
+            "max_steps": 15,
+            "stream_outputs": true,
+            "verbosity_level": 1
+          }
+        },
+        {
+          "name": "literature_synthesizer",
+          "description": "Literature synthesis and report generation specialist. Input: result_analyzer ranked list. Produce a structured markdown report with sections: Executive Summary, Key Findings, Research Trends, Methodology Overview, Top Papers with rationale (10–15), Research Gaps, References (with DOIs/URLs). Use only analyzed items; do not invent citations.",
+          "agent_type": "CodeAgent",
+          "available_tools": [],
+          "model": {
+            "provider": "AzureOpenAIModel",
+            "model_id": "gpt-5",
+            "api_key": "env:AZURE_OPENAI_API_KEY",
+            "azure_endpoint": "https://azure-ai.hms.edu",
+            "api_version": "2024-10-21"
+          },
+          "agent_init_params": {
+            "add_base_tools": true,
+            "additional_authorized_imports": ["json", "collections", "datetime", "statistics"],
+            "max_steps": 20,
+            "stream_outputs": true,
+            "verbosity_level": 1
+          }
+        }
+      ]
+    }
+  },
+  {
+    "type": "SmolAgentTool",
+    "name": "open_deep_research_agent",
+    "description": "Research manager agent that decomposes the user task, delegates focused subtasks to domain sub‑agents (web researcher, synthesizer), enforces evidence use, requires numeric outputs with units, and returns a concise final answer with citations. It should: (1) draft a brief plan, (2) ask web_researcher to gather authoritative facts (URLs + extracted numbers), (3) validate consistency across sources, (4) instruct synthesizer to compute/compose the final result, and (5) output only the final, unit‑aware answer plus one short rationale line.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "task": {"type": "string", "description": "Research query/task to execute"}
+      },
+      "required": ["task"]
+    },
+    "settings": {
+      "agent_type": "ManagedAgent",
+      "available_tools": [],
+      "model": {
+        "provider": "AzureOpenAIModel",
+        "model_id": "gpt-5",
+        "api_key": "env:AZURE_OPENAI_API_KEY",
+        "azure_endpoint": "https://azure-ai.hms.edu",
+        "api_version": "2024-10-21"
+      },
+      "agent_init_params": {
+        "max_steps": 30,
+        "stream_outputs": true,
+        "verbosity_level": 1,
+        "planning_interval": 1
+      },
+      "sub_agents": [
+        {
+          "name": "web_researcher",
+          "description": "Web research specialist that (a) formulates robust search queries, (b) selects authoritative sources (official sites, Wikipedia with corroboration, reputable databases), (c) visits pages and extracts exact figures (units, context), (d) records 1–2 key quotes/snippets and the canonical URL, and (e) returns a short, source‑linked note ready for synthesis.",
+          "agent_type": "CodeAgent",
+          "available_tools": [
+            {"type": "smolagents", "class": "WebSearchTool", "import_path": "smolagents.default_tools"},
+            {"type": "smolagents", "class": "VisitWebpageTool", "import_path": "smolagents.default_tools"}
+          ],
+          "model": {
+            "provider": "AzureOpenAIModel",
+            "model_id": "gpt-5",
+            "api_key": "env:AZURE_OPENAI_API_KEY",
+            "azure_endpoint": "https://azure-ai.hms.edu",
+            "api_version": "2024-10-21"
+          },
+          "agent_init_params": {
+            "add_base_tools": true,
+            "additional_authorized_imports": ["requests", "bs4", "lxml"],
+            "max_steps": 12,
+            "stream_outputs": true,
+            "verbosity_level": 1,
+            "planning_interval": 1
+          }
+        },
+        {
+          "name": "synthesizer",
+          "description": "Synthesis specialist that reads prior research notes, performs any light calculation (unit conversion, division, rounding), resolves minor conflicts by favoring higher‑authority sources, and produces a single, precise answer with units and 1–2 citations. Keep prose minimal; prioritize the final numeric result and rationale.",
+          "agent_type": "ToolCallingAgent",
+          "available_tools": [
+            {"type": "smolagents", "class": "WebSearchTool", "import_path": "smolagents.default_tools"}
+          ],
+          "model": {
+            "provider": "AzureOpenAIModel",
+            "model_id": "gpt-5",
+            "api_key": "env:AZURE_OPENAI_API_KEY",
+            "azure_endpoint": "https://azure-ai.hms.edu",
+            "api_version": "2024-10-21"
+          },
+          "agent_init_params": {
+            "max_steps": 8,
+            "stream_outputs": false,
+            "planning_interval": 1
+          }
+        }
+      ]
+    }
+  }
+]
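
These two SmolAgentTool entries register complete multi-agent pipelines as ordinary ToolUniverse tools, so once the new "smolagents" category is loaded (see the default_config.py change below) they can be invoked with a single function call. A minimal usage sketch, assuming the ToolUniverse class exported by the package still exposes load_tools() and run_one_function() as in earlier releases (neither is shown in this diff), and that AZURE_OPENAI_API_KEY is set for the Azure endpoint configured above:

# Hedged sketch: load_tools()/run_one_function() are assumed from earlier
# tooluniverse releases, not shown in this diff.
from tooluniverse import ToolUniverse

tu = ToolUniverse()
tu.load_tools()  # picks up data/smolagent_tools.json via the new "smolagents" entry

result = tu.run_one_function({
    "name": "advanced_literature_search_agent",
    "arguments": {"query": "CRISPR base editing for sickle cell disease"},
})
print(result)  # structured markdown report assembled by literature_synthesizer

The open_deep_research_agent entry would be invoked the same way, with a single "task" argument instead of "query".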

tooluniverse/data/wikipathways_tools.json
ADDED
@@ -0,0 +1,106 @@
+[
+  {
+    "name": "WikiPathways_search",
+    "type": "WikiPathwaysSearchTool",
+    "description": "Text search across community-curated pathways (disease, metabolic, signaling). Use to discover relevant pathways for a topic/gene set and obtain WPIDs for retrieval/visualization.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Free-text query (keywords, gene symbols, processes), e.g., 'p53', 'glycolysis'."
+        },
+        "organism": {
+          "type": "string",
+          "description": "Organism filter (scientific name), e.g., 'Homo sapiens'."
+        }
+      },
+      "required": ["query"]
+    },
+    "fields": {
+      "endpoint": "https://webservice.wikipathways.org/findPathwaysByText",
+      "format": "json"
+    },
+    "return_schema": {
+      "type": "object",
+      "description": "WikiPathways search response",
+      "properties": {
+        "status": {"type": "string"},
+        "data": {
+          "type": "object",
+          "properties": {
+            "result": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "id": {"type": "string", "description": "WPID"},
+                  "name": {"type": "string"},
+                  "species": {"type": "string"}
+                }
+              }
+            }
+          }
+        },
+        "url": {"type": "string"}
+      }
+    },
+    "test_examples": [
+      {"query": "p53"},
+      {"query": "metabolism", "organism": "Homo sapiens"}
+    ],
+    "label": ["WikiPathways", "Pathway", "Search"],
+    "metadata": {
+      "tags": ["pathway", "enrichment", "visualization"],
+      "estimated_execution_time": "< 2 seconds"
+    }
+  },
+  {
+    "name": "WikiPathways_get_pathway",
+    "type": "WikiPathwaysGetTool",
+    "description": "Fetch pathway content by WPID (JSON/GPML). Use to programmatically access pathway nodes/edges/metadata for enrichment reporting or network visualization.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "wpid": {
+          "type": "string",
+          "description": "WikiPathways identifier (e.g., 'WP254')."
+        },
+        "format": {
+          "type": "string",
+          "enum": ["json", "gpml"],
+          "default": "json",
+          "description": "Response format: 'json' for structured, 'gpml' for GPML XML."
+        }
+      },
+      "required": ["wpid"]
+    },
+    "fields": {
+      "endpoint": "https://webservice.wikipathways.org/getPathway",
+      "format": "json"
+    },
+    "return_schema": {
+      "type": "object",
+      "description": "WikiPathways getPathway response",
+      "properties": {
+        "status": {"type": "string"},
+        "data": {
+          "type": "object",
+          "properties": {
+            "pathway": {"type": "object"},
+            "metadata": {"type": "object"}
+          }
+        },
+        "url": {"type": "string"}
+      }
+    },
+    "test_examples": [
+      {"wpid": "WP254", "format": "json"}
+    ],
+    "label": ["WikiPathways", "Pathway", "Content"],
+    "metadata": {
+      "tags": ["pathway", "content", "gpml"],
+      "estimated_execution_time": "< 2 seconds"
+    }
+  }
+]
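
The fields.endpoint values point at the WikiPathways web service, so the behaviour this config describes can be spot-checked with a plain GET, independent of the wrapper in wikipathways_tool.py (not shown in this section). A rough sketch: the endpoint URL and the query/format values come from the config and its test_examples; how the wrapper forwards organism and the exact shape of the raw service response are assumptions.

# Hedged sketch of the HTTP call behind WikiPathways_search.
import json
from urllib.parse import urlencode
from urllib.request import urlopen

endpoint = "https://webservice.wikipathways.org/findPathwaysByText"  # from fields.endpoint
params = {"query": "p53", "format": "json"}  # first test_examples entry

with urlopen(f"{endpoint}?{urlencode(params)}", timeout=30) as resp:
    payload = json.loads(resp.read().decode("utf-8"))

# Assumed: the raw response carries a "result" array resembling return_schema.data.result
for hit in payload.get("result", [])[:5]:
    print(hit.get("id"), hit.get("name"), hit.get("species"))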

tooluniverse/default_config.py
CHANGED
@@ -51,6 +51,8 @@ default_tool_files = {
     "fatcat": os.path.join(current_dir, "data", "fatcat_tools.json"),
     "wikidata_sparql": os.path.join(current_dir, "data", "wikidata_sparql_tools.json"),
     "agents": os.path.join(current_dir, "data", "agentic_tools.json"),
+    # Smolagents tool wrapper configs
+    "smolagents": os.path.join(current_dir, "data", "smolagent_tools.json"),
     "tool_discovery_agents": os.path.join(
         current_dir, "data", "tool_discovery_agents.json"
     ),
@@ -181,8 +183,18 @@ default_tool_files = {
     "geo": os.path.join(current_dir, "data", "geo_tools.json"),
     "dbsnp": os.path.join(current_dir, "data", "dbsnp_tools.json"),
     "gnomad": os.path.join(current_dir, "data", "gnomad_tools.json"),
+    # Newly added database tools
+    "gbif": os.path.join(current_dir, "data", "gbif_tools.json"),
+    "obis": os.path.join(current_dir, "data", "obis_tools.json"),
+    "wikipathways": os.path.join(current_dir, "data", "wikipathways_tools.json"),
+    "rnacentral": os.path.join(current_dir, "data", "rnacentral_tools.json"),
+    "encode": os.path.join(current_dir, "data", "encode_tools.json"),
+    "gtex": os.path.join(current_dir, "data", "gtex_tools.json"),
+    "mgnify": os.path.join(current_dir, "data", "mgnify_tools.json"),
+    "gdc": os.path.join(current_dir, "data", "gdc_tools.json"),
     # Ontology tools
     "ols": os.path.join(current_dir, "data", "ols_tools.json"),
+    "optimizer": os.path.join(current_dir, "data", "optimizer_tools.json"),
 }
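
Each new key maps a category name to a JSON config file under data/, so the added databases are registered exactly like the existing categories. A small sketch that relies only on the default_tool_files dict shown above and the JSON layout of the new config files:

# Sketch: inspect one of the newly registered categories straight from default_config.
import json
from tooluniverse.default_config import default_tool_files

with open(default_tool_files["wikipathways"]) as f:
    tool_configs = json.load(f)  # list of tool definitions, as in wikipathways_tools.json above

print([cfg["name"] for cfg in tool_configs])
# -> ['WikiPathways_search', 'WikiPathways_get_pathway']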

tooluniverse/encode_tool.py
ADDED
@@ -0,0 +1,245 @@
+import json
+from typing import Any, Dict
+from urllib.error import HTTPError
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen
+
+from tooluniverse.tool_registry import register_tool
+from tooluniverse.exceptions import (
+    ToolError,
+    ToolAuthError,
+    ToolRateLimitError,
+    ToolUnavailableError,
+    ToolValidationError,
+    ToolConfigError,
+    ToolDependencyError,
+    ToolServerError,
+)
+
+
+def _http_get(
+    url: str,
+    headers: Dict[str, str] | None = None,
+    timeout: int = 30,
+) -> Dict[str, Any]:
+    req = Request(url, headers=headers or {})
+    try:
+        with urlopen(req, timeout=timeout) as resp:
+            data = resp.read()
+            try:
+                return json.loads(data.decode("utf-8", errors="ignore"))
+            except Exception:
+                return {"raw": data.decode("utf-8", errors="ignore")}
+    except HTTPError as e:
+        # ENCODE API may return 404 even with valid JSON data
+        # Read the response body from the error
+        try:
+            data = e.read()
+            parsed = json.loads(data.decode("utf-8", errors="ignore"))
+            # If we got valid JSON, return it even though status was 404
+            return parsed
+        except Exception:
+            # If we can't parse, re-raise the original error
+            raise
+
+
+@register_tool(
+    "ENCODESearchTool",
+    config={
+        "name": "ENCODE_search_experiments",
+        "type": "ENCODESearchTool",
+        "description": "Search ENCODE experiments",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "assay_title": {"type": "string"},
+                "target": {"type": "string"},
+                "organism": {"type": "string"},
+                "status": {"type": "string", "default": "released"},
+                "limit": {"type": "integer", "default": 10},
+            },
+        },
+        "settings": {"base_url": "https://www.encodeproject.org", "timeout": 30},
+    },
+)
+class ENCODESearchTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def handle_error(self, exception: Exception) -> ToolError:
+        """Classify exceptions into structured ToolError."""
+        error_str = str(exception).lower()
+        if any(
+            kw in error_str
+            for kw in ["auth", "unauthorized", "401", "403", "api key", "token"]
+        ):
+            return ToolAuthError(f"Authentication failed: {exception}")
+        elif any(
+            kw in error_str for kw in ["rate limit", "429", "quota", "limit exceeded"]
+        ):
+            return ToolRateLimitError(f"Rate limit exceeded: {exception}")
+        elif any(
+            kw in error_str
+            for kw in [
+                "unavailable",
+                "timeout",
+                "connection",
+                "network",
+                "not found",
+                "404",
+            ]
+        ):
+            return ToolUnavailableError(f"Tool unavailable: {exception}")
+        elif any(
+            kw in error_str for kw in ["validation", "invalid", "schema", "parameter"]
+        ):
+            return ToolValidationError(f"Validation error: {exception}")
+        elif any(kw in error_str for kw in ["config", "configuration", "setup"]):
+            return ToolConfigError(f"Configuration error: {exception}")
+        elif any(
+            kw in error_str for kw in ["import", "module", "dependency", "package"]
+        ):
+            return ToolDependencyError(f"Dependency error: {exception}")
+        else:
+            return ToolServerError(f"Unexpected error: {exception}")
+
+    def run(self, arguments: Dict[str, Any]):
+        # Read from fields.endpoint or settings.base_url
+        fields = self.tool_config.get("fields", {})
+        settings = self.tool_config.get("settings", {})
+        endpoint = fields.get(
+            "endpoint",
+            settings.get("base_url", "https://www.encodeproject.org/search/"),
+        )
+        # Extract base URL if endpoint includes /search/
+        if endpoint.endswith("/search/"):
+            base = endpoint[:-7]  # Remove "/search/"
+        else:
+            base = endpoint.rstrip("/")
+        timeout = int(settings.get("timeout", 30))
+
+        query: Dict[str, Any] = {"type": "Experiment", "format": "json"}
+        for key in ("assay_title", "target", "organism", "status", "limit"):
+            if arguments.get(key) is not None:
+                query[key] = arguments[key]
+
+        # ENCODE API expects specific parameter format
+        # Build URL with proper query string
+        url = f"{base}/search/?{urlencode(query, doseq=True)}"
+        try:
+            data = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            return {
+                "source": "ENCODE",
+                "endpoint": "search",
+                "query": query,
+                "data": data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "ENCODE",
+                "endpoint": "search",
+                "success": False,
+            }
+
+
+@register_tool(
+    "ENCODEFilesTool",
+    config={
+        "name": "ENCODE_list_files",
+        "type": "ENCODEFilesTool",
+        "description": "List ENCODE files",
+        "parameter": {
+            "type": "object",
+            "properties": {
+                "file_type": {"type": "string"},
+                "assay_title": {"type": "string"},
+                "limit": {"type": "integer", "default": 10},
+            },
+        },
+        "settings": {"base_url": "https://www.encodeproject.org", "timeout": 30},
+    },
+)
+class ENCODEFilesTool:
+    def __init__(self, tool_config=None):
+        self.tool_config = tool_config or {}
+
+    def handle_error(self, exception: Exception) -> ToolError:
+        """Classify exceptions into structured ToolError."""
+        error_str = str(exception).lower()
+        if any(
+            kw in error_str
+            for kw in ["auth", "unauthorized", "401", "403", "api key", "token"]
+        ):
+            return ToolAuthError(f"Authentication failed: {exception}")
+        elif any(
+            kw in error_str for kw in ["rate limit", "429", "quota", "limit exceeded"]
+        ):
+            return ToolRateLimitError(f"Rate limit exceeded: {exception}")
+        elif any(
+            kw in error_str
+            for kw in [
+                "unavailable",
+                "timeout",
+                "connection",
+                "network",
+                "not found",
+                "404",
+            ]
+        ):
+            return ToolUnavailableError(f"Tool unavailable: {exception}")
+        elif any(
+            kw in error_str for kw in ["validation", "invalid", "schema", "parameter"]
+        ):
+            return ToolValidationError(f"Validation error: {exception}")
+        elif any(kw in error_str for kw in ["config", "configuration", "setup"]):
+            return ToolConfigError(f"Configuration error: {exception}")
+        elif any(
+            kw in error_str for kw in ["import", "module", "dependency", "package"]
+        ):
+            return ToolDependencyError(f"Dependency error: {exception}")
+        else:
+            return ToolServerError(f"Unexpected error: {exception}")
+
+    def run(self, arguments: Dict[str, Any]):
+        # Read from fields.endpoint or settings.base_url
+        fields = self.tool_config.get("fields", {})
+        settings = self.tool_config.get("settings", {})
+        endpoint = fields.get(
+            "endpoint",
+            settings.get("base_url", "https://www.encodeproject.org/search/"),
+        )
+        # Extract base URL if endpoint includes /search/
+        if endpoint.endswith("/search/"):
+            base = endpoint[:-7]  # Remove "/search/"
+        else:
+            base = endpoint.rstrip("/")
+        timeout = int(settings.get("timeout", 30))
+
+        query: Dict[str, Any] = {"type": "File", "format": "json"}
+        for key in ("file_type", "assay_title", "limit"):
+            if arguments.get(key):
+                query[key] = arguments[key]
+
+        url = f"{base}/search/?{urlencode(query)}"
+        try:
+            data = _http_get(
+                url, headers={"Accept": "application/json"}, timeout=timeout
+            )
+            return {
+                "source": "ENCODE",
+                "endpoint": "search",
+                "query": query,
+                "data": data,
+                "success": True,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "source": "ENCODE",
+                "endpoint": "search",
+                "success": False,
+            }
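
Since ENCODESearchTool reads its endpoint and timeout from the config passed to the constructor, it can be exercised standalone. The sketch below uses only the __init__ and run() signatures shown in the diff; the "@graph" key is an assumption about ENCODE's search response rather than something this file guarantees.

# Hedged sketch: drive the new ENCODE search tool with an explicit config.
from tooluniverse.encode_tool import ENCODESearchTool

tool = ENCODESearchTool(
    tool_config={"settings": {"base_url": "https://www.encodeproject.org", "timeout": 30}}
)
result = tool.run({"assay_title": "ChIP-seq", "status": "released", "limit": 5})

if result["success"]:
    # ENCODE's search API usually lists hits under "@graph" (assumption, not shown here)
    print(len(result["data"].get("@graph", [])), "experiments for query", result["query"])
else:
    print("request failed:", result["error"])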