tooluniverse 1.0.11.1__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/alphafold_tool.py +47 -7
- tooluniverse/base_tool.py +9 -1
- tooluniverse/build_optimizer.py +115 -22
- tooluniverse/data/alphafold_tools.json +7 -12
- tooluniverse/data/encode_tools.json +139 -0
- tooluniverse/data/gbif_tools.json +152 -0
- tooluniverse/data/gdc_tools.json +116 -0
- tooluniverse/data/gtex_tools.json +116 -0
- tooluniverse/data/icgc_tools.json +0 -0
- tooluniverse/data/mgnify_tools.json +121 -0
- tooluniverse/data/obis_tools.json +122 -0
- tooluniverse/data/optimizer_tools.json +275 -0
- tooluniverse/data/rnacentral_tools.json +99 -0
- tooluniverse/data/smolagent_tools.json +206 -0
- tooluniverse/data/uniprot_tools.json +13 -5
- tooluniverse/data/wikipathways_tools.json +106 -0
- tooluniverse/default_config.py +12 -0
- tooluniverse/encode_tool.py +245 -0
- tooluniverse/execute_function.py +185 -17
- tooluniverse/gbif_tool.py +166 -0
- tooluniverse/gdc_tool.py +175 -0
- tooluniverse/generate_tools.py +121 -9
- tooluniverse/gtex_tool.py +168 -0
- tooluniverse/mgnify_tool.py +181 -0
- tooluniverse/obis_tool.py +185 -0
- tooluniverse/pypi_package_inspector_tool.py +3 -2
- tooluniverse/python_executor_tool.py +43 -13
- tooluniverse/rnacentral_tool.py +124 -0
- tooluniverse/smcp.py +17 -25
- tooluniverse/smcp_server.py +1 -1
- tooluniverse/smolagent_tool.py +555 -0
- tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
- tooluniverse/tools/ENCODE_list_files.py +59 -0
- tooluniverse/tools/ENCODE_search_experiments.py +67 -0
- tooluniverse/tools/GBIF_search_occurrences.py +67 -0
- tooluniverse/tools/GBIF_search_species.py +55 -0
- tooluniverse/tools/GDC_list_files.py +55 -0
- tooluniverse/tools/GDC_search_cases.py +55 -0
- tooluniverse/tools/GTEx_get_expression_summary.py +49 -0
- tooluniverse/tools/GTEx_query_eqtl.py +59 -0
- tooluniverse/tools/MGnify_list_analyses.py +52 -0
- tooluniverse/tools/MGnify_search_studies.py +55 -0
- tooluniverse/tools/OBIS_search_occurrences.py +59 -0
- tooluniverse/tools/OBIS_search_taxa.py +52 -0
- tooluniverse/tools/RNAcentral_get_by_accession.py +46 -0
- tooluniverse/tools/RNAcentral_search.py +52 -0
- tooluniverse/tools/TestCaseGenerator.py +46 -0
- tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
- tooluniverse/tools/ToolDiscover.py +4 -0
- tooluniverse/tools/UniProt_search.py +14 -6
- tooluniverse/tools/WikiPathways_get_pathway.py +52 -0
- tooluniverse/tools/WikiPathways_search.py +52 -0
- tooluniverse/tools/__init__.py +43 -1
- tooluniverse/tools/advanced_literature_search_agent.py +46 -0
- tooluniverse/tools/alphafold_get_annotations.py +4 -10
- tooluniverse/tools/download_binary_file.py +3 -6
- tooluniverse/tools/open_deep_research_agent.py +46 -0
- tooluniverse/uniprot_tool.py +51 -4
- tooluniverse/wikipathways_tool.py +122 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/METADATA +3 -1
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/RECORD +65 -24
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"type": "ComposeTool",
|
|
4
|
+
"name": "ToolDescriptionOptimizer",
|
|
5
|
+
"description": "Optimizes a tool's description and parameter descriptions by generating test cases, executing them, analyzing the results, and suggesting improved descriptions for both the tool and its arguments. Optionally saves a comprehensive optimization report to a file without overwriting the original.",
|
|
6
|
+
"parameter": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"tool_config": {
|
|
10
|
+
"type": "object",
|
|
11
|
+
"description": "The full configuration of the tool to optimize."
|
|
12
|
+
},
|
|
13
|
+
"save_to_file": {
|
|
14
|
+
"type": "boolean",
|
|
15
|
+
"description": "If true, save the optimized description to a file (do not overwrite the original).",
|
|
16
|
+
"default": false
|
|
17
|
+
},
|
|
18
|
+
"output_file": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"description": "Optional file path to save the optimized description. If not provided, use '<tool_name>_optimized_description.txt'."
|
|
21
|
+
},
|
|
22
|
+
"max_iterations": {
|
|
23
|
+
"type": "integer",
|
|
24
|
+
"description": "Maximum number of optimization rounds to perform.",
|
|
25
|
+
"default": 3
|
|
26
|
+
},
|
|
27
|
+
"satisfaction_threshold": {
|
|
28
|
+
"type": "number",
|
|
29
|
+
"description": "Quality score threshold (1-10) to consider optimization satisfactory.",
|
|
30
|
+
"default": 8
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"required": [
|
|
34
|
+
"tool_config",
|
|
35
|
+
"save_to_file",
|
|
36
|
+
"output_file",
|
|
37
|
+
"max_iterations",
|
|
38
|
+
"satisfaction_threshold"
|
|
39
|
+
]
|
|
40
|
+
},
|
|
41
|
+
"auto_load_dependencies": true,
|
|
42
|
+
"fail_on_missing_tools": false,
|
|
43
|
+
"required_tools": [
|
|
44
|
+
"TestCaseGenerator",
|
|
45
|
+
"DescriptionAnalyzer",
|
|
46
|
+
"ArgumentDescriptionOptimizer",
|
|
47
|
+
"DescriptionQualityEvaluator"
|
|
48
|
+
],
|
|
49
|
+
"composition_file": "tool_description_optimizer.py",
|
|
50
|
+
"composition_function": "compose",
|
|
51
|
+
"return_schema": {
|
|
52
|
+
"type": "object",
|
|
53
|
+
"properties": {
|
|
54
|
+
"optimized_tool": {
|
|
55
|
+
"type": "object",
|
|
56
|
+
"description": "Tool with optimized descriptions"
|
|
57
|
+
},
|
|
58
|
+
"optimization_report": {
|
|
59
|
+
"type": "object",
|
|
60
|
+
"description": "Detailed optimization report",
|
|
61
|
+
"properties": {
|
|
62
|
+
"iterations_performed": {
|
|
63
|
+
"type": "integer"
|
|
64
|
+
},
|
|
65
|
+
"final_quality_score": {
|
|
66
|
+
"type": "number"
|
|
67
|
+
},
|
|
68
|
+
"improvements_made": {
|
|
69
|
+
"type": "array",
|
|
70
|
+
"items": {
|
|
71
|
+
"type": "string"
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
"saved_files": {
|
|
75
|
+
"type": "array",
|
|
76
|
+
"items": {
|
|
77
|
+
"type": "string"
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
"required": [
|
|
84
|
+
"optimized_tool",
|
|
85
|
+
"optimization_report"
|
|
86
|
+
]
|
|
87
|
+
}
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"type": "AgenticTool",
|
|
91
|
+
"name": "TestCaseGenerator",
|
|
92
|
+
"description": "Generates diverse and representative ToolUniverse tool call dictionaries for a given tool based on its parameter schema. Each tool call should be a JSON object with 'name' (the tool's name) and 'arguments' (a dict of input arguments), covering different parameter combinations, edge cases, and typical usage. Can generate targeted test cases based on previous optimization feedback.",
|
|
93
|
+
"prompt": "You are an expert software tester. Generate 3-5 diverse ToolUniverse tool call dictionaries for the given tool configuration. Each tool call must be a JSON object with 'name' (tool name) and 'arguments' (input parameters).\n\nFEEDBACK-DRIVEN GENERATION:\nIf tool_config contains '_optimization_feedback' and '_iteration', generate targeted test cases addressing the specific issues mentioned in the feedback. Focus on edge cases, parameter combinations, or usage patterns that need better coverage.\n\nSTANDARD GENERATION:\nCover typical usage, edge cases, and boundary conditions when possible.\n\nTool configuration: {tool_config}\n\nReturn a JSON object with key 'test_cases' containing an array of test case objects. Example format:\n{\"test_cases\": [{\"name\":\"tool_name_with_underscores\",\"arguments\":{\"param\":\"value\"}},{\"name\":\"tool_name_with_underscores\",\"arguments\":{\"param\":123}}]}",
|
|
94
|
+
"input_arguments": [
|
|
95
|
+
"tool_config"
|
|
96
|
+
],
|
|
97
|
+
"parameter": {
|
|
98
|
+
"type": "object",
|
|
99
|
+
"properties": {
|
|
100
|
+
"tool_config": {
|
|
101
|
+
"type": "object",
|
|
102
|
+
"description": "The full configuration of the tool to generate test cases for. May include '_optimization_feedback' and '_iteration' fields for feedback-driven test generation."
|
|
103
|
+
}
|
|
104
|
+
},
|
|
105
|
+
"required": [
|
|
106
|
+
"tool_config"
|
|
107
|
+
]
|
|
108
|
+
},
|
|
109
|
+
"configs": {
|
|
110
|
+
"api_type": "CHATGPT",
|
|
111
|
+
"model_id": "o4-mini-0416",
|
|
112
|
+
"temperature": 1.0,
|
|
113
|
+
"max_new_tokens": 4096,
|
|
114
|
+
"return_json": true,
|
|
115
|
+
"response_format": {
|
|
116
|
+
"type": "json_object"
|
|
117
|
+
}
|
|
118
|
+
},
|
|
119
|
+
"return_schema": {
|
|
120
|
+
"type": "object",
|
|
121
|
+
"properties": {
|
|
122
|
+
"test_cases": {
|
|
123
|
+
"type": "array",
|
|
124
|
+
"description": "Generated test cases for the tool",
|
|
125
|
+
"items": {
|
|
126
|
+
"type": "object",
|
|
127
|
+
"properties": {
|
|
128
|
+
"name": {
|
|
129
|
+
"type": "string",
|
|
130
|
+
"description": "Tool name"
|
|
131
|
+
},
|
|
132
|
+
"arguments": {
|
|
133
|
+
"type": "object",
|
|
134
|
+
"description": "Input arguments"
|
|
135
|
+
}
|
|
136
|
+
},
|
|
137
|
+
"required": [
|
|
138
|
+
"name",
|
|
139
|
+
"arguments"
|
|
140
|
+
]
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
},
|
|
144
|
+
"required": [
|
|
145
|
+
"test_cases"
|
|
146
|
+
]
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
"type": "AgenticTool",
|
|
151
|
+
"name": "ArgumentDescriptionOptimizer",
|
|
152
|
+
"description": "Optimizes the descriptions of tool arguments/parameters based on test case results and actual usage patterns. Provides improved descriptions that are more accurate and user-friendly.",
|
|
153
|
+
"prompt": "You are an expert technical writer specializing in API documentation. Given a tool's parameter schema and test case results, analyze how each parameter is used and optimize their descriptions to be clear, accurate, and concise.\n\nCRITICAL CONSTRAINTS - PARAMETER DESCRIPTION SCOPE:\n1. If the parameter schema contains '_previous_feedback', use that feedback to address specific issues and improve the parameter descriptions accordingly.\n2. Parameter descriptions should be HIGHLY SPECIFIC to each individual parameter.\n3. NEVER repeat or reference the main tool functionality - assume the user already knows what the tool does.\n4. Focus EXCLUSIVELY on parameter-specific details: data types, formats, constraints, valid values, required formats, examples when helpful.\n5. Each description should answer: 'What should I put in this specific parameter?' not 'What does the tool do?'\n6. Avoid generic phrases like 'for this tool', 'used by the tool', 'enables functionality' unless they provide specific technical context.\n7. Be precise about technical requirements (e.g., 'JSON string', 'integer between 1-100', 'URL format', etc.)\n8. Every word must serve a purpose - eliminate filler words and redundant phrases.\n\nOriginal parameter schema:\n{parameter_schema}\n\nTest results showing parameter usage:\n{test_results}\n\nFor each parameter, suggest an improved description that:\n1. Is brief but informative (1-2 sentences max)\n2. Accurately reflects the parameter's specific purpose, data type, and constraints\n3. Uses clear, simple language with precise technical details\n4. Avoids redundancy with the parameter name\n5. Addresses any issues mentioned in previous feedback\n6. Contains only essential information about what value should be provided\n\nReturn a JSON object with keys: 'optimized_parameters' (object with parameter names as keys and optimized descriptions as values) and 'rationale' (explaining the key changes made).",
|
|
154
|
+
"input_arguments": [
|
|
155
|
+
"parameter_schema",
|
|
156
|
+
"test_results"
|
|
157
|
+
],
|
|
158
|
+
"parameter": {
|
|
159
|
+
"type": "object",
|
|
160
|
+
"properties": {
|
|
161
|
+
"parameter_schema": {
|
|
162
|
+
"type": "string",
|
|
163
|
+
"description": "JSON string of the original parameter schema with properties and descriptions."
|
|
164
|
+
},
|
|
165
|
+
"test_results": {
|
|
166
|
+
"type": "string",
|
|
167
|
+
"description": "A JSON string containing test case input/output pairs showing parameter usage."
|
|
168
|
+
}
|
|
169
|
+
},
|
|
170
|
+
"required": [
|
|
171
|
+
"parameter_schema",
|
|
172
|
+
"test_results"
|
|
173
|
+
]
|
|
174
|
+
},
|
|
175
|
+
"configs": {
|
|
176
|
+
"api_type": "CHATGPT",
|
|
177
|
+
"model_id": "o4-mini-0416",
|
|
178
|
+
"temperature": 1.0,
|
|
179
|
+
"max_new_tokens": 1536,
|
|
180
|
+
"return_json": true
|
|
181
|
+
},
|
|
182
|
+
"return_schema": {
|
|
183
|
+
"type": "object",
|
|
184
|
+
"properties": {
|
|
185
|
+
"optimized_parameters": {
|
|
186
|
+
"type": "object",
|
|
187
|
+
"description": "Optimized parameter descriptions",
|
|
188
|
+
"additionalProperties": {
|
|
189
|
+
"type": "string"
|
|
190
|
+
}
|
|
191
|
+
},
|
|
192
|
+
"rationale": {
|
|
193
|
+
"type": "string"
|
|
194
|
+
}
|
|
195
|
+
},
|
|
196
|
+
"required": [
|
|
197
|
+
"optimized_parameters"
|
|
198
|
+
]
|
|
199
|
+
}
|
|
200
|
+
},
|
|
201
|
+
{
|
|
202
|
+
"type": "AgenticTool",
|
|
203
|
+
"name": "DescriptionAnalyzer",
|
|
204
|
+
"description": "Analyzes a tool's original description and the results of multiple test cases, then suggests an improved description that is more accurate, comprehensive, and user-friendly. Optionally provides a rationale for the changes.",
|
|
205
|
+
"prompt": "You are an expert technical writer and tool evaluator. Given the original description of a tool and the results of several test cases (inputs and outputs), analyze whether the description accurately reflects the tool's behavior. Suggest an improved description that is more precise, comprehensive, and user-friendly. Also provide a brief rationale for your changes.\n\nCRITICAL CONSTRAINTS - TOOL DESCRIPTION SCOPE:\n1. If the original description contains 'Previous optimization feedback:', use that feedback to guide your improvements and address the specific issues mentioned.\n2. The tool description should focus EXCLUSIVELY on the OVERALL PURPOSE and HIGH-LEVEL FUNCTIONALITY of the tool.\n3. NEVER include parameter-specific details, formats, or requirements in the tool description.\n4. NEVER mention specific parameter names, data types, or input requirements - these belong in parameter descriptions.\n5. Focus ONLY on: what the tool does, its primary use cases, what kind of output it provides, and its general behavior patterns.\n6. Avoid generic filler phrases like 'enabling workflows', 'supporting analysis', 'facilitating research' unless they add specific meaning.\n7. Every sentence must convey essential information about the tool's core functionality.\n8. Think of the tool description as answering 'What does this tool do?' with concrete, actionable information.\n\nOriginal description:\n{original_description}\n\nTest results:\n{test_results}\n\nReturn a JSON object with keys: 'optimized_description' and 'rationale'.",
|
|
206
|
+
"input_arguments": [
|
|
207
|
+
"original_description",
|
|
208
|
+
"test_results"
|
|
209
|
+
],
|
|
210
|
+
"parameter": {
|
|
211
|
+
"type": "object",
|
|
212
|
+
"properties": {
|
|
213
|
+
"original_description": {
|
|
214
|
+
"type": "string",
|
|
215
|
+
"description": "The original description of the tool."
|
|
216
|
+
},
|
|
217
|
+
"test_results": {
|
|
218
|
+
"type": "string",
|
|
219
|
+
"description": "A JSON string containing a list of test case input/output pairs."
|
|
220
|
+
}
|
|
221
|
+
},
|
|
222
|
+
"required": [
|
|
223
|
+
"original_description",
|
|
224
|
+
"test_results"
|
|
225
|
+
]
|
|
226
|
+
},
|
|
227
|
+
"configs": {
|
|
228
|
+
"api_type": "CHATGPT",
|
|
229
|
+
"model_id": "o4-mini-0416",
|
|
230
|
+
"temperature": 0.4,
|
|
231
|
+
"max_new_tokens": 1024,
|
|
232
|
+
"return_json": true
|
|
233
|
+
}
|
|
234
|
+
},
|
|
235
|
+
{
|
|
236
|
+
"type": "AgenticTool",
|
|
237
|
+
"name": "DescriptionQualityEvaluator",
|
|
238
|
+
"description": "Evaluates the quality of tool descriptions and parameter descriptions, providing a score and specific feedback for improvements.",
|
|
239
|
+
"prompt": "You are an expert evaluator of technical documentation. Given a tool description, parameter descriptions, and test results, evaluate the quality and provide a score from 1-10 along with specific feedback.\n\nTool description:\n{tool_description}\n\nParameter descriptions:\n{parameter_descriptions}\n\nTest results:\n{test_results}\n\nEvaluate based on these criteria:\n1. Clarity and understandability (1-10)\n2. Accuracy based on test results (1-10)\n3. Completeness of information (1-10)\n4. Conciseness and meaningfulness - every sentence must serve a purpose (1-10)\n5. User-friendliness (1-10)\n6. Redundancy avoidance - tool description and parameter descriptions must not duplicate information (1-10)\n\nCRITICAL EVALUATION FOCUS:\n- Tool description should ONLY describe overall functionality and purpose, NOT parameter details\n- Parameter descriptions should ONLY describe specific parameter requirements, NOT tool functionality\n- Check for meaningless filler phrases like 'enabling workflows', 'supporting analysis', 'facilitating integration' - DEDUCT POINTS for vague language\n- Check for overlap: Does the tool description mention parameter names, formats, or specific input requirements? (DEDUCT POINTS)\n- Check for overlap: Do parameter descriptions repeat what the tool does overall? (DEDUCT POINTS)\n- Every sentence must convey essential, actionable information\n\nReturn a JSON object with:\n- 'overall_score': Average of all criteria scores (1-10)\n- 'criteria_scores': Object with individual scores for each criterion\n- 'feedback': Specific suggestions for improvement, identifying meaningless phrases and redundancy issues\n- 'is_satisfactory': Boolean indicating if quality is acceptable (score >= 8)\n- 'meaningfulness_analysis': Detailed explanation of any filler language or redundant information found",
|
|
240
|
+
"input_arguments": [
|
|
241
|
+
"tool_description",
|
|
242
|
+
"parameter_descriptions",
|
|
243
|
+
"test_results"
|
|
244
|
+
],
|
|
245
|
+
"parameter": {
|
|
246
|
+
"type": "object",
|
|
247
|
+
"properties": {
|
|
248
|
+
"tool_description": {
|
|
249
|
+
"type": "string",
|
|
250
|
+
"description": "The tool description to evaluate."
|
|
251
|
+
},
|
|
252
|
+
"parameter_descriptions": {
|
|
253
|
+
"type": "string",
|
|
254
|
+
"description": "JSON string of parameter names and their descriptions."
|
|
255
|
+
},
|
|
256
|
+
"test_results": {
|
|
257
|
+
"type": "string",
|
|
258
|
+
"description": "JSON string containing test case results."
|
|
259
|
+
}
|
|
260
|
+
},
|
|
261
|
+
"required": [
|
|
262
|
+
"tool_description",
|
|
263
|
+
"parameter_descriptions",
|
|
264
|
+
"test_results"
|
|
265
|
+
]
|
|
266
|
+
},
|
|
267
|
+
"configs": {
|
|
268
|
+
"api_type": "CHATGPT",
|
|
269
|
+
"model_id": "o4-mini-0416",
|
|
270
|
+
"temperature": 1.0,
|
|
271
|
+
"max_new_tokens": 1024,
|
|
272
|
+
"return_json": true
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "RNAcentral_search",
|
|
4
|
+
"type": "RNAcentralSearchTool",
|
|
5
|
+
"description": "Search aggregated ncRNA records (miRNA, rRNA, lncRNA, etc.) across sources via RNAcentral. Use to find accessions, types, species, and descriptions for ncRNA analysis.",
|
|
6
|
+
"parameter": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"query": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "Keyword, accession, or sequence-based query (per RNAcentral API)."
|
|
12
|
+
},
|
|
13
|
+
"page_size": {
|
|
14
|
+
"type": "integer",
|
|
15
|
+
"default": 10,
|
|
16
|
+
"minimum": 1,
|
|
17
|
+
"maximum": 100,
|
|
18
|
+
"description": "Number of records per page (1–100)."
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"required": ["query"]
|
|
22
|
+
},
|
|
23
|
+
"fields": {
|
|
24
|
+
"endpoint": "https://rnacentral.org/api/v1/rna/",
|
|
25
|
+
"format": "json"
|
|
26
|
+
},
|
|
27
|
+
"return_schema": {
|
|
28
|
+
"type": "object",
|
|
29
|
+
"description": "RNAcentral rna list response",
|
|
30
|
+
"properties": {
|
|
31
|
+
"status": {"type": "string"},
|
|
32
|
+
"data": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"properties": {
|
|
35
|
+
"count": {"type": "integer"},
|
|
36
|
+
"results": {
|
|
37
|
+
"type": "array",
|
|
38
|
+
"items": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"properties": {
|
|
41
|
+
"rnacentral_id": {"type": "string"},
|
|
42
|
+
"description": {"type": "string"},
|
|
43
|
+
"rna_type": {"type": "string"},
|
|
44
|
+
"taxon": {"type": "object", "properties": {"scientific_name": {"type": "string"}, "taxid": {"type": "integer"}}}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
"url": {"type": "string"}
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"test_examples": [
|
|
54
|
+
{"query": "let-7", "page_size": 1},
|
|
55
|
+
{"query": "U6", "page_size": 1}
|
|
56
|
+
],
|
|
57
|
+
"label": ["RNAcentral", "ncRNA", "Search"],
|
|
58
|
+
"metadata": {
|
|
59
|
+
"tags": ["rna", "mirna", "lncrna", "annotation"],
|
|
60
|
+
"estimated_execution_time": "< 2 seconds"
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"name": "RNAcentral_get_by_accession",
|
|
65
|
+
"type": "RNAcentralGetTool",
|
|
66
|
+
"description": "Retrieve a single RNAcentral entry by accession for detailed annotations and source cross-references. Use for cross-database ID mapping and metadata.",
|
|
67
|
+
"parameter": {
|
|
68
|
+
"type": "object",
|
|
69
|
+
"properties": {
|
|
70
|
+
"accession": {
|
|
71
|
+
"type": "string",
|
|
72
|
+
"description": "RNAcentral accession (e.g., 'URS000075C808')."
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
"required": ["accession"]
|
|
76
|
+
},
|
|
77
|
+
"fields": {
|
|
78
|
+
"endpoint": "https://rnacentral.org/api/v1/rna/{accession}",
|
|
79
|
+
"format": "json"
|
|
80
|
+
},
|
|
81
|
+
"return_schema": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"description": "RNAcentral accession response",
|
|
84
|
+
"properties": {
|
|
85
|
+
"status": {"type": "string"},
|
|
86
|
+
"data": {"type": "object"},
|
|
87
|
+
"url": {"type": "string"}
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"test_examples": [
|
|
91
|
+
{"accession": "URS000075C808"}
|
|
92
|
+
],
|
|
93
|
+
"label": ["RNAcentral", "ncRNA", "Record"],
|
|
94
|
+
"metadata": {
|
|
95
|
+
"tags": ["rna", "accession", "annotation"],
|
|
96
|
+
"estimated_execution_time": "< 2 seconds"
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
]
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"type": "SmolAgentTool",
|
|
4
|
+
"name": "advanced_literature_search_agent",
|
|
5
|
+
"description": "Advanced multi-agent literature search system. Required pipeline: (1) query_planner must produce a structured plan and immediately dispatch each sub-query to multi_database_searcher; (2) multi_database_searcher must call ToolUniverse literature tools (PubMed_search_articles, EuropePMC_search_articles, SemanticScholar_search_papers, openalex_literature_search, ArXiv_search_papers, BioRxiv_search_preprints, MedRxiv_search_preprints, Crossref_search_works, DBLP_search_publications, DOAJ_search_articles, CORE_search_papers, PMC_search_papers) and return raw results; (3) result_analyzer must deduplicate and score results; (4) literature_synthesizer must generate a structured markdown report (Executive Summary, Key Findings, Trends, Methods, Top Papers with rationale, Gaps, References). Do not skip any stage; do not answer directly without calling tools.",
|
|
6
|
+
"parameter": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"query": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "Research query or topic to search in academic literature. The agent will automatically determine search strategy, database selection, filters, and result limits based on the query content and research domain."
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"required": ["query"]
|
|
15
|
+
},
|
|
16
|
+
"settings": {
|
|
17
|
+
"agent_type": "ManagedAgent",
|
|
18
|
+
"available_tools": [],
|
|
19
|
+
"model": {
|
|
20
|
+
"provider": "AzureOpenAIModel",
|
|
21
|
+
"model_id": "gpt-5",
|
|
22
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
23
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
24
|
+
"api_version": "2024-10-21"
|
|
25
|
+
},
|
|
26
|
+
"agent_init_params": {
|
|
27
|
+
"max_steps": 50,
|
|
28
|
+
"stream_outputs": true,
|
|
29
|
+
"verbosity_level": 1,
|
|
30
|
+
"planning_interval": 2,
|
|
31
|
+
"max_execution_time": 600
|
|
32
|
+
},
|
|
33
|
+
"sub_agents": [
|
|
34
|
+
{
|
|
35
|
+
"name": "query_planner",
|
|
36
|
+
"description": "Strategic query planning agent that analyzes intent, decomposes into prioritized sub-queries, and generates optimized search terms and target databases. After outputting the plan, immediately invoke multi_database_searcher with the sub-queries (no summaries). Output: JSON plan and explicit call instruction for multi_database_searcher.",
|
|
37
|
+
"agent_type": "CodeAgent",
|
|
38
|
+
"available_tools": [],
|
|
39
|
+
"model": {
|
|
40
|
+
"provider": "AzureOpenAIModel",
|
|
41
|
+
"model_id": "gpt-5",
|
|
42
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
43
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
44
|
+
"api_version": "2024-10-21"
|
|
45
|
+
},
|
|
46
|
+
"agent_init_params": {
|
|
47
|
+
"add_base_tools": true,
|
|
48
|
+
"additional_authorized_imports": ["json", "datetime", "collections"],
|
|
49
|
+
"max_steps": 10,
|
|
50
|
+
"stream_outputs": true,
|
|
51
|
+
"verbosity_level": 1
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"name": "multi_database_searcher",
|
|
56
|
+
"description": "Multi-database parallel search specialist. Must call the following ToolUniverse tools for each sub-query (as applicable): PubMed_search_articles, EuropePMC_search_articles, SemanticScholar_search_papers, openalex_literature_search, ArXiv_search_papers, BioRxiv_search_preprints, MedRxiv_search_preprints, Crossref_search_works, DBLP_search_publications, DOAJ_search_articles, CORE_search_papers, PMC_search_papers. Adapt queries to each API and return structured JSON with raw items (title, abstract, authors, doi, year, venue, citations, url). Do not summarize.",
|
|
57
|
+
"agent_type": "CodeAgent",
|
|
58
|
+
"available_tools": [
|
|
59
|
+
"PubMed_search_articles",
|
|
60
|
+
"EuropePMC_search_articles",
|
|
61
|
+
"SemanticScholar_search_papers",
|
|
62
|
+
"openalex_literature_search",
|
|
63
|
+
"ArXiv_search_papers",
|
|
64
|
+
"BioRxiv_search_preprints",
|
|
65
|
+
"MedRxiv_search_preprints",
|
|
66
|
+
"Crossref_search_works",
|
|
67
|
+
"DBLP_search_publications",
|
|
68
|
+
"DOAJ_search_articles",
|
|
69
|
+
"CORE_search_papers",
|
|
70
|
+
"PMC_search_papers"
|
|
71
|
+
],
|
|
72
|
+
"model": {
|
|
73
|
+
"provider": "AzureOpenAIModel",
|
|
74
|
+
"model_id": "gpt-5",
|
|
75
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
76
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
77
|
+
"api_version": "2024-10-21"
|
|
78
|
+
},
|
|
79
|
+
"agent_init_params": {
|
|
80
|
+
"add_base_tools": true,
|
|
81
|
+
"additional_authorized_imports": ["json", "concurrent.futures", "datetime", "urllib.parse", "re"],
|
|
82
|
+
"max_steps": 25,
|
|
83
|
+
"stream_outputs": true,
|
|
84
|
+
"verbosity_level": 1,
|
|
85
|
+
"max_tool_threads": 5
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"name": "result_analyzer",
|
|
90
|
+
"description": "Intelligent result analysis agent. Input: multi_database_searcher raw results. Steps: deduplicate (DOI, normalized title similarity, author matching), compute composite relevance score (keyword match, normalized citations, venue impact, recency, cross-source frequency), filter low-quality (<0.3), rank and cluster by themes, identify high-impact and recent breakthroughs. Output: ranked, deduplicated list with scores, themes, and quality flags. Then instruct literature_synthesizer to produce the final report.",
|
|
91
|
+
"agent_type": "CodeAgent",
|
|
92
|
+
"available_tools": [],
|
|
93
|
+
"model": {
|
|
94
|
+
"provider": "AzureOpenAIModel",
|
|
95
|
+
"model_id": "gpt-5",
|
|
96
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
97
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
98
|
+
"api_version": "2024-10-21"
|
|
99
|
+
},
|
|
100
|
+
"agent_init_params": {
|
|
101
|
+
"add_base_tools": true,
|
|
102
|
+
"additional_authorized_imports": ["json", "collections", "re", "difflib", "datetime", "math"],
|
|
103
|
+
"max_steps": 15,
|
|
104
|
+
"stream_outputs": true,
|
|
105
|
+
"verbosity_level": 1
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
{
|
|
109
|
+
"name": "literature_synthesizer",
|
|
110
|
+
"description": "Literature synthesis and report generation specialist. Input: result_analyzer ranked list. Produce a structured markdown report with sections: Executive Summary, Key Findings, Research Trends, Methodology Overview, Top Papers with rationale (10–15), Research Gaps, References (with DOIs/URLs). Use only analyzed items; do not invent citations.",
|
|
111
|
+
"agent_type": "CodeAgent",
|
|
112
|
+
"available_tools": [],
|
|
113
|
+
"model": {
|
|
114
|
+
"provider": "AzureOpenAIModel",
|
|
115
|
+
"model_id": "gpt-5",
|
|
116
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
117
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
118
|
+
"api_version": "2024-10-21"
|
|
119
|
+
},
|
|
120
|
+
"agent_init_params": {
|
|
121
|
+
"add_base_tools": true,
|
|
122
|
+
"additional_authorized_imports": ["json", "collections", "datetime", "statistics"],
|
|
123
|
+
"max_steps": 20,
|
|
124
|
+
"stream_outputs": true,
|
|
125
|
+
"verbosity_level": 1
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
]
|
|
129
|
+
}
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"type": "SmolAgentTool",
|
|
133
|
+
"name": "open_deep_research_agent",
|
|
134
|
+
"description": "Research manager agent that decomposes the user task, delegates focused subtasks to domain sub‑agents (web researcher, synthesizer), enforces evidence use, requires numeric outputs with units, and returns a concise final answer with citations. It should: (1) draft a brief plan, (2) ask web_researcher to gather authoritative facts (URLs + extracted numbers), (3) validate consistency across sources, (4) instruct synthesizer to compute/compose the final result, and (5) output only the final, unit‑aware answer plus one short rationale line.",
|
|
135
|
+
"parameter": {
|
|
136
|
+
"type": "object",
|
|
137
|
+
"properties": {
|
|
138
|
+
"task": {"type": "string", "description": "Research query/task to execute"}
|
|
139
|
+
},
|
|
140
|
+
"required": ["task"]
|
|
141
|
+
},
|
|
142
|
+
"settings": {
|
|
143
|
+
"agent_type": "ManagedAgent",
|
|
144
|
+
"available_tools": [],
|
|
145
|
+
"model": {
|
|
146
|
+
"provider": "AzureOpenAIModel",
|
|
147
|
+
"model_id": "gpt-5",
|
|
148
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
149
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
150
|
+
"api_version": "2024-10-21"
|
|
151
|
+
},
|
|
152
|
+
"agent_init_params": {
|
|
153
|
+
"max_steps": 30,
|
|
154
|
+
"stream_outputs": true,
|
|
155
|
+
"verbosity_level": 1,
|
|
156
|
+
"planning_interval": 1
|
|
157
|
+
},
|
|
158
|
+
"sub_agents": [
|
|
159
|
+
{
|
|
160
|
+
"name": "web_researcher",
|
|
161
|
+
"description": "Web research specialist that (a) formulates robust search queries, (b) selects authoritative sources (official sites, Wikipedia with corroboration, reputable databases), (c) visits pages and extracts exact figures (units, context), (d) records 1–2 key quotes/snippets and the canonical URL, and (e) returns a short, source‑linked note ready for synthesis.",
|
|
162
|
+
"agent_type": "CodeAgent",
|
|
163
|
+
"available_tools": [
|
|
164
|
+
{"type": "smolagents", "class": "WebSearchTool", "import_path": "smolagents.default_tools"},
|
|
165
|
+
{"type": "smolagents", "class": "VisitWebpageTool", "import_path": "smolagents.default_tools"}
|
|
166
|
+
],
|
|
167
|
+
"model": {
|
|
168
|
+
"provider": "AzureOpenAIModel",
|
|
169
|
+
"model_id": "gpt-5",
|
|
170
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
171
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
172
|
+
"api_version": "2024-10-21"
|
|
173
|
+
},
|
|
174
|
+
"agent_init_params": {
|
|
175
|
+
"add_base_tools": true,
|
|
176
|
+
"additional_authorized_imports": ["requests", "bs4", "lxml"],
|
|
177
|
+
"max_steps": 12,
|
|
178
|
+
"stream_outputs": true,
|
|
179
|
+
"verbosity_level": 1,
|
|
180
|
+
"planning_interval": 1
|
|
181
|
+
}
|
|
182
|
+
},
|
|
183
|
+
{
|
|
184
|
+
"name": "synthesizer",
|
|
185
|
+
"description": "Synthesis specialist that reads prior research notes, performs any light calculation (unit conversion, division, rounding), resolves minor conflicts by favoring higher‑authority sources, and produces a single, precise answer with units and 1–2 citations. Keep prose minimal; prioritize the final numeric result and rationale.",
|
|
186
|
+
"agent_type": "ToolCallingAgent",
|
|
187
|
+
"available_tools": [
|
|
188
|
+
{"type": "smolagents", "class": "WebSearchTool", "import_path": "smolagents.default_tools"}
|
|
189
|
+
],
|
|
190
|
+
"model": {
|
|
191
|
+
"provider": "AzureOpenAIModel",
|
|
192
|
+
"model_id": "gpt-5",
|
|
193
|
+
"api_key": "env:AZURE_OPENAI_API_KEY",
|
|
194
|
+
"azure_endpoint": "https://azure-ai.hms.edu",
|
|
195
|
+
"api_version": "2024-10-21"
|
|
196
|
+
},
|
|
197
|
+
"agent_init_params": {
|
|
198
|
+
"max_steps": 8,
|
|
199
|
+
"stream_outputs": false,
|
|
200
|
+
"planning_interval": 1
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
]
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
]
|
|
@@ -210,28 +210,36 @@
|
|
|
210
210
|
},
|
|
211
211
|
{
|
|
212
212
|
"name": "UniProt_search",
|
|
213
|
-
"description": "Search UniProtKB database
|
|
213
|
+
"description": "Search UniProtKB database with flexible query syntax. Returns protein entries with accession numbers and metadata. Query syntax supports: field searches (gene:TP53, organism_id:9606, reviewed:true), ranges (length:[100 TO 500], mass:[20000 TO 50000]), wildcards (gene:MEIOB*), boolean operators (AND/OR/NOT), and parentheses for grouping. Examples: 'gene:TP53 AND organism_id:9606', 'length:[400 TO 500] AND reviewed:true', 'tissue:brain NOT organism_id:10090'.",
|
|
214
214
|
"parameter": {
|
|
215
215
|
"type": "object",
|
|
216
216
|
"properties": {
|
|
217
217
|
"query": {
|
|
218
218
|
"type": "string",
|
|
219
|
-
"description": "Search query
|
|
219
|
+
"description": "Search query using UniProt syntax. Simple: 'MEIOB', 'insulin'. Field searches: 'gene:TP53', 'protein_name:insulin', 'organism_id:9606', 'reviewed:true'. Ranges: 'length:[100 TO 500]', 'mass:[20000 TO 50000]'. Wildcards: 'gene:MEIOB*'. Boolean: 'gene:TP53 AND organism_id:9606', 'tissue:brain OR tissue:liver', 'reviewed:true NOT fragment:true'. Use parentheses for grouping: '(organism_id:9606 OR organism_id:10090) AND gene:TP53'. Note: 'organism:' auto-converts to 'organism_id:'."
|
|
220
220
|
},
|
|
221
221
|
"organism": {
|
|
222
222
|
"type": "string",
|
|
223
|
-
"description": "Optional organism filter.
|
|
223
|
+
"description": "Optional organism filter. Use common names ('human', 'mouse', 'rat', 'yeast') or taxonomy ID ('9606'). Automatically combined with query using AND. Will not duplicate if organism is already in query."
|
|
224
224
|
},
|
|
225
225
|
"limit": {
|
|
226
226
|
"type": "integer",
|
|
227
|
-
"description": "Maximum number of results to return (default: 25, max: 500)"
|
|
227
|
+
"description": "Maximum number of results to return (default: 25, max: 500). Accepts string or integer."
|
|
228
|
+
},
|
|
229
|
+
"min_length": {
|
|
230
|
+
"type": "integer",
|
|
231
|
+
"description": "Minimum sequence length. Auto-converts to 'length:[min TO *]' range query."
|
|
232
|
+
},
|
|
233
|
+
"max_length": {
|
|
234
|
+
"type": "integer",
|
|
235
|
+
"description": "Maximum sequence length. Auto-converts to 'length:[* TO max]' range query."
|
|
228
236
|
},
|
|
229
237
|
"fields": {
|
|
230
238
|
"type": "array",
|
|
231
239
|
"items": {
|
|
232
240
|
"type": "string"
|
|
233
241
|
},
|
|
234
|
-
"description": "
|
|
242
|
+
"description": "List of field names to return (e.g., ['accession','gene_primary','length','organism_name']). When specified, returns raw API response with requested fields. Common fields: accession, id, gene_names, gene_primary, protein_name, organism_name, organism_id, length, mass, sequence, reviewed, cc_function. See UniProt API docs for full list. Default (no fields): returns formatted response with accession, id, protein_name, gene_names, organism, length."
|
|
235
243
|
}
|
|
236
244
|
},
|
|
237
245
|
"required": ["query"]
|