tooluniverse 0.2.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic. Click here for more details.

Files changed (186) hide show
  1. tooluniverse/__init__.py +340 -4
  2. tooluniverse/admetai_tool.py +84 -0
  3. tooluniverse/agentic_tool.py +563 -0
  4. tooluniverse/alphafold_tool.py +96 -0
  5. tooluniverse/base_tool.py +129 -6
  6. tooluniverse/boltz_tool.py +207 -0
  7. tooluniverse/chem_tool.py +192 -0
  8. tooluniverse/compose_scripts/__init__.py +1 -0
  9. tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
  10. tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
  11. tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
  12. tooluniverse/compose_scripts/literature_tool.py +34 -0
  13. tooluniverse/compose_scripts/output_summarizer.py +279 -0
  14. tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
  15. tooluniverse/compose_scripts/tool_discover.py +705 -0
  16. tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
  17. tooluniverse/compose_tool.py +371 -0
  18. tooluniverse/ctg_tool.py +1002 -0
  19. tooluniverse/custom_tool.py +81 -0
  20. tooluniverse/dailymed_tool.py +108 -0
  21. tooluniverse/data/admetai_tools.json +155 -0
  22. tooluniverse/data/adverse_event_tools.json +108 -0
  23. tooluniverse/data/agentic_tools.json +1156 -0
  24. tooluniverse/data/alphafold_tools.json +87 -0
  25. tooluniverse/data/boltz_tools.json +9 -0
  26. tooluniverse/data/chembl_tools.json +16 -0
  27. tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
  28. tooluniverse/data/compose_tools.json +202 -0
  29. tooluniverse/data/dailymed_tools.json +70 -0
  30. tooluniverse/data/dataset_tools.json +646 -0
  31. tooluniverse/data/disease_target_score_tools.json +712 -0
  32. tooluniverse/data/efo_tools.json +17 -0
  33. tooluniverse/data/embedding_tools.json +319 -0
  34. tooluniverse/data/enrichr_tools.json +31 -0
  35. tooluniverse/data/europe_pmc_tools.json +22 -0
  36. tooluniverse/data/expert_feedback_tools.json +10 -0
  37. tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
  38. tooluniverse/data/fda_drug_labeling_tools.json +1 -1
  39. tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
  40. tooluniverse/data/finder_tools.json +209 -0
  41. tooluniverse/data/gene_ontology_tools.json +113 -0
  42. tooluniverse/data/gwas_tools.json +1082 -0
  43. tooluniverse/data/hpa_tools.json +333 -0
  44. tooluniverse/data/humanbase_tools.json +47 -0
  45. tooluniverse/data/idmap_tools.json +74 -0
  46. tooluniverse/data/mcp_client_tools_example.json +113 -0
  47. tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
  48. tooluniverse/data/medlineplus_tools.json +141 -0
  49. tooluniverse/data/monarch_tools.json +1 -1
  50. tooluniverse/data/openalex_tools.json +36 -0
  51. tooluniverse/data/opentarget_tools.json +1 -1
  52. tooluniverse/data/output_summarization_tools.json +101 -0
  53. tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
  54. tooluniverse/data/packages/categorized_tools.txt +206 -0
  55. tooluniverse/data/packages/cheminformatics_tools.json +347 -0
  56. tooluniverse/data/packages/earth_sciences_tools.json +74 -0
  57. tooluniverse/data/packages/genomics_tools.json +776 -0
  58. tooluniverse/data/packages/image_processing_tools.json +38 -0
  59. tooluniverse/data/packages/machine_learning_tools.json +789 -0
  60. tooluniverse/data/packages/neuroscience_tools.json +62 -0
  61. tooluniverse/data/packages/original_tools.txt +0 -0
  62. tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
  63. tooluniverse/data/packages/scientific_computing_tools.json +560 -0
  64. tooluniverse/data/packages/single_cell_tools.json +453 -0
  65. tooluniverse/data/packages/structural_biology_tools.json +396 -0
  66. tooluniverse/data/packages/visualization_tools.json +399 -0
  67. tooluniverse/data/pubchem_tools.json +215 -0
  68. tooluniverse/data/pubtator_tools.json +68 -0
  69. tooluniverse/data/rcsb_pdb_tools.json +1332 -0
  70. tooluniverse/data/reactome_tools.json +19 -0
  71. tooluniverse/data/semantic_scholar_tools.json +26 -0
  72. tooluniverse/data/special_tools.json +2 -25
  73. tooluniverse/data/tool_composition_tools.json +88 -0
  74. tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
  75. tooluniverse/data/txagent_client_tools.json +9 -0
  76. tooluniverse/data/uniprot_tools.json +211 -0
  77. tooluniverse/data/url_fetch_tools.json +94 -0
  78. tooluniverse/data/uspto_downloader_tools.json +9 -0
  79. tooluniverse/data/uspto_tools.json +811 -0
  80. tooluniverse/data/xml_tools.json +3275 -0
  81. tooluniverse/dataset_tool.py +296 -0
  82. tooluniverse/default_config.py +165 -0
  83. tooluniverse/efo_tool.py +42 -0
  84. tooluniverse/embedding_database.py +630 -0
  85. tooluniverse/embedding_sync.py +396 -0
  86. tooluniverse/enrichr_tool.py +266 -0
  87. tooluniverse/europe_pmc_tool.py +52 -0
  88. tooluniverse/execute_function.py +1775 -95
  89. tooluniverse/extended_hooks.py +444 -0
  90. tooluniverse/gene_ontology_tool.py +194 -0
  91. tooluniverse/graphql_tool.py +158 -36
  92. tooluniverse/gwas_tool.py +358 -0
  93. tooluniverse/hpa_tool.py +1645 -0
  94. tooluniverse/humanbase_tool.py +389 -0
  95. tooluniverse/logging_config.py +254 -0
  96. tooluniverse/mcp_client_tool.py +764 -0
  97. tooluniverse/mcp_integration.py +413 -0
  98. tooluniverse/mcp_tool_registry.py +925 -0
  99. tooluniverse/medlineplus_tool.py +337 -0
  100. tooluniverse/openalex_tool.py +228 -0
  101. tooluniverse/openfda_adv_tool.py +283 -0
  102. tooluniverse/openfda_tool.py +393 -160
  103. tooluniverse/output_hook.py +1122 -0
  104. tooluniverse/package_tool.py +195 -0
  105. tooluniverse/pubchem_tool.py +158 -0
  106. tooluniverse/pubtator_tool.py +168 -0
  107. tooluniverse/rcsb_pdb_tool.py +38 -0
  108. tooluniverse/reactome_tool.py +108 -0
  109. tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
  110. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
  111. tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
  112. tooluniverse/remote/expert_feedback/simple_test.py +23 -0
  113. tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
  114. tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
  115. tooluniverse/remote/immune_compass/compass_tool.py +327 -0
  116. tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
  117. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
  118. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
  119. tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
  120. tooluniverse/remote_tool.py +99 -0
  121. tooluniverse/restful_tool.py +53 -30
  122. tooluniverse/scripts/generate_tool_graph.py +408 -0
  123. tooluniverse/scripts/visualize_tool_graph.py +829 -0
  124. tooluniverse/semantic_scholar_tool.py +62 -0
  125. tooluniverse/smcp.py +2452 -0
  126. tooluniverse/smcp_server.py +975 -0
  127. tooluniverse/test/mcp_server_test.py +0 -0
  128. tooluniverse/test/test_admetai_tool.py +370 -0
  129. tooluniverse/test/test_agentic_tool.py +129 -0
  130. tooluniverse/test/test_alphafold_tool.py +71 -0
  131. tooluniverse/test/test_chem_tool.py +37 -0
  132. tooluniverse/test/test_compose_lieraturereview.py +63 -0
  133. tooluniverse/test/test_compose_tool.py +448 -0
  134. tooluniverse/test/test_dailymed.py +69 -0
  135. tooluniverse/test/test_dataset_tool.py +200 -0
  136. tooluniverse/test/test_disease_target_score.py +56 -0
  137. tooluniverse/test/test_drugbank_filter_examples.py +179 -0
  138. tooluniverse/test/test_efo.py +31 -0
  139. tooluniverse/test/test_enrichr_tool.py +21 -0
  140. tooluniverse/test/test_europe_pmc_tool.py +20 -0
  141. tooluniverse/test/test_fda_adv.py +95 -0
  142. tooluniverse/test/test_fda_drug_labeling.py +91 -0
  143. tooluniverse/test/test_gene_ontology_tools.py +66 -0
  144. tooluniverse/test/test_gwas_tool.py +139 -0
  145. tooluniverse/test/test_hpa.py +625 -0
  146. tooluniverse/test/test_humanbase_tool.py +20 -0
  147. tooluniverse/test/test_idmap_tools.py +61 -0
  148. tooluniverse/test/test_mcp_server.py +211 -0
  149. tooluniverse/test/test_mcp_tool.py +247 -0
  150. tooluniverse/test/test_medlineplus.py +220 -0
  151. tooluniverse/test/test_openalex_tool.py +32 -0
  152. tooluniverse/test/test_opentargets.py +28 -0
  153. tooluniverse/test/test_pubchem_tool.py +116 -0
  154. tooluniverse/test/test_pubtator_tool.py +37 -0
  155. tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
  156. tooluniverse/test/test_reactome.py +54 -0
  157. tooluniverse/test/test_semantic_scholar_tool.py +24 -0
  158. tooluniverse/test/test_software_tools.py +147 -0
  159. tooluniverse/test/test_tool_description_optimizer.py +49 -0
  160. tooluniverse/test/test_tool_finder.py +26 -0
  161. tooluniverse/test/test_tool_finder_llm.py +252 -0
  162. tooluniverse/test/test_tools_find.py +195 -0
  163. tooluniverse/test/test_uniprot_tools.py +74 -0
  164. tooluniverse/test/test_uspto_tool.py +72 -0
  165. tooluniverse/test/test_xml_tool.py +113 -0
  166. tooluniverse/tool_finder_embedding.py +267 -0
  167. tooluniverse/tool_finder_keyword.py +693 -0
  168. tooluniverse/tool_finder_llm.py +699 -0
  169. tooluniverse/tool_graph_web_ui.py +955 -0
  170. tooluniverse/tool_registry.py +416 -0
  171. tooluniverse/uniprot_tool.py +155 -0
  172. tooluniverse/url_tool.py +253 -0
  173. tooluniverse/uspto_tool.py +240 -0
  174. tooluniverse/utils.py +369 -41
  175. tooluniverse/xml_tool.py +369 -0
  176. tooluniverse-1.0.1.dist-info/METADATA +387 -0
  177. tooluniverse-1.0.1.dist-info/RECORD +182 -0
  178. tooluniverse-1.0.1.dist-info/entry_points.txt +9 -0
  179. tooluniverse/generate_mcp_tools.py +0 -113
  180. tooluniverse/mcp_server.py +0 -3340
  181. tooluniverse-0.2.0.dist-info/METADATA +0 -139
  182. tooluniverse-0.2.0.dist-info/RECORD +0 -21
  183. tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
  184. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/WHEEL +0 -0
  185. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/licenses/LICENSE +0 -0
  186. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1156 @@
1
+ [
2
+ {
3
+ "type": "AgenticTool",
4
+ "name": "ScientificTextSummarizer",
5
+ "description": "Summarizes biomedical research texts, abstracts, or papers with specified length and focus areas. Uses AI to extract key findings, methodology, and conclusions from complex biomedical literature.",
6
+ "prompt": "You are a biomedical expert. Please summarize the following biomedical text in {summary_length} words, focusing on {focus_area}:\n\n{text}\n\nProvide a clear, concise summary that captures the most important information.",
7
+ "input_arguments": [
8
+ "text",
9
+ "summary_length",
10
+ "focus_area"
11
+ ],
12
+ "parameter": {
13
+ "type": "object",
14
+ "properties": {
15
+ "text": {
16
+ "type": "string",
17
+ "description": "The biomedical text, abstract, or paper content to be summarized.",
18
+ "required": true
19
+ },
20
+ "summary_length": {
21
+ "type": "string",
22
+ "description": "Desired length of the summary in words, given as a number (e.g., '50', '100', '200').",
23
+ "required": true
24
+ },
25
+ "focus_area": {
26
+ "type": "string",
27
+ "description": "What to focus on in the summary (e.g., 'methodology', 'results', 'clinical implications', 'drug interactions').",
28
+ "required": true
29
+ }
30
+ },
31
+ "required": [
32
+ "text",
33
+ "summary_length",
34
+ "focus_area"
35
+ ]
36
+ },
37
+ "configs": {
38
+ "api_type": "CHATGPT",
39
+ "model_id": "o4-mini-0416",
40
+ "temperature": 1.0,
41
+ "max_new_tokens": 4096,
42
+ "return_json": false
43
+ }
44
+ },
45
+ {
46
+ "type": "AgenticTool",
47
+ "name": "CodeQualityAnalyzer",
48
+ "description": "Analyzes code quality from multiple dimensions including algorithmic correctness, functional implementation capability, performance characteristics, and best practices. Provides detailed feedback and improvement suggestions.",
49
+ "prompt": "You are an expert software engineer and code quality analyst. Please analyze the following code implementation and provide comprehensive quality assessment.\n\n## CODE TO ANALYZE\nTool Name: {tool_name}\nTool Description: {tool_description}\nTool Parameters: {tool_parameters}\nImplementation Code: {implementation_code}\nTest Cases: {test_cases}\nTest Execution Results: {test_execution_results}\n\n## ANALYSIS REQUIREMENTS\nPlease provide a comprehensive analysis covering the following dimensions:\n\n### 1. ALGORITHMIC CORRECTNESS (0-10)\n- Mathematical accuracy and logical correctness\n- Algorithm efficiency and time/space complexity\n- Edge case handling and boundary conditions\n- Error propagation and numerical stability\n- Correctness of domain-specific calculations\n\n### 2. FUNCTIONAL IMPLEMENTATION CAPABILITY (0-10)\n- Completeness of required functionality\n- Parameter validation and input handling\n- Return value accuracy and format consistency\n- Integration with external libraries/APIs\n- Feature completeness vs. requirements\n\n### 3. PERFORMANCE CHARACTERISTICS (0-10)\n- Time complexity analysis\n- Space complexity analysis\n- Memory usage optimization\n- Computational efficiency\n- Scalability considerations\n\n### 4. CODE QUALITY AND STRUCTURE (0-10)\n- Code readability and maintainability\n- Function and variable naming\n- Code organization and modularity\n- Documentation quality\n- Adherence to coding standards\n\n### 5. ERROR HANDLING AND ROBUSTNESS (0-10)\n- Exception handling coverage\n- Input validation robustness\n- Error message clarity and usefulness\n- Graceful degradation strategies\n- Recovery mechanisms\n\n### 6. TESTING AND VALIDATION (0-10)\n- Test coverage completeness\n- Test case quality and relevance\n- Edge case testing\n- Performance testing\n- Integration testing considerations\n- **IMPORTANT**: When test_execution_results are provided, use them to validate actual code behavior and adjust scoring accordingly\n\n### 7. 
SECURITY AND SAFETY (0-10)\n- Input sanitization and validation\n- Resource usage limits\n- Access control considerations\n- Data privacy protection\n- Security best practices\n\n### 8. MAINTAINABILITY AND EXTENSIBILITY (0-10)\n- Code modularity and reusability\n- Configuration flexibility\n- Future enhancement readiness\n- Dependency management\n- Technical debt assessment\n\n## TEST EXECUTION ANALYSIS\nWhen test_execution_results are provided:\n- Analyze actual test outcomes vs. expected results\n- Identify discrepancies between code behavior and test expectations\n- Use real execution data to validate code correctness\n- Adjust quality scores based on actual performance\n- Provide specific feedback on test failures and their implications\n\n## OUTPUT FORMAT\nProvide your analysis in the following JSON format:\n\n```json\n{\n \"overall_score\": <0-10>,\n \"scores\": {\n \"algorithmic_correctness\": <0-10>,\n \"functional_capability\": <0-10>,\n \"performance\": <0-10>,\n \"code_quality\": <0-10>,\n \"error_handling\": <0-10>,\n \"testing\": <0-10>,\n \"security\": <0-10>,\n \"maintainability\": <0-10>\n },\n \"feedback\": {\n \"strengths\": [\"list of code strengths\"],\n \"weaknesses\": [\"list of specific weaknesses\"],\n \"critical_issues\": [\"list of critical issues that must be fixed\"],\n \"improvement_opportunities\": [\"list of areas for improvement\"]\n },\n \"algorithm_analysis\": {\n \"complexity\": \"O(n) analysis\",\n \"correctness_verification\": \"mathematical verification details\",\n \"edge_cases\": \"identified edge cases\",\n \"numerical_stability\": \"numerical computation stability assessment\"\n },\n \"functional_verification\": {\n \"requirements_coverage\": \"percentage of requirements covered\",\n \"missing_features\": [\"list of missing features\"],\n \"integration_points\": [\"external dependencies and integration points\"],\n \"api_consistency\": \"API design consistency assessment\"\n },\n \"test_execution_analysis\": {\n 
\"test_results_summary\": \"summary of test execution outcomes\",\n \"pass_rate\": \"percentage of tests passed\",\n \"failed_tests\": [\"list of failed tests with reasons\"],\n \"actual_vs_expected\": \"analysis of actual vs expected behavior\"\n },\n \"recommendations\": [\n {\n \"priority\": \"high|medium|low\",\n \"category\": \"algorithm|functionality|performance|quality|security\",\n \"description\": \"specific improvement description\",\n \"action\": \"concrete action to take\",\n \"expected_impact\": \"expected improvement impact\"\n }\n ]\n}\n```\n\n## ANALYSIS GUIDELINES\n- Be thorough and objective in your assessment\n- Provide specific examples from the code when possible\n- Focus on actionable feedback and concrete improvements\n- Consider both immediate fixes and long-term improvements\n- Evaluate code from both technical and business perspectives\n- Provide evidence-based scoring with clear justification\n- **When test_execution_results are available, prioritize actual behavior over theoretical analysis**",
50
+ "input_arguments": [
51
+ "tool_name",
52
+ "tool_description",
53
+ "tool_parameters",
54
+ "implementation_code",
55
+ "test_cases",
56
+ "test_execution_results"
57
+ ],
58
+ "parameter": {
59
+ "type": "object",
60
+ "properties": {
61
+ "tool_name": {
62
+ "type": "string",
63
+ "description": "Name of the tool being analyzed",
64
+ "required": true
65
+ },
66
+ "tool_description": {
67
+ "type": "string",
68
+ "description": "Description of what the tool is supposed to do",
69
+ "required": true
70
+ },
71
+ "tool_parameters": {
72
+ "type": "string",
73
+ "description": "JSON string of tool parameters and their types",
74
+ "required": true
75
+ },
76
+ "implementation_code": {
77
+ "type": "string",
78
+ "description": "The actual implementation code to analyze",
79
+ "required": true
80
+ },
81
+ "test_cases": {
82
+ "type": "string",
83
+ "description": "JSON string of test cases for the tool",
84
+ "required": true
85
+ },
86
+ "test_execution_results": {
87
+ "type": "string",
88
+ "description": "JSON string of test execution results including pass/fail status and actual outputs",
89
+ "required": false
90
+ }
91
+ },
92
+ "required": [
93
+ "tool_name",
94
+ "tool_description",
95
+ "tool_parameters",
96
+ "implementation_code",
97
+ "test_cases"
98
+ ]
99
+ },
100
+ "configs": {
101
+ "api_type": "CHATGPT",
102
+ "model_id": "o4-mini-0416",
103
+ "temperature": 0.3,
104
+ "max_new_tokens": 8192,
105
+ "return_json": true
106
+ }
107
+ },
108
+ {
109
+ "type": "AgenticTool",
110
+ "name": "MedicalLiteratureReviewer",
111
+ "description": "Conducts systematic reviews of medical literature on specific topics. Synthesizes findings from multiple studies and provides evidence-based conclusions with structured analysis and quality assessment.",
112
+ "prompt": "You are an expert medical researcher conducting a comprehensive systematic literature review.\n\n## REVIEW PARAMETERS\nResearch Topic: {research_topic}\nFocus Area: {focus_area}\nStudy Types to Prioritize: {study_types}\nEvidence Quality Level: {quality_level}\nReview Scope: {review_scope}\n\n## LITERATURE TO REVIEW\n{literature_content}\n\n## SYSTEMATIC REVIEW INSTRUCTIONS\nConduct a thorough evidence-based systematic review following PRISMA guidelines. Provide a structured analysis with the following sections:\n\n### 1. EXECUTIVE SUMMARY\n- Brief overview of research question and key findings\n- Overall strength of evidence and confidence level\n\n### 2. STUDY CHARACTERISTICS\n- Number and types of studies reviewed\n- Study populations and sample sizes\n- Geographic distribution and time periods\n- Quality assessment of included studies\n\n### 3. SYNTHESIS OF EVIDENCE\n- Key findings across studies with effect sizes where available\n- Consistency/inconsistency of results\n- Dose-response relationships (if applicable)\n- Subgroup analyses and population-specific findings\n\n### 4. QUALITY OF EVIDENCE ASSESSMENT\n- Risk of bias assessment\n- Heterogeneity between studies\n- Publication bias considerations\n- GRADE evidence assessment (High/Moderate/Low/Very Low)\n\n### 5. CLINICAL IMPLICATIONS\n- Practical implications for clinical practice\n- Patient safety considerations\n- Cost-effectiveness insights (if available)\n- Applicability to different patient populations\n\n### 6. RESEARCH GAPS AND LIMITATIONS\n- Identified knowledge gaps\n- Methodological limitations across studies\n- Areas requiring further investigation\n\n### 7. RECOMMENDATIONS\n- Evidence-based clinical recommendations with confidence levels\n- Suggestions for future research priorities\n- Implementation considerations\n\n### 8. 
CONCLUSION\n- Summary of evidence strength\n- Final recommendations with qualification statements\n\nEnsure all conclusions are proportionate to the evidence quality and explicitly state limitations.",
113
+ "input_arguments": [
114
+ "research_topic",
115
+ "literature_content",
116
+ "focus_area",
117
+ "study_types",
118
+ "quality_level",
119
+ "review_scope"
120
+ ],
121
+ "parameter": {
122
+ "type": "object",
123
+ "properties": {
124
+ "research_topic": {
125
+ "type": "string",
126
+ "description": "The specific medical/research topic for literature review (e.g., 'efficacy of drug X in treating condition Y').",
127
+ "required": true
128
+ },
129
+ "literature_content": {
130
+ "type": "string",
131
+ "description": "The literature content, abstracts, full studies, or research papers to review and synthesize.",
132
+ "required": true
133
+ },
134
+ "focus_area": {
135
+ "type": "string",
136
+ "description": "Primary focus area for the review (e.g., 'therapeutic efficacy', 'safety profile', 'diagnostic accuracy', 'biomarker validation').",
137
+ "required": true
138
+ },
139
+ "study_types": {
140
+ "type": "string",
141
+ "description": "Types of studies to prioritize in the analysis (e.g., 'randomized controlled trials', 'meta-analyses', 'cohort studies', 'case-control studies').",
142
+ "required": true
143
+ },
144
+ "quality_level": {
145
+ "type": "string",
146
+ "description": "Minimum evidence quality level to include (e.g., 'high quality only', 'moderate and above', 'all available evidence').",
147
+ "required": true
148
+ },
149
+ "review_scope": {
150
+ "type": "string",
151
+ "description": "Scope of the review (e.g., 'comprehensive systematic review', 'rapid review', 'scoping review', 'narrative review').",
152
+ "required": true
153
+ }
154
+ },
155
+ "required": [
156
+ "research_topic",
157
+ "literature_content",
158
+ "focus_area",
159
+ "study_types",
160
+ "quality_level",
161
+ "review_scope"
162
+ ]
163
+ },
164
+ "configs": {
165
+ "api_type": "CHATGPT",
166
+ "model_id": "o4-mini-0416",
167
+ "temperature": 1.0,
168
+ "max_new_tokens": 8192,
169
+ "return_json": false
170
+ }
171
+ },
172
+ {
173
+ "type": "AgenticTool",
174
+ "name": "HypothesisGenerator",
175
+ "description": "Generates research hypotheses based on provided background context, domain, and desired format. Uses AI to propose novel, testable hypotheses for scientific exploration.",
176
+ "prompt": "You are an expert researcher in {domain}. Based on the following background context, generate {number_of_hypotheses} clear, focused, and testable research hypotheses.\n\nContext:\n{context}\n\nFormat (optional): {hypothesis_format}\n\nProvide each hypothesis as a concise statement that could be empirically investigated.",
177
+ "input_arguments": [
178
+ "context",
179
+ "domain",
180
+ "number_of_hypotheses",
181
+ "hypothesis_format"
182
+ ],
183
+ "parameter": {
184
+ "type": "object",
185
+ "properties": {
186
+ "context": {
187
+ "type": "string",
188
+ "description": "Background information, observations, or data description from which to derive hypotheses.",
189
+ "required": true
190
+ },
191
+ "domain": {
192
+ "type": "string",
193
+ "description": "Field of study or research area (e.g., 'neuroscience', 'ecology', 'materials science').",
194
+ "required": true
195
+ },
196
+ "number_of_hypotheses": {
197
+ "type": "string",
198
+ "description": "Number of hypotheses to generate (e.g., '3', '5').",
199
+ "required": true
200
+ },
201
+ "hypothesis_format": {
202
+ "type": "string",
203
+ "description": "Optional directive on how to structure each hypothesis. Choose from one of the following formats:\n\n1. If–Then Statements: \"If [independent variable condition], then [expected outcome].\"\n2. Null and Alternative (Statistical):\n • H₀ (Null): \"There is no difference/effect/association between X and Y.\"\n • H₁ (Alt): \"There is a difference/effect/association between X and Y.\"\n3. Associative (Correlation-Focused): \"There is a relationship/association between [Variable A] and [Variable B].\"\n4. Directional (Non-If–Then): \"Increasing/decreasing [Variable A] will lead to [directional change] in [Variable B].\"\n5. Comparative (Group Comparison): \"Group A will show higher/lower [dependent measure] compared to Group B under [condition].\"\n6. Mechanistic: \"Because [mechanism or process], [Variable A] will cause [Variable B].\"\n7. Descriptive (Exploratory/Pattern-Oriented): \"Population X exhibits pattern Y in context Z.\"\n\nIf omitted, defaults to concise declarative sentences.",
204
+ "default": "concise declarative sentences",
205
+ "required": false
206
+ }
207
+ },
208
+ "required": [
209
+ "context",
210
+ "domain",
211
+ "number_of_hypotheses"
212
+ ]
213
+ },
214
+ "configs": {
215
+ "api_type": "CHATGPT",
216
+ "model_id": "o4-mini-0416",
217
+ "temperature": 1.0,
218
+ "max_new_tokens": 8192,
219
+ "return_json": false
220
+ }
221
+ },
222
+ {
223
+ "type": "AgenticTool",
224
+ "name": "ExperimentalDesignScorer",
225
+ "description": "Assesses a proposed experimental design by assigning scores and structured feedback on hypothesis clarity, variable definitions, sample size, controls, randomization, measurement methods, statistical analysis, bias mitigation, ethical considerations, and overall feasibility.",
226
+ "prompt": "You are an expert in experimental design. The user has provided the following hypothesis and detailed description of their proposed experiment:\n\nHypothesis:\n{hypothesis}\n\nDesign Description:\n{design_description}\n\nFor each of the categories below, assign a score from 1 (poor) to 5 (excellent), then provide concise feedback explaining your rating and any suggestions for improvement:\n\n1. **Hypothesis Clarity & Alignment**\n - Score and feedback on whether the hypothesis is clearly stated, testable, and directly addressed by the design.\n\n2. **Variables & Controls**\n - Score and feedback on definition of independent, dependent, and control variables, and identification of any missing controls or confounds.\n\n3. **Sample Size & Randomization**\n - Score and feedback on justification of sample size (power analysis or effect-size rationale) and appropriateness of randomization/allocation methods.\n\n4. **Measurement & Data Collection**\n - Score and feedback on reliability and validity of measurement methods and clarity of data collection procedures.\n\n5. **Statistical Analysis Plan**\n - Score and feedback on suitability of proposed statistical tests, treatment of assumptions, and alignment with the hypothesis.\n\n6. **Bias & Limitations**\n - Score and feedback on identification and mitigation of potential biases, and discussion of key limitations.\n\n7. **Ethical & Practical Feasibility**\n - Score and feedback on ethical considerations (e.g., consent, welfare) and feasibility within time, resources, and equipment constraints.\n\n8. **Overall Feasibility & Recommendations**\n - Provide an overall feasibility score and 2–3 concrete recommendations to strengthen the design (e.g., additional controls, measurement improvements, blinding enhancements).",
227
+ "input_arguments": [
228
+ "hypothesis",
229
+ "design_description"
230
+ ],
231
+ "parameter": {
232
+ "type": "object",
233
+ "properties": {
234
+ "hypothesis": {
235
+ "type": "string",
236
+ "description": "A clear statement of the research hypothesis to be tested.",
237
+ "required": true
238
+ },
239
+ "design_description": {
240
+ "type": "string",
241
+ "description": "A detailed description of the proposed experimental design, including variables, methods, sample details, and planned analyses.",
242
+ "required": true
243
+ }
244
+ },
245
+ "required": [
246
+ "hypothesis",
247
+ "design_description"
248
+ ]
249
+ },
250
+ "configs": {
251
+ "api_type": "CHATGPT",
252
+ "model_id": "o4-mini-0416",
253
+ "temperature": 1.0,
254
+ "max_new_tokens": 8192,
255
+ "return_json": false
256
+ }
257
+ },
258
+ {
259
+ "type": "AgenticTool",
260
+ "name": "MedicalTermNormalizer",
261
+ "description": "Identifies and corrects misspelled drug or disease names, returning a list of plausible standardized terms.",
262
+ "prompt": "You are an expert in biomedical terminology. The user has given you one or more drug or disease names that may be misspelled or incomplete:\n\nInput:\n{raw_terms}\n\nReturn **only** a JSON array (list) of all plausible standardized names—ordered from most to least likely. If you cannot identify any plausible terms, return an empty array.\n\nExample Input:\n'aspirin'\n\nExample Output:\n[\"Aspirin\"]\n\nDo not include any explanations or additional text in your response.",
263
+ "input_arguments": [
264
+ "raw_terms"
265
+ ],
266
+ "parameter": {
267
+ "type": "object",
268
+ "properties": {
269
+ "raw_terms": {
270
+ "type": "string",
271
+ "description": "A comma- or whitespace-separated string containing one or more drug or disease names that may be misspelled or incomplete.",
272
+ "required": true
273
+ }
274
+ },
275
+ "required": [
276
+ "raw_terms"
277
+ ]
278
+ },
279
+ "configs": {
280
+ "api_type": "CHATGPT",
281
+ "model_id": "o4-mini-0416",
282
+ "temperature": 1.0,
283
+ "max_new_tokens": 2048,
284
+ "return_json": true
285
+ }
286
+ },
287
+ {
288
+ "type": "AgenticTool",
289
+ "name": "NoveltySignificanceReviewer",
290
+ "description": "Provides a structured peer-review of the work's originality and potential impact.",
291
+ "prompt": "You are a journal peer reviewer with deep knowledge of the field. The user has supplied the manuscript below.\n\nTitle:\n{paper_title}\n\nAbstract:\n{abstract}\n\nFull Manuscript:\n{manuscript_text}\n\nWrite a structured review that, for each criterion, first assigns a rating from 1 (very weak) to 5 (excellent) **followed by 2–3 sentences** justifying the rating and giving specific improvement advice.\n\n1. **Originality of Research Question**\n2. **Contribution to the Field**\n3. **Incremental vs. Ground-breaking Nature**\n\nEnd with 2–3 overarching recommendations to strengthen the manuscript's contribution.",
292
+ "input_arguments": [
293
+ "paper_title",
294
+ "abstract",
295
+ "manuscript_text"
296
+ ],
297
+ "parameter": {
298
+ "type": "object",
299
+ "properties": {
300
+ "paper_title": {
301
+ "type": "string",
302
+ "description": "Manuscript title",
303
+ "required": true
304
+ },
305
+ "abstract": {
306
+ "type": "string",
307
+ "description": "Manuscript abstract",
308
+ "required": true
309
+ },
310
+ "manuscript_text": {
311
+ "type": "string",
312
+ "description": "Full manuscript text",
313
+ "required": true
314
+ }
315
+ },
316
+ "required": [
317
+ "paper_title",
318
+ "abstract",
319
+ "manuscript_text"
320
+ ]
321
+ },
322
+ "configs": {
323
+ "api_type": "CHATGPT",
324
+ "model_id": "o4-mini-0416",
325
+ "temperature": 1.0,
326
+ "max_new_tokens": 8192,
327
+ "return_json": false
328
+ }
329
+ },
330
+ {
331
+ "type": "AgenticTool",
332
+ "name": "LiteratureContextReviewer",
333
+ "description": "Reviews coverage, relevance, and critical synthesis of prior scholarship.",
334
+ "prompt": "You are a journal peer reviewer. Assess the literature-review section below.\n\nTitle:\n{paper_title}\n\nLiterature Review Section:\n{literature_review}\n\nFor each item, give a 1-5 rating and 2–3 sentences of feedback plus missing-citation suggestions.\n\n1. **Comprehensiveness of Sources**\n2. **Relevance & Accuracy of Summaries**\n3. **Critical Synthesis & Gap Identification**",
335
+ "input_arguments": [
336
+ "paper_title",
337
+ "literature_review"
338
+ ],
339
+ "parameter": {
340
+ "type": "object",
341
+ "properties": {
342
+ "paper_title": {
343
+ "type": "string",
344
+ "required": true
345
+ },
346
+ "literature_review": {
347
+ "type": "string",
348
+ "description": "Full literature-review text",
349
+ "required": true
350
+ }
351
+ },
352
+ "required": [
353
+ "paper_title",
354
+ "literature_review"
355
+ ]
356
+ },
357
+ "configs": {
358
+ "api_type": "CHATGPT",
359
+ "model_id": "o4-mini-0416",
360
+ "temperature": 1.0,
361
+ "max_new_tokens": 8192,
362
+ "return_json": false
363
+ }
364
+ },
365
+ {
366
+ "type": "AgenticTool",
367
+ "name": "MethodologyRigorReviewer",
368
+ "description": "Evaluates design appropriateness, sampling, and procedural transparency.",
369
+ "prompt": "You are a methodology peer reviewer. Critically appraise the Methods section below.\n\nMethods Section:\n{methods_section}\n\nRate 1–5, then justify in 2–3 sentences for each:\n\n1. **Design Appropriateness to Research Question**\n2. **Variable Definition & Operationalization Clarity**\n3. **Sampling Strategy & Randomization Adequacy**\n4. **Procedural Transparency (replicability)**\n\nConclude with the two most important methodological revisions you recommend.",
370
+ "input_arguments": [
371
+ "methods_section"
372
+ ],
373
+ "parameter": {
374
+ "type": "object",
375
+ "properties": {
376
+ "methods_section": {
377
+ "type": "string",
378
+ "description": "Full Methods text",
379
+ "required": true
380
+ }
381
+ },
382
+ "required": [
383
+ "methods_section"
384
+ ]
385
+ },
386
+ "configs": {
387
+ "api_type": "CHATGPT",
388
+ "model_id": "o4-mini-0416",
389
+ "temperature": 1.0,
390
+ "max_new_tokens": 8192,
391
+ "return_json": false
392
+ }
393
+ },
394
+ {
395
+ "type": "AgenticTool",
396
+ "name": "DataAnalysisValidityReviewer",
397
+ "description": "Checks statistical choices, assumption testing, and reporting transparency.",
398
+ "prompt": "You are the statistical reviewer for a journal. Examine the analysis plan and results below.\n\nStatistical Analysis Section:\n{analysis_section}\n\nProvide a 1-5 rating and 2–3 sentences of critique for each criterion:\n\n1. **Appropriateness of Tests/Models**\n2. **Assumption Verification & Remedies**\n3. **Reporting Completeness (effect sizes, CIs, exact p-values)**\n4. **Reproducibility (code/data availability)**\n\nSuggest any corrective analyses or transparency improvements.",
399
+ "input_arguments": [
400
+ "analysis_section"
401
+ ],
402
+ "parameter": {
403
+ "type": "object",
404
+ "properties": {
405
+ "analysis_section": {
406
+ "type": "string",
407
+ "required": true
408
+ }
409
+ },
410
+ "required": [
411
+ "analysis_section"
412
+ ]
413
+ },
414
+ "configs": {
415
+ "api_type": "CHATGPT",
416
+ "model_id": "o4-mini-0416",
417
+ "temperature": 1.0,
418
+ "max_new_tokens": 8192,
419
+ "return_json": false
420
+ }
421
+ },
422
+ {
423
+ "type": "AgenticTool",
424
+ "name": "ResultsInterpretationReviewer",
425
+ "description": "Judges whether conclusions are data-justified and limitations addressed.",
426
+ "prompt": "You are a peer reviewer focusing on interpretation. Review the sections below.\n\nResults:\n{results_section}\n\nDiscussion:\n{discussion_section}\n\nFor each item, assign 1–5 and write 2–3 sentences of review:\n\n1. **Alignment of Claims with Data**\n2. **Consideration of Alternative Explanations**\n3. **Limitations & Future-Work Discussion**\n\nFinish with recommendations for improving interpretive balance and acknowledging uncertainties.",
427
+ "input_arguments": [
428
+ "results_section",
429
+ "discussion_section"
430
+ ],
431
+ "parameter": {
432
+ "type": "object",
433
+ "properties": {
434
+ "results_section": {
435
+ "type": "string",
436
+ "required": true
437
+ },
438
+ "discussion_section": {
439
+ "type": "string",
440
+ "required": true
441
+ }
442
+ },
443
+ "required": [
444
+ "results_section",
445
+ "discussion_section"
446
+ ]
447
+ },
448
+ "configs": {
449
+ "api_type": "CHATGPT",
450
+ "model_id": "o4-mini-0416",
451
+ "temperature": 1.0,
452
+ "max_new_tokens": 8192,
453
+ "return_json": false
454
+ }
455
+ },
456
+ {
457
+ "type": "AgenticTool",
458
+ "name": "WritingPresentationReviewer",
459
+ "description": "Assesses clarity, organization, grammar, and visual presentation quality.",
460
+ "prompt": "You are a scientific writing reviewer. Evaluate the manuscript text below.\n\nManuscript:\n{manuscript_text}\n\nRate 1–5 and give 2–3 sentences of feedback for:\n\n1. **Clarity & Conciseness of Writing**\n2. **Logical Flow & Section Organization**\n3. **Grammar, Style & Terminology Consistency**\n4. **Figure/Table Quality & Caption Adequacy**\n\nProvide concrete examples of unclear sentences or formatting issues and suggest fixes.",
461
+ "input_arguments": [
462
+ "manuscript_text"
463
+ ],
464
+ "parameter": {
465
+ "type": "object",
466
+ "properties": {
467
+ "manuscript_text": {
468
+ "type": "string",
469
+ "required": true
470
+ }
471
+ },
472
+ "required": [
473
+ "manuscript_text"
474
+ ]
475
+ },
476
+ "configs": {
477
+ "api_type": "CHATGPT",
478
+ "model_id": "o4-mini-0416",
479
+ "temperature": 1.0,
480
+ "max_new_tokens": 8192,
481
+ "return_json": false
482
+ }
483
+ },
484
+ {
485
+ "type": "AgenticTool",
486
+ "name": "ReproducibilityTransparencyReviewer",
487
+ "description": "Evaluates data, code, and protocol availability for replication.",
488
+ "prompt": "You are a reproducibility reviewer. Analyze the statement below.\n\nData & Materials Availability Statement:\n{availability_statement}\n\nFor each category, provide a 1-5 rating and 2–3 sentences of critique:\n\n1. **Data Accessibility & Documentation**\n2. **Code/Software Availability & Licensing**\n3. **Protocol & Materials Detail for Replication**\n\nNote embargoes or missing links and recommend steps to achieve full transparency.",
489
+ "input_arguments": [
490
+ "availability_statement"
491
+ ],
492
+ "parameter": {
493
+ "type": "object",
494
+ "properties": {
495
+ "availability_statement": {
496
+ "type": "string",
497
+ "required": true
498
+ }
499
+ },
500
+ "required": [
501
+ "availability_statement"
502
+ ]
503
+ },
504
+ "configs": {
505
+ "api_type": "CHATGPT",
506
+ "model_id": "o4-mini-0416",
507
+ "temperature": 1.0,
508
+ "max_new_tokens": 8192,
509
+ "return_json": false
510
+ }
511
+ },
512
+ {
513
+ "type": "AgenticTool",
514
+ "name": "EthicalComplianceReviewer",
515
+ "description": "Checks adherence to ethical standards and disclosure practices.",
516
+ "prompt": "You are an ethics peer reviewer. Examine the statements below.\n\nEthics & Compliance Section:\n{ethics_section}\n\nRate 1–5 with 2–3 sentences of feedback for each:\n\n1. **Approvals & Informed Consent**\n2. **Participant/Subject Welfare Measures**\n3. **Conflict of Interest & Funding Disclosure**\n4. **Data Privacy & Security Protections**\n\nIdentify any deficiencies and specify actions required for compliance.",
517
+ "input_arguments": [
518
+ "ethics_section"
519
+ ],
520
+ "parameter": {
521
+ "type": "object",
522
+ "properties": {
523
+ "ethics_section": {
524
+ "type": "string",
525
+ "required": true
526
+ }
527
+ },
528
+ "required": [
529
+ "ethics_section"
530
+ ]
531
+ },
532
+ "configs": {
533
+ "api_type": "CHATGPT",
534
+ "model_id": "o4-mini-0416",
535
+ "temperature": 1.0,
536
+ "max_new_tokens": 8192,
537
+ "return_json": false
538
+ }
539
+ },
540
+ {
541
+ "type": "AgenticTool",
542
+ "name": "QuestionRephraser",
543
+ "description": "Generates three distinct paraphrases of a given question while ensuring answer options remain valid and applicable.",
544
+ "prompt": "You are an expert academic editor. Given the *question* text below, rewrite the question in three different ways that preserve the original meaning and difficulty. \n\n**Important requirements:**\n- **Do not** include any answer options (e.g., choices labeled A), B), C), etc.) in your rephrased questions\n- Ensure that the provided answer options will still be valid and applicable to each rephrased version\n- Maintain the same level of difficulty and specificity as the original question\n- Return *only* a JSON array containing the three re-phrased versions, in the order you create them\n\nQuestion:\n{question}\n\nAnswer Options (if provided):\n{options}",
545
+ "input_arguments": [
546
+ "question",
547
+ "options"
548
+ ],
549
+ "parameter": {
550
+ "type": "object",
551
+ "properties": {
552
+ "question": {
553
+ "type": "string",
554
+ "description": "The original question text to be rephrased",
555
+ "required": true
556
+ },
557
+ "options": {
558
+ "type": "string",
559
+ "description": "Answer options (e.g., multiple choice options) that should remain valid for the rephrased questions. Leave empty if no options are provided.",
560
+ "required": false
561
+ }
562
+ },
563
+ "required": [
564
+ "question"
565
+ ]
566
+ },
567
+ "configs": {
568
+ "api_type": "CHATGPT",
569
+ "model_id": "o4-mini-0416",
570
+ "temperature": 1.0,
571
+ "max_new_tokens": 8192,
572
+ "return_json": true
573
+ }
574
+ },
575
+ {
576
+ "type": "AgenticTool",
577
+ "name": "ProtocolOptimizer",
578
+ "description": "Reviews an initial protocol and delivers targeted revisions that improve clarity, feasibility, risk-management, and evaluation rigor.",
579
+ "prompt": "You are a protocol-optimization specialist. Examine the initial protocol below.\n\nInitial Protocol:\n{initial_protocol}\n\nFor each criterion, give a 1-to-5 rating followed by 2–3 sentences of feedback that include concrete revision suggestions:\n\n1. **Clarity & Completeness**\n2. **Feasibility & Resource Efficiency**\n3. **Risk Mitigation & Contingency Plans**\n4. **Measurement & Evaluation Metrics**\n\nFinish with a prioritized action-item checklist that the author can follow to strengthen the protocol.",
580
+ "input_arguments": [
581
+ "initial_protocol"
582
+ ],
583
+ "parameter": {
584
+ "type": "object",
585
+ "properties": {
586
+ "initial_protocol": {
587
+ "type": "string",
588
+ "required": true
589
+ }
590
+ },
591
+ "required": [
592
+ "initial_protocol"
593
+ ]
594
+ },
595
+ "configs": {
596
+ "api_type": "CHATGPT",
597
+ "model_id": "o4-mini-0416",
598
+ "temperature": 1.0,
599
+ "max_new_tokens": 8192,
600
+ "return_json": false
601
+ }
602
+ },
603
+ {
604
+ "type": "AgenticTool",
605
+ "name": "TestCaseGenerator",
606
+ "description": "Generates diverse and representative ToolUniverse tool call dictionaries for a given tool based on its parameter schema. Each tool call should be a JSON object with 'name' (the tool's name) and 'arguments' (a dict of input arguments), covering different parameter combinations, edge cases, and typical usage. Can generate targeted test cases based on previous optimization feedback.",
607
+ "prompt": "You are an expert software tester. Generate 3-5 diverse ToolUniverse tool call dictionaries for the given tool configuration. Each tool call must be a JSON object with 'name' (tool name) and 'arguments' (input parameters).\n\nFEEDBACK-DRIVEN GENERATION:\nIf tool_config contains '_optimization_feedback' and '_iteration', generate targeted test cases addressing the specific issues mentioned in the feedback. Focus on edge cases, parameter combinations, or usage patterns that need better coverage.\n\nSTANDARD GENERATION:\nCover typical usage, edge cases, and boundary conditions when possible.\n\nTool configuration: {tool_config}\n\nReturn a JSON object with key 'test_cases' containing an array of test case objects. Example format:\n{\"test_cases\": [{\"name\":\"ToolName\",\"arguments\":{\"param\":\"value\"}},{\"name\":\"ToolName\",\"arguments\":{\"param\":123}}]}",
608
+ "input_arguments": ["tool_config"],
609
+ "parameter": {
610
+ "type": "object",
611
+ "properties": {
612
+ "tool_config": {
613
+ "type": "object",
614
+ "description": "The full configuration of the tool to generate test cases for. May include '_optimization_feedback' and '_iteration' fields for feedback-driven test generation.",
615
+ "required": true
616
+ }
617
+ },
618
+ "required": ["tool_config"]
619
+ },
620
+ "configs": {
621
+ "api_type": "CHATGPT",
622
+ "model_id": "o4-mini-0416",
623
+ "temperature": 1.0,
624
+ "max_new_tokens": 4096,
625
+ "return_json": true,
626
+ "response_format": {"type": "json_object"}
627
+ }
628
+ },
629
+ {
630
+ "type": "AgenticTool",
631
+ "name": "DescriptionAnalyzer",
632
+ "description": "Analyzes a tool's original description and the results of multiple test cases, then suggests an improved description that is more accurate, comprehensive, and user-friendly. Optionally provides a rationale for the changes.",
633
+ "prompt": "You are an expert technical writer and tool evaluator. Given the original description of a tool and the results of several test cases (inputs and outputs), analyze whether the description accurately reflects the tool's behavior. Suggest an improved description that is more precise, comprehensive, and user-friendly. Also provide a brief rationale for your changes.\n\nCRITICAL CONSTRAINTS - TOOL DESCRIPTION SCOPE:\n1. If the original description contains 'Previous optimization feedback:', use that feedback to guide your improvements and address the specific issues mentioned.\n2. The tool description should focus EXCLUSIVELY on the OVERALL PURPOSE and HIGH-LEVEL FUNCTIONALITY of the tool.\n3. NEVER include parameter-specific details, formats, or requirements in the tool description.\n4. NEVER mention specific parameter names, data types, or input requirements - these belong in parameter descriptions.\n5. Focus ONLY on: what the tool does, its primary use cases, what kind of output it provides, and its general behavior patterns.\n6. Avoid generic filler phrases like 'enabling workflows', 'supporting analysis', 'facilitating research' unless they add specific meaning.\n7. Every sentence must convey essential information about the tool's core functionality.\n8. Think of the tool description as answering 'What does this tool do?' with concrete, actionable information.\n\nOriginal description:\n{original_description}\n\nTest results:\n{test_results}\n\nReturn a JSON object with keys: 'optimized_description' and 'rationale'.",
634
+ "input_arguments": ["original_description", "test_results"],
635
+ "parameter": {
636
+ "type": "object",
637
+ "properties": {
638
+ "original_description": {
639
+ "type": "string",
640
+ "description": "The original description of the tool.",
641
+ "required": true
642
+ },
643
+ "test_results": {
644
+ "type": "string",
645
+ "description": "A JSON string containing a list of test case input/output pairs.",
646
+ "required": true
647
+ }
648
+ },
649
+ "required": ["original_description", "test_results"]
650
+ },
651
+ "configs": {
652
+ "api_type": "CHATGPT",
653
+ "model_id": "o4-mini-0416",
654
+ "temperature": 0.4,
655
+ "max_new_tokens": 1024,
656
+ "return_json": true
657
+ }
658
+ },
659
+ {
660
+ "type": "AgenticTool",
661
+ "name": "ArgumentDescriptionOptimizer",
662
+ "description": "Optimizes the descriptions of tool arguments/parameters based on test case results and actual usage patterns. Provides improved descriptions that are more accurate and user-friendly.",
663
+ "prompt": "You are an expert technical writer specializing in API documentation. Given a tool's parameter schema and test case results, analyze how each parameter is used and optimize their descriptions to be clear, accurate, and concise.\n\nCRITICAL CONSTRAINTS - PARAMETER DESCRIPTION SCOPE:\n1. If the parameter schema contains '_previous_feedback', use that feedback to address specific issues and improve the parameter descriptions accordingly.\n2. Parameter descriptions should be HIGHLY SPECIFIC to each individual parameter.\n3. NEVER repeat or reference the main tool functionality - assume the user already knows what the tool does.\n4. Focus EXCLUSIVELY on parameter-specific details: data types, formats, constraints, valid values, required formats, examples when helpful.\n5. Each description should answer: 'What should I put in this specific parameter?' not 'What does the tool do?'\n6. Avoid generic phrases like 'for this tool', 'used by the tool', 'enables functionality' unless they provide specific technical context.\n7. Be precise about technical requirements (e.g., 'JSON string', 'integer between 1-100', 'URL format', etc.)\n8. Every word must serve a purpose - eliminate filler words and redundant phrases.\n\nOriginal parameter schema:\n{parameter_schema}\n\nTest results showing parameter usage:\n{test_results}\n\nFor each parameter, suggest an improved description that:\n1. Is brief but informative (1-2 sentences max)\n2. Accurately reflects the parameter's specific purpose, data type, and constraints\n3. Uses clear, simple language with precise technical details\n4. Avoids redundancy with the parameter name\n5. Addresses any issues mentioned in previous feedback\n6. Contains only essential information about what value should be provided\n\nReturn a JSON object with keys: 'optimized_parameters' (object with parameter names as keys and optimized descriptions as values) and 'rationale' (explaining the key changes made).",
664
+ "input_arguments": ["parameter_schema", "test_results"],
665
+ "parameter": {
666
+ "type": "object",
667
+ "properties": {
668
+ "parameter_schema": {
669
+ "type": "string",
670
+ "description": "JSON string of the original parameter schema with properties and descriptions.",
671
+ "required": true
672
+ },
673
+ "test_results": {
674
+ "type": "string",
675
+ "description": "A JSON string containing test case input/output pairs showing parameter usage.",
676
+ "required": true
677
+ }
678
+ },
679
+ "required": ["parameter_schema", "test_results"]
680
+ },
681
+ "configs": {
682
+ "api_type": "CHATGPT",
683
+ "model_id": "o4-mini-0416",
684
+ "temperature": 1.0,
685
+ "max_new_tokens": 1536,
686
+ "return_json": true
687
+ }
688
+ },
689
+ {
690
+ "type": "AgenticTool",
691
+ "name": "DescriptionQualityEvaluator",
692
+ "description": "Evaluates the quality of tool descriptions and parameter descriptions, providing a score and specific feedback for improvements.",
693
+ "prompt": "You are an expert evaluator of technical documentation. Given a tool description, parameter descriptions, and test results, evaluate the quality and provide a score from 1-10 along with specific feedback.\n\nTool description:\n{tool_description}\n\nParameter descriptions:\n{parameter_descriptions}\n\nTest results:\n{test_results}\n\nEvaluate based on these criteria:\n1. Clarity and understandability (1-10)\n2. Accuracy based on test results (1-10)\n3. Completeness of information (1-10)\n4. Conciseness and meaningfulness - every sentence must serve a purpose (1-10)\n5. User-friendliness (1-10)\n6. Redundancy avoidance - tool description and parameter descriptions must not duplicate information (1-10)\n\nCRITICAL EVALUATION FOCUS:\n- Tool description should ONLY describe overall functionality and purpose, NOT parameter details\n- Parameter descriptions should ONLY describe specific parameter requirements, NOT tool functionality\n- Check for meaningless filler phrases like 'enabling workflows', 'supporting analysis', 'facilitating integration' - DEDUCT POINTS for vague language\n- Check for overlap: Does the tool description mention parameter names, formats, or specific input requirements? (DEDUCT POINTS)\n- Check for overlap: Do parameter descriptions repeat what the tool does overall? (DEDUCT POINTS)\n- Every sentence must convey essential, actionable information\n\nReturn a JSON object with:\n- 'overall_score': Average of all criteria scores (1-10)\n- 'criteria_scores': Object with individual scores for each criterion\n- 'feedback': Specific suggestions for improvement, identifying meaningless phrases and redundancy issues\n- 'is_satisfactory': Boolean indicating if quality is acceptable (score >= 8)\n- 'meaningfulness_analysis': Detailed explanation of any filler language or redundant information found",
694
+ "input_arguments": ["tool_description", "parameter_descriptions", "test_results"],
695
+ "parameter": {
696
+ "type": "object",
697
+ "properties": {
698
+ "tool_description": {
699
+ "type": "string",
700
+ "description": "The tool description to evaluate.",
701
+ "required": true
702
+ },
703
+ "parameter_descriptions": {
704
+ "type": "string",
705
+ "description": "JSON string of parameter names and their descriptions.",
706
+ "required": true
707
+ },
708
+ "test_results": {
709
+ "type": "string",
710
+ "description": "JSON string containing test case results.",
711
+ "required": true
712
+ }
713
+ },
714
+ "required": ["tool_description", "parameter_descriptions", "test_results"]
715
+ },
716
+ "configs": {
717
+ "api_type": "CHATGPT",
718
+ "model_id": "o4-mini-0416",
719
+ "temperature": 1.0,
720
+ "max_new_tokens": 1024,
721
+ "return_json": true
722
+ }
723
+ },
724
+ {
725
+ "type": "AgenticTool",
726
+ "name": "ToolSpecificationGenerator",
727
+ "description": "Generates complete ToolUniverse-compliant tool specifications based on a description and analysis of similar existing tools. Creates comprehensive tool configurations including parameters, prompts, and metadata.",
728
+ "prompt": "You are an expert tool architect specializing in ToolUniverse tool design. Generate a complete, valid ToolUniverse tool specification based on the provided requirements and analysis of similar tools.\n\n🚨 **CRITICAL REQUIREMENT**: For all tool types except AgenticTool, generate CUSTOM tool implementations with complete standalone code. Do NOT reference existing tool classes. Only use AgenticTool for subjective analysis tasks requiring LLM capabilities.\n\n**PACKAGE USAGE REQUIREMENT**: ALWAYS prioritize using existing, well-maintained Python packages over custom implementations. Leverage established libraries like pandas, numpy, scipy, requests, beautifulsoup4, biopython, etc. Only write custom algorithms when absolutely no suitable package exists.\n\nREQUIREMENTS:\nTool Description: {tool_description}\nTarget Category: {tool_category}\nTarget Type: {tool_type}\nSimilar Tools Analysis: {similar_tools}\nExisting Tools Context: {existing_tools_summary}\n\n**TOOL TYPE SELECTION LOGIC:**\n1. **AgenticTool**: Only for subjective tasks (analysis, interpretation, creative writing)\n2. 
**CustomTool**: For all computational, API, data processing tasks with complete custom implementation\n\n**FOR CustomTool (Complete custom implementation - PREFERRED for most tasks):**\n```json\n{\n \"type\": \"CustomTool\",\n \"name\": \"ToolName\",\n \"description\": \"Custom implementation for [specific functionality]\",\n \"implementation\": {\n \"language\": \"python\",\n \"dependencies\": [\"package1\", \"package2\"],\n \"installation_commands\": [\"pip install package1\", \"pip install package2\"],\n \"source_code\": \"import requests\\nimport json\\n\\ndef execute_tool(params):\\n # Complete implementation here\\n return result\",\n \"main_function\": \"execute_tool\",\n \"error_handling\": \"try-except blocks for robust error management\",\n \"validation\": \"input parameter validation logic\",\n \"documentation\": \"Complete usage documentation\"\n },\n \"parameter\": {\n \"type\": \"object\",\n \"properties\": {\n \"input_param\": {\n \"type\": \"string\",\n \"description\": \"Description of input parameter\",\n \"required\": true\n }\n },\n \"required\": [\"input_param\"]\n },\n \"return_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"result\": {\"type\": \"string\", \"description\": \"Tool output description\"}\n }\n },\n \"testing\": {\n \"test_cases\": [\n {\n \"input\": {\"input_param\": \"test_value\"},\n \"expected_output_type\": \"object\",\n \"description\": \"Test case description\"\n }\n ],\n \"validation_method\": \"automated testing with assertions\"\n }\n}\n```\n\n**FOR AgenticTool (AI-powered tools - Only for subjective tasks):**\n```json\n{\n \"type\": \"AgenticTool\",\n \"name\": \"ToolName\",\n \"description\": \"AI-powered [subjective task] for [non-computational purpose]\",\n \"prompt\": \"Prompt template with {parameter_name} placeholders for subjective analysis\",\n \"input_arguments\": [\"param1\", \"param2\"],\n \"parameter\": {\n \"type\": \"object\",\n \"properties\": {\n \"param1\": {\"type\": \"string\", 
\"description\": \"...\", \"required\": true}\n },\n \"required\": [\"param1\"]\n },\n \"configs\": {\n \"api_type\": \"CHATGPT\",\n \"model_id\": \"o4-mini-0416\",\n \"temperature\": 0.7,\n \"max_new_tokens\": 1024,\n \"return_json\": false\n }\n}\n```\n\n**GENERATION REQUIREMENTS:**\n1. **CUSTOM IMPLEMENTATION APPROACH**: For computational/API/data tasks, generate complete standalone Python code\n2. **NO EXISTING TOOL CLASSES**: Do not inherit from or reference PackageTool, RESTTool, XMLTool, etc.\n3. **REAL PACKAGES**: Use actual, installable Python packages (requests, pandas, numpy, etc.)\n4. **COMPLETE CODE**: Include full implementation with imports, error handling, and validation\n5. **TESTING READY**: Provide test cases and validation methods\n6. **AGENTICTOOL ONLY FOR SUBJECTIVE**: Use AgenticTool only for interpretation, analysis, creative tasks\n\n**QUALITY STANDARDS:**\n- Generate complete, executable Python code for CustomTool\n- Include real package dependencies and installation instructions\n- Provide comprehensive error handling and input validation\n- Ensure computational accuracy through established libraries\n- Only use AgenticTool for tasks requiring subjective judgment or LLM capabilities\n\nReturn ONLY a valid JSON object representing the complete tool configuration using the EXACT format for the chosen tool type. No additional text or explanations.",
729
+ "input_arguments": ["tool_description", "tool_category", "tool_type", "similar_tools", "existing_tools_summary"],
730
+ "parameter": {
731
+ "type": "object",
732
+ "properties": {
733
+ "tool_description": {
734
+ "type": "string",
735
+ "description": "Brief description of the desired tool functionality and purpose.",
736
+ "required": true
737
+ },
738
+ "tool_category": {
739
+ "type": "string",
740
+ "description": "Target category for the tool (e.g., 'biomedical', 'data_analysis', 'text_processing').",
741
+ "required": true
742
+ },
743
+ "tool_type": {
744
+ "type": "string",
745
+ "description": "Target ToolUniverse tool type (e.g., 'AgenticTool' for subjective analysis tasks, 'CustomTool' for computational, API, or data-processing tasks).",
746
+ "required": true
747
+ },
748
+ "similar_tools": {
749
+ "type": "string",
750
+ "description": "JSON string containing configurations of similar existing tools for analysis and differentiation.",
751
+ "required": true
752
+ },
753
+ "existing_tools_summary": {
754
+ "type": "string",
755
+ "description": "Summary of existing tools in the ecosystem to avoid duplication and identify gaps.",
756
+ "required": true
757
+ }
758
+ },
759
+ "required": ["tool_description", "tool_category", "tool_type", "similar_tools", "existing_tools_summary"]
760
+ },
761
+ "configs": {
762
+ "api_type": "CHATGPT",
763
+ "model_id": "o4-mini-0416",
764
+ "temperature": 1.0,
765
+ "max_new_tokens": 40960,
766
+ "return_json": true
767
+ }
768
+ },
769
+ {
770
+ "type": "AgenticTool",
771
+ "name": "AdvancedCodeQualityAnalyzer",
772
+ "description": "Performs deep analysis of code quality including complexity, security, performance, and maintainability metrics with domain-specific expertise",
773
+ "prompt": "You are an expert code quality analyzer with deep knowledge in software engineering best practices. Analyze the provided source code and return a comprehensive quality assessment.\n\n## CODE TO ANALYZE\nLanguage: {language}\nAnalysis Depth: {analysis_depth}\nDomain Context: {domain_context}\n\nSource Code:\n```{language}\n{source_code}\n```\n\n## ANALYSIS FRAMEWORK\nAssess the code across these dimensions:\n\n### 1. CODE STRUCTURE (25%)\n- Function organization and modularity\n- Separation of concerns\n- Code architecture and design patterns\n- Logical flow and readability\n\n### 2. ERROR HANDLING (20%)\n- Exception handling completeness\n- Input validation robustness\n- Edge case coverage\n- Error message quality\n\n### 3. PERFORMANCE (15%)\n- Algorithm efficiency and complexity\n- Resource usage optimization\n- Scalability considerations\n- Memory management\n\n### 4. SECURITY (15%)\n- Input sanitization\n- Security vulnerability assessment\n- Data handling safety\n- Access control considerations\n\n### 5. MAINTAINABILITY (15%)\n- Code readability and clarity\n- Documentation quality\n- Naming conventions\n- Code reusability\n\n### 6. 
DOMAIN APPROPRIATENESS (10%)\n- Use of domain-specific algorithms\n- Adherence to field best practices\n- Appropriate library usage\n- Scientific/technical accuracy\n\n## RESPONSE FORMAT\nReturn a JSON object with detailed analysis:\n\n```json\n{\n \"overall_score\": <0-10>,\n \"category_scores\": {\n \"structure\": <0-10>,\n \"error_handling\": <0-10>,\n \"performance\": <0-10>,\n \"security\": <0-10>,\n \"maintainability\": <0-10>,\n \"domain_appropriateness\": <0-10>\n },\n \"issues\": [\n {\n \"category\": \"<category>\",\n \"severity\": \"low|medium|high|critical\",\n \"description\": \"<issue description>\",\n \"line_range\": \"<start-end if applicable>\",\n \"suggestion\": \"<improvement suggestion>\"\n }\n ],\n \"best_practices\": [\"<list of followed best practices>\"],\n \"recommendations\": [\n {\n \"priority\": \"high|medium|low\",\n \"action\": \"<specific recommendation>\",\n \"rationale\": \"<why this improvement is needed>\",\n \"estimated_impact\": \"<expected improvement>\"\n }\n ],\n \"metrics\": {\n \"lines_of_code\": <number>,\n \"cyclomatic_complexity\": \"<estimated>\",\n \"function_count\": <number>,\n \"comment_ratio\": \"<percentage>\"\n }\n}\n```",
774
+ "input_arguments": [
775
+ "source_code",
776
+ "language",
777
+ "analysis_depth",
778
+ "domain_context"
779
+ ],
780
+ "parameter": {
781
+ "type": "object",
782
+ "properties": {
783
+ "source_code": {
784
+ "type": "string",
785
+ "description": "The source code to analyze for quality assessment",
786
+ "required": true
787
+ },
788
+ "language": {
789
+ "type": "string",
790
+ "description": "Programming language (python, javascript, etc.)",
791
+ "default": "python",
792
+ "required": false
793
+ },
794
+ "analysis_depth": {
795
+ "type": "string",
796
+ "enum": ["basic", "comprehensive", "security-focused"],
797
+ "description": "Level of analysis depth to perform",
798
+ "default": "comprehensive",
799
+ "required": false
800
+ },
801
+ "domain_context": {
802
+ "type": "string",
803
+ "description": "Domain context for specialized analysis (e.g., bioinformatics, web development)",
804
+ "required": false
805
+ }
806
+ },
807
+ "required": ["source_code"]
808
+ },
809
+ "configs": {
810
+ "api_type": "CHATGPT",
811
+ "model_id": "o4-mini-0416",
812
+ "temperature": 0.1,
813
+ "max_new_tokens": 3000,
814
+ "return_json": true
815
+ }
816
+ },
817
+ {
818
+ "type": "AgenticTool",
819
+ "name": "ToolImplementationGenerator",
820
+ "description": "Generates domain-specific, functional code implementations based on tool descriptions and requirements with intelligent algorithm selection",
821
+ "prompt": "You are an expert software engineer specializing in domain-specific implementations. Generate a complete, functional implementation based on the requirements.\n\n## IMPLEMENTATION REQUEST\nTool Description: {tool_description}\nDomain: {domain}\nComplexity Level: {complexity_level}\nPerformance Requirements: {performance_requirements}\n\nTool Parameters:\n{tool_parameters}\n\n## CRITICAL REQUIREMENT: USE EXISTING PACKAGES\n🚨 **ALWAYS PRIORITIZE EXISTING, WELL-MAINTAINED PACKAGES** over custom implementations. Your goal is to leverage the ecosystem of established, tested, and optimized libraries. **NEVER write custom algorithms unless absolutely no suitable package exists.**\n\n## PACKAGE SELECTION STRATEGY\n1. **FIRST CHOICE**: Use established, widely-adopted packages (e.g., pandas, numpy, scipy, requests, beautifulsoup4)\n2. **SECOND CHOICE**: Use domain-specific packages (e.g., biopython for bioinformatics, matplotlib for plotting)\n3. **THIRD CHOICE**: Use specialized libraries (e.g., opencv-python for computer vision, scikit-learn for ML)\n4. **LAST RESORT**: Only implement custom algorithms when no suitable package exists\n\n## REQUIREMENTS ANALYSIS\n1. Parse the tool description to understand core functionality\n2. **Research existing packages** that can solve this problem\n3. **Evaluate package suitability** (popularity, maintenance, performance)\n4. Design implementation using the best available packages\n5. 
Generate production-ready code with proper error handling\n\n## CODE GENERATION GUIDELINES\n- **ALWAYS check if an existing package can solve the problem**\n- Use established libraries and algorithms for the domain\n- Include comprehensive input validation\n- Implement proper error handling with meaningful messages\n- Add informative comments explaining the logic\n- Return structured results with validation status\n- Consider edge cases and handle them gracefully\n- Follow domain-specific best practices\n- **Document why you chose specific packages**\n\n## DOMAIN-SPECIFIC PACKAGE RECOMMENDATIONS\n\n### Bioinformatics Tools\n- **Biopython**: Sequence analysis, molecular calculations, file format handling\n- **scipy.stats**: Statistical analysis for biological data\n- **pandas**: Data manipulation and analysis\n- **numpy**: Numerical computations\n- **matplotlib/seaborn**: Data visualization\n\n### Mathematical Tools\n- **numpy**: Numerical methods, linear algebra, optimization\n- **scipy**: Scientific computing, optimization, signal processing\n- **sympy**: Symbolic mathematics\n- **pandas**: Data analysis and statistics\n- **matplotlib**: Mathematical plotting\n\n### Data Science Tools\n- **pandas**: Data manipulation and analysis\n- **numpy**: Numerical computations\n- **scipy**: Statistical methods and optimization\n- **scikit-learn**: Machine learning algorithms\n- **matplotlib/seaborn**: Data visualization\n\n### Web API Tools\n- **requests**: HTTP handling and API calls\n- **urllib3**: HTTP client library\n- **aiohttp**: Asynchronous HTTP client/server\n- **httpx**: Modern HTTP client\n- **beautifulsoup4**: HTML/XML parsing\n\n### Text Processing Tools\n- **nltk**: Natural language processing\n- **spaCy**: Advanced NLP\n- **textblob**: Simple text processing\n- **re**: Regular expressions (built-in)\n- **difflib**: Sequence matching (built-in)\n\n### General Purpose\n- **pathlib**: File path operations (built-in)\n- **json**: JSON handling (built-in)\n- 
**csv**: CSV file handling (built-in)\n- **datetime**: Date/time operations (built-in)\n- **collections**: Advanced data structures (built-in)\n\n## PACKAGE EVALUATION CRITERIA\nWhen choosing packages, consider:\n1. **Popularity**: GitHub stars, PyPI downloads, community size\n2. **Maintenance**: Recent updates, active development\n3. **Documentation**: Quality and completeness of docs\n4. **Performance**: Speed and memory efficiency\n5. **Compatibility**: Python version support, dependencies\n6. **License**: Open source, commercial-friendly\n\n## IMPLEMENTATION APPROACH\n1. **Research Phase**: Identify 2-3 candidate packages for the problem\n2. **Evaluation Phase**: Compare packages based on criteria above\n3. **Selection Phase**: Choose the best package and document why\n4. **Implementation Phase**: Use the selected package with proper error handling\n5. **Fallback Phase**: If package fails, implement minimal custom solution\n\n## RESPONSE FORMAT\nReturn a comprehensive JSON object:\n\n```json\n{\n \"implementation\": {\n \"source_code\": \"<complete Python function implementation>\",\n \"dependencies\": [\"<required packages>\"],\n \"imports\": [\"<import statements>\"],\n \"algorithm_description\": \"<explanation of chosen algorithm/approach>\",\n \"complexity\": \"<time/space complexity analysis>\",\n \"test_cases\": [\n {\n \"input\": \"<test input>\",\n \"expected_output\": \"<expected result>\",\n \"description\": \"<what this test validates>\"\n }\n ],\n \"package_justification\": \"<explanation of why specific packages were chosen>\",\n \"alternative_packages\": [\"<other packages considered but not chosen>\"]\n },\n \"quality_metrics\": {\n \"estimated_accuracy\": \"<accuracy assessment>\",\n \"performance_characteristics\": \"<performance description>\",\n \"robustness_level\": \"<error handling assessment>\"\n },\n \"documentation\": {\n \"usage_examples\": [\"<code examples>\"],\n \"parameter_explanations\": \"<detailed parameter descriptions>\",\n 
\"return_format\": \"<description of return value structure>\"\n }\n}\n```\n\n## FINAL REMINDER\n**ALWAYS prefer using existing, well-maintained packages over custom implementations.** Only write custom code when absolutely necessary. Your goal is to create robust, maintainable tools that leverage the Python ecosystem's strengths.",
822
+ "input_arguments": [
823
+ "tool_description",
824
+ "tool_parameters",
825
+ "domain",
826
+ "complexity_level",
827
+ "performance_requirements"
828
+ ],
829
+ "parameter": {
830
+ "type": "object",
831
+ "properties": {
832
+ "tool_description": {
833
+ "type": "string",
834
+ "description": "Detailed description of what the tool should accomplish",
835
+ "required": true
836
+ },
837
+ "tool_parameters": {
838
+ "type": "string",
839
+ "description": "JSON string of parameter schema for the tool",
840
+ "required": true
841
+ },
842
+ "domain": {
843
+ "type": "string",
844
+ "description": "Domain area for specialized implementation",
845
+ "enum": ["bioinformatics", "data-science", "web-api", "mathematical", "text-processing", "general"],
846
+ "default": "general",
847
+ "required": false
848
+ },
849
+ "complexity_level": {
850
+ "type": "string",
851
+ "enum": ["basic", "intermediate", "advanced"],
852
+ "description": "Desired complexity level of implementation",
853
+ "default": "intermediate",
854
+ "required": false
855
+ },
856
+ "performance_requirements": {
857
+ "type": "string",
858
+ "description": "Performance requirements or constraints",
859
+ "required": false
860
+ }
861
+ },
862
+ "required": ["tool_description", "tool_parameters"]
863
+ },
864
+ "configs": {
865
+ "api_type": "CHATGPT",
866
+ "model_id": "o4-mini-0416",
867
+ "temperature": 0.2,
868
+ "max_new_tokens": 50000,
869
+ "return_json": true
870
+ }
871
+ },
872
+ {
873
+ "type": "AgenticTool",
874
+ "name": "ToolSpecificationOptimizer",
875
+ "description": "Optimizes tool specifications for clarity, completeness, and usability with comprehensive benchmarking against similar tools",
876
+ "prompt": "You are an expert in tool design and user experience optimization. Analyze and optimize the provided tool specification for maximum effectiveness.\n\n## TOOL SPECIFICATION TO OPTIMIZE\nCurrent Configuration:\n{tool_config}\n\nOptimization Focus: {optimization_focus}\nTarget Audience: {target_audience}\n\nSimilar Tools for Benchmarking:\n{similar_tools}\n\n## OPTIMIZATION FRAMEWORK\n\n### 1. CLARITY ASSESSMENT\n- Description comprehensiveness and clarity\n- Parameter naming and documentation\n- Usage examples and guidance\n- Error message quality\n\n### 2. COMPLETENESS EVALUATION\n- Required vs optional parameters\n- Edge case handling\n- Output format specification\n- Documentation coverage\n\n### 3. USABILITY ANALYSIS\n- Ease of understanding for target audience\n- Parameter complexity and defaults\n- Error prevention design\n- Workflow integration\n\n### 4. PERFORMANCE OPTIMIZATION\n- Parameter validation efficiency\n- Resource usage considerations\n- Scalability factors\n- Response time optimization\n\n### 5. COMPARATIVE BENCHMARKING\n- Analyze similar tools for best practices\n- Identify competitive advantages\n- Address common user pain points\n- Incorporate proven design patterns\n\n## OPTIMIZATION PROCESS\n1. Evaluate current specification quality\n2. Identify improvement opportunities\n3. Benchmark against similar tools\n4. Generate optimized version\n5. 
Provide detailed rationale for changes\n\n## RESPONSE FORMAT\nReturn a comprehensive optimization report:\n\n```json\n{\n \"optimized_config\": {\n \"name\": \"<optimized name>\",\n \"description\": \"<improved description>\",\n \"parameter\": {\n \"type\": \"object\",\n \"properties\": \"<optimized parameters>\",\n \"required\": \"<updated required fields>\"\n },\n \"examples\": [\"<usage examples>\"],\n \"metadata\": {\n \"tags\": [\"<relevant tags>\"],\n \"difficulty_level\": \"<user difficulty>\",\n \"estimated_execution_time\": \"<typical runtime>\"\n }\n },\n \"improvements\": [\n {\n \"area\": \"<improvement area>\",\n \"change\": \"<what was changed>\",\n \"rationale\": \"<why this improves the tool>\",\n \"impact\": \"<expected user impact>\"\n }\n ],\n \"quality_score\": {\n \"before\": \"<0-10>\",\n \"after\": \"<0-10>\",\n \"improvement\": \"<difference>\"\n },\n \"recommendations\": [\n {\n \"type\": \"enhancement|fix|optimization\",\n \"description\": \"<recommendation>\",\n \"priority\": \"high|medium|low\"\n }\n ]\n}\n```",
877
+ "input_arguments": [
878
+ "tool_config",
879
+ "optimization_focus",
880
+ "target_audience",
881
+ "similar_tools"
882
+ ],
883
+ "parameter": {
884
+ "type": "object",
885
+ "properties": {
886
+ "tool_config": {
887
+ "type": "string",
888
+ "description": "JSON string of current tool configuration to optimize",
889
+ "required": true
890
+ },
891
+ "optimization_focus": {
892
+ "type": "string",
893
+ "enum": ["clarity", "completeness", "usability", "performance", "all"],
894
+ "description": "Primary optimization focus",
895
+ "default": "all",
896
+ "required": false
897
+ },
898
+ "target_audience": {
899
+ "type": "string",
900
+ "enum": ["beginner", "intermediate", "expert", "mixed"],
901
+ "description": "Target user expertise level",
902
+ "default": "mixed",
903
+ "required": false
904
+ },
905
+ "similar_tools": {
906
+ "type": "string",
907
+ "description": "JSON string containing an array of similar tools for comparison and benchmarking",
908
+ "required": false
909
+ }
910
+ },
911
+ "required": ["tool_config"]
912
+ },
913
+ "configs": {
914
+ "api_type": "CHATGPT",
915
+ "model_id": "o4-mini-0416",
916
+ "temperature": 0.3,
917
+ "max_new_tokens": 3500,
918
+ "return_json": true
919
+ }
920
+ },
921
+ {
922
+ "type": "AgenticTool",
923
+ "name": "DomainExpertValidator",
924
+ "description": "Provides domain-specific validation and expert recommendations for tools with deep expertise across scientific and technical domains",
925
+ "prompt": "You are a domain expert with deep knowledge in {domain}. Validate the tool from a domain-specific perspective and provide expert recommendations.\n\n## TOOL TO VALIDATE\nTool Configuration:\n{tool_config}\n\nValidation Aspects: {validation_aspects}\n\nImplementation Code (if available):\n```python\n{implementation_code}\n```\n\n## DOMAIN EXPERTISE VALIDATION FRAMEWORK\n\n### 1. SCIENTIFIC ACCURACY\n- Verify methods and algorithms are scientifically sound\n- Check mathematical/statistical correctness\n- Validate against established standards\n- Assess theoretical foundations\n\n### 2. BEST PRACTICES COMPLIANCE\n- Domain-specific best practices adherence\n- Industry standard compliance\n- Professional guidelines following\n- Ethical considerations\n\n### 3. STANDARDS COMPLIANCE\n- Relevant format standards (e.g., FASTA, JSON, XML)\n- API standards and protocols\n- Data interchange formats\n- Certification requirements\n\n### 4. METHODOLOGY ASSESSMENT\n- Algorithm selection appropriateness\n- Computational approach evaluation\n- Scalability and efficiency\n- Error handling robustness\n\n### 5. SAFETY AND ETHICS\n- Safety considerations and risk assessment\n- Ethical implications\n- Privacy and security concerns\n- Regulatory compliance\n\n### 6. 
PERFORMANCE EVALUATION\n- Computational efficiency for domain\n- Resource requirements assessment\n- Accuracy and precision expectations\n- Scalability considerations\n\n## DOMAIN-SPECIFIC VALIDATION CRITERIA\n\n### Bioinformatics/Computational Biology\n- Sequence format handling (FASTA, GenBank, etc.)\n- Algorithm complexity for biological data\n- Database integration standards\n- Biological validity of results\n\n### Data Science/Machine Learning\n- Statistical method validity\n- Data preprocessing requirements\n- Model validation approaches\n- Performance metric appropriateness\n\n### Mathematics/Physics\n- Numerical stability and precision\n- Mathematical correctness\n- Physical law compliance\n- Convergence criteria\n\n### Chemistry\n- Molecular representation standards\n- Chemical safety considerations\n- Reaction mechanism validity\n- Thermodynamic consistency\n\n### Web Development\n- Security best practices\n- API design standards\n- Scalability requirements\n- Performance optimization\n\n## RESPONSE FORMAT\nProvide comprehensive domain expert analysis:\n\n```json\n{\n \"validation_results\": {\n \"overall_validity\": \"<0-10>\",\n \"domain_appropriateness\": \"<0-10>\",\n \"methodology_score\": \"<0-10>\",\n \"standards_compliance\": \"<0-10>\"\n },\n \"expert_analysis\": {\n \"strengths\": [\"<domain-specific strengths>\"],\n \"concerns\": [\n {\n \"severity\": \"low|medium|high|critical\",\n \"area\": \"<concern area>\",\n \"description\": \"<detailed concern>\",\n \"impact\": \"<potential impact>\",\n \"recommendation\": \"<how to address>\"\n }\n ],\n \"missing_considerations\": [\"<important missing aspects>\"]\n },\n \"domain_recommendations\": [\n {\n \"type\": \"algorithm|library|approach|validation|standard\",\n \"recommendation\": \"<specific recommendation>\",\n \"rationale\": \"<domain expertise rationale>\",\n \"priority\": \"high|medium|low\",\n \"implementation_hint\": \"<how to implement>\"\n }\n ],\n \"compliance_check\": {\n 
\"standards_followed\": [\"<relevant standards>\"],\n \"standards_violations\": [\"<violations found>\"],\n \"certification_notes\": \"<certification/validation notes>\"\n }\n}\n```",
926
+ "input_arguments": [
927
+ "tool_config",
928
+ "domain",
929
+ "validation_aspects",
930
+ "implementation_code"
931
+ ],
932
+ "parameter": {
933
+ "type": "object",
934
+ "properties": {
935
+ "tool_config": {
936
+ "type": "string",
937
+ "description": "JSON string of tool configuration to validate",
938
+ "required": true
939
+ },
940
+ "domain": {
941
+ "type": "string",
942
+ "description": "Domain expertise area for validation",
943
+ "enum": ["bioinformatics", "computational-biology", "data-science", "machine-learning", "web-development", "mathematics", "chemistry", "physics", "general"],
944
+ "required": true
945
+ },
946
+ "validation_aspects": {
947
+ "type": "string",
948
+ "description": "JSON string containing an array of specific aspects to validate",
949
+ "default": "[\"accuracy\", \"methodology\", \"best-practices\"]",
950
+ "required": false
951
+ },
952
+ "implementation_code": {
953
+ "type": "string",
954
+ "description": "Implementation code to validate (optional)",
955
+ "required": false
956
+ }
957
+ },
958
+ "required": ["tool_config", "domain"]
959
+ },
960
+ "configs": {
961
+ "api_type": "CHATGPT",
962
+ "model_id": "o4-mini-0416",
963
+ "temperature": 0.1,
964
+ "max_new_tokens": 3500,
965
+ "return_json": true
966
+ }
967
+ },
968
+ {
969
+ "type": "AgenticTool",
970
+ "name": "ToolQualityEvaluator",
971
+ "description": "Evaluates the quality of tool configurations and implementations. Provides detailed scoring and feedback for improvement.",
972
+ "prompt": "You are a senior software architect. Evaluate this tool for quality and completeness:\n\nTool Configuration: {tool_config}\nTest Cases: {test_cases}\nEvaluation Aspects: {evaluation_aspects}\n\nProvide a comprehensive evaluation in this JSON format:\n{\n \"overall_score\": 8.5,\n \"scores\": {\n \"functionality\": 9.0,\n \"usability\": 8.0,\n \"completeness\": 8.5,\n \"best_practices\": 8.0\n },\n \"feedback\": {\n \"strengths\": [\"Clear parameter definitions\", \"Good error handling\"],\n \"weaknesses\": [\"Missing input validation\", \"Limited test coverage\"],\n \"suggestions\": [\"Add parameter validation\", \"Include more edge case tests\"]\n },\n \"quality_rating\": \"Excellent|Good|Fair|Poor\"\n}\n\nEvaluate based on:\n1. Parameter design quality\n2. Implementation robustness\n3. Error handling completeness\n4. Documentation clarity\n5. Test coverage adequacy\n6. Code quality and best practices\n7. Usability and user experience\n\nProvide specific, actionable feedback.",
973
+ "input_arguments": [
974
+ "tool_config",
975
+ "test_cases",
976
+ "evaluation_aspects"
977
+ ],
978
+ "parameter": {
979
+ "type": "object",
980
+ "properties": {
981
+ "tool_config": {
982
+ "type": "string",
983
+ "description": "JSON string of the tool configuration"
984
+ },
985
+ "test_cases": {
986
+ "type": "string",
987
+ "description": "JSON string of test cases"
988
+ },
989
+ "evaluation_aspects": {
990
+ "type": "array",
991
+ "description": "Aspects to evaluate (functionality, usability, completeness, best_practices)"
992
+ }
993
+ },
994
+ "required": ["tool_config"]
995
+ },
996
+ "configs": {
997
+ "api_type": "CHATGPT",
998
+ "model_id": "o4-mini-0416",
999
+ "temperature": 0.2,
1000
+ "max_new_tokens": 2048,
1001
+ "return_json": true
1002
+ }
1003
+ },
1004
+ {
1005
+ "type": "AgenticTool",
1006
+ "name": "ToolOptimizer",
1007
+ "description": "Optimizes tool configurations based on quality feedback. Improves tool specifications and implementations to address identified issues.",
1008
+ "prompt": "You are an expert tool optimizer. Improve this tool based on the quality feedback:\n\nOriginal Tool Configuration: {tool_config}\nQuality Feedback: {quality_feedback}\nOptimization Target: {optimization_target}\n\nGenerate an optimized version in this JSON format:\n{\n \"optimized_tool\": {\n \"name\": \"improved_tool_name\",\n \"type\": \"tool_type\",\n \"description\": \"enhanced_description\",\n \"parameter\": {\n \"type\": \"object\",\n \"properties\": {},\n \"required\": []\n },\n \"category\": \"category\",\n \"implementation\": {\n \"source_code\": \"improved_code\",\n \"dependencies\": [],\n \"main_function\": \"execute_tool\"\n }\n },\n \"improvements_made\": [\n \"Added input validation\",\n \"Enhanced error handling\",\n \"Improved parameter descriptions\"\n ],\n \"optimization_notes\": \"Summary of key improvements\"\n}\n\nFocus on:\n1. Fixing identified weaknesses\n2. Enhancing parameter validation\n3. Improving error handling\n4. Adding missing functionality\n5. Optimizing for the specified target\n6. Maintaining backward compatibility where possible",
1009
+ "input_arguments": [
1010
+ "tool_config",
1011
+ "quality_feedback",
1012
+ "optimization_target"
1013
+ ],
1014
+ "parameter": {
1015
+ "type": "object",
1016
+ "properties": {
1017
+ "tool_config": {
1018
+ "type": "string",
1019
+ "description": "JSON string of the original tool configuration"
1020
+ },
1021
+ "quality_feedback": {
1022
+ "type": "string",
1023
+ "description": "JSON string of quality evaluation feedback"
1024
+ },
1025
+ "optimization_target": {
1026
+ "type": "string",
1027
+ "description": "What to optimize for (improve_quality, enhance_performance, etc.)"
1028
+ }
1029
+ },
1030
+ "required": ["tool_config", "quality_feedback"]
1031
+ },
1032
+ "configs": {
1033
+ "api_type": "CHATGPT",
1034
+ "model_id": "o4-mini-0416",
1035
+ "temperature": 0.3,
1036
+ "max_new_tokens": 3072,
1037
+ "return_json": true
1038
+ }
1039
+ },
1040
+ {
1041
+ "type": "AgenticTool",
1042
+ "name": "CodeOptimizer",
1043
+ "description": "Optimizes code implementation for tools based on quality evaluation. Takes tool configuration and quality evaluation results to produce improved source code.",
1044
+ "prompt": "You are an expert software engineer specializing in code optimization. Your task is to optimize the implementation code of a tool based on the provided configuration and quality evaluation.\n\n## TOOL CONFIGURATION\n{tool_config}\n\n## QUALITY EVALUATION\n{quality_evaluation}\n\n## OPTIMIZATION INSTRUCTIONS\nAnalyze the current implementation and quality evaluation feedback, then provide an optimized version that addresses the identified issues. Focus on:\n1. **Code Quality**: Improve readability, structure, and maintainability\n2. **Error Handling**: Enhance exception handling, input validation, and robustness\n3. **Performance**: Optimize algorithms, reduce complexity, improve efficiency\n4. **Best Practices**: Follow Python standards, proper documentation, and clean code principles\n\nReturn a JSON object with the following structure:\n```json\n{\n \"implementation\": {\n \"source_code\": \"<optimized_python_code>\",\n \"dependencies\": [\"<required_packages>\"],\n \"imports\": [\"<import_statements>\"],\n \"improvements_made\": [\"list of specific improvements based on quality evaluation feedback\"],\n \"addressed_issues\": [\"list of issues that were fixed\"],\n \"quality_improvements\": \"summary of overall quality improvements\"\n }\n}\n```\n\nEnsure the optimized code:\n- Maintains the same functionality as the original\n- Addresses the specific issues mentioned in quality evaluation feedback\n- Includes proper error handling and input validation\n- Follows Python best practices and PEP 8 standards\n- Is well-documented with clear docstrings and comments",
1045
+ "input_arguments": [
1046
+ "tool_config",
1047
+ "quality_evaluation"
1048
+ ],
1049
+ "parameter": {
1050
+ "type": "object",
1051
+ "properties": {
1052
+ "tool_config": {
1053
+ "type": "string",
1054
+ "description": "JSON string containing the complete tool configuration including current implementation",
1055
+ "required": true
1056
+ },
1057
+ "quality_evaluation": {
1058
+ "type": "string",
1059
+ "description": "JSON string containing quality evaluation results and feedback",
1060
+ "required": true
1061
+ }
1062
+ },
1063
+ "required": [
1064
+ "tool_config",
1065
+ "quality_evaluation"
1066
+ ]
1067
+ },
1068
+ "configs": {
1069
+ "api_type": "CHATGPT",
1070
+ "model_id": "o4-mini-0416",
1071
+ "temperature": 0.2,
1072
+ "max_new_tokens": 4096,
1073
+ "return_json": true
1074
+ }
1075
+ },
1076
+ {
1077
+ "type": "AgenticTool",
1078
+ "name": "LabelGenerator",
1079
+ "description": "Generates relevant keyword labels for tools based on their name, description, parameters, and category. Creates a comprehensive list of tags for tool discovery and categorization.",
1080
+ "prompt": "You are an expert in tool categorization and keyword generation. Your task is to generate relevant, descriptive labels/keywords for a tool that will help with discovery and organization.\n\n## TOOL INFORMATION\n- **Tool Name**: {tool_name}\n- **Description**: {tool_description}\n- **Parameters**: {tool_parameters}\n- **Category**: {category}\n\n## LABEL GENERATION INSTRUCTIONS\nAnalyze the tool information and generate a comprehensive list of relevant labels/keywords that:\n\n1. **Capture Tool Functionality**: What the tool does, its purpose\n2. **Describe Input/Output Types**: Data types, formats, domains\n3. **Indicate Use Cases**: When and where this tool would be useful\n4. **Reference Technical Domains**: Scientific fields, technologies, methodologies\n5. **Include Semantic Variations**: Synonyms, related terms, alternative descriptions\n\n## LABELING GUIDELINES\n- Generate 8-15 relevant labels\n- Use lowercase, hyphenated format (e.g., 'protein-analysis', 'molecular-weight')\n- Include both specific and general terms\n- Avoid overly generic terms like 'tool', 'utility', 'helper'\n- Include domain-specific terminology when applicable\n- Consider both technical and user-friendly terms\n\n## EXAMPLES\nFor a protein sequence analyzer:\n- Technical: ['protein-analysis', 'sequence-processing', 'bioinformatics', 'amino-acid-composition']\n- Functional: ['molecular-biology', 'protein-characterization', 'structural-analysis']\n- Use-case: ['research-tool', 'computational-biology', 'biochemistry']\n\nReturn a JSON object with the following structure:\n```json\n{\n \"labels\": [\"keyword1\", \"keyword2\", \"keyword3\", ...],\n \"rationale\": \"Brief explanation of label selection strategy\"\n}\n```",
1081
+ "input_arguments": [
1082
+ "tool_name",
1083
+ "tool_description",
1084
+ "tool_parameters",
1085
+ "category"
1086
+ ],
1087
+ "parameter": {
1088
+ "type": "object",
1089
+ "properties": {
1090
+ "tool_name": {
1091
+ "type": "string",
1092
+ "description": "The name of the tool",
1093
+ "required": true
1094
+ },
1095
+ "tool_description": {
1096
+ "type": "string",
1097
+ "description": "Detailed description of what the tool does",
1098
+ "required": true
1099
+ },
1100
+ "tool_parameters": {
1101
+ "type": "string",
1102
+ "description": "JSON string describing the tool's input parameters and their types",
1103
+ "required": true
1104
+ },
1105
+ "category": {
1106
+ "type": "string",
1107
+ "description": "The general category or domain the tool belongs to",
1108
+ "required": true
1109
+ }
1110
+ },
1111
+ "required": [
1112
+ "tool_name",
1113
+ "tool_description",
1114
+ "tool_parameters",
1115
+ "category"
1116
+ ]
1117
+ },
1118
+ "configs": {
1119
+ "api_type": "CHATGPT",
1120
+ "model_id": "o4-mini-0416",
1121
+ "temperature": 0.7,
1122
+ "max_new_tokens": 2048,
1123
+ "return_json": true
1124
+ }
1125
+ },
1126
+ {
1127
+ "type": "AgenticTool",
1128
+ "name": "call_agentic_human",
1129
+ "description": "Produces a concise, practical answer that emulates how a well-informed human would respond to the question.",
1130
+ "prompt": "You are not an AI. You are a knowledgeable human expert speaking naturally. Read the user's question carefully and respond as a thoughtful person would, giving a clear, direct, and context-aware answer. Do not mention AI, ChatGPT, or models. If the question involves uncertainty or opinion, share a reasoned perspective in a human tone.\n\nQuestion: {question}\n\nAnswer:",
1131
+ "input_arguments": [
1132
+ "question"
1133
+ ],
1134
+ "parameter": {
1135
+ "type": "object",
1136
+ "properties": {
1137
+ "question": {
1138
+ "type": "string",
1139
+ "description": "The user's question to be answered in a human-like manner.",
1140
+ "required": true
1141
+ }
1142
+ },
1143
+ "required": [
1144
+ "question"
1145
+ ]
1146
+ },
1147
+ "configs": {
1148
+ "api_type": "CHATGPT",
1149
+ "model_id": "o4-mini-0416",
1150
+ "temperature": 0.7,
1151
+ "max_new_tokens": 1024,
1152
+ "return_json": false
1153
+ }
1154
+ }
1155
+
1156
+ ]