tooluniverse 1.0.9.1__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +57 -1
- tooluniverse/admetai_tool.py +1 -1
- tooluniverse/agentic_tool.py +65 -17
- tooluniverse/base_tool.py +19 -8
- tooluniverse/blast_tool.py +132 -0
- tooluniverse/boltz_tool.py +3 -3
- tooluniverse/cache/result_cache_manager.py +167 -12
- tooluniverse/cbioportal_tool.py +42 -0
- tooluniverse/clinvar_tool.py +268 -74
- tooluniverse/compose_scripts/drug_safety_analyzer.py +1 -1
- tooluniverse/compose_scripts/multi_agent_literature_search.py +1 -1
- tooluniverse/compose_scripts/output_summarizer.py +4 -4
- tooluniverse/compose_scripts/tool_discover.py +1941 -443
- tooluniverse/compose_scripts/tool_graph_composer.py +1 -1
- tooluniverse/compose_scripts/tool_metadata_generator.py +1 -1
- tooluniverse/compose_tool.py +9 -9
- tooluniverse/core_tool.py +2 -2
- tooluniverse/ctg_tool.py +4 -4
- tooluniverse/custom_tool.py +1 -1
- tooluniverse/data/agentic_tools.json +0 -370
- tooluniverse/data/alphafold_tools.json +6 -6
- tooluniverse/data/blast_tools.json +112 -0
- tooluniverse/data/cbioportal_tools.json +87 -0
- tooluniverse/data/clinvar_tools.json +235 -0
- tooluniverse/data/compose_tools.json +0 -89
- tooluniverse/data/dbsnp_tools.json +275 -0
- tooluniverse/data/emdb_tools.json +61 -0
- tooluniverse/data/ensembl_tools.json +259 -0
- tooluniverse/data/file_download_tools.json +275 -0
- tooluniverse/data/geo_tools.json +200 -48
- tooluniverse/data/gnomad_tools.json +109 -0
- tooluniverse/data/gtopdb_tools.json +68 -0
- tooluniverse/data/gwas_tools.json +32 -0
- tooluniverse/data/interpro_tools.json +199 -0
- tooluniverse/data/jaspar_tools.json +70 -0
- tooluniverse/data/kegg_tools.json +356 -0
- tooluniverse/data/mpd_tools.json +87 -0
- tooluniverse/data/ols_tools.json +314 -0
- tooluniverse/data/package_discovery_tools.json +64 -0
- tooluniverse/data/packages/categorized_tools.txt +0 -1
- tooluniverse/data/packages/machine_learning_tools.json +0 -47
- tooluniverse/data/paleobiology_tools.json +91 -0
- tooluniverse/data/pride_tools.json +62 -0
- tooluniverse/data/pypi_package_inspector_tools.json +158 -0
- tooluniverse/data/python_executor_tools.json +341 -0
- tooluniverse/data/regulomedb_tools.json +50 -0
- tooluniverse/data/remap_tools.json +89 -0
- tooluniverse/data/screen_tools.json +89 -0
- tooluniverse/data/tool_discovery_agents.json +428 -0
- tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
- tooluniverse/data/uniprot_tools.json +77 -0
- tooluniverse/data/web_search_tools.json +250 -0
- tooluniverse/data/worms_tools.json +55 -0
- tooluniverse/dataset_tool.py +2 -2
- tooluniverse/dbsnp_tool.py +196 -58
- tooluniverse/default_config.py +36 -3
- tooluniverse/emdb_tool.py +30 -0
- tooluniverse/enrichr_tool.py +14 -14
- tooluniverse/ensembl_tool.py +140 -47
- tooluniverse/execute_function.py +594 -29
- tooluniverse/extended_hooks.py +4 -4
- tooluniverse/file_download_tool.py +269 -0
- tooluniverse/gene_ontology_tool.py +1 -1
- tooluniverse/generate_tools.py +3 -3
- tooluniverse/geo_tool.py +81 -28
- tooluniverse/gnomad_tool.py +100 -52
- tooluniverse/gtopdb_tool.py +41 -0
- tooluniverse/humanbase_tool.py +10 -10
- tooluniverse/interpro_tool.py +72 -0
- tooluniverse/jaspar_tool.py +30 -0
- tooluniverse/kegg_tool.py +230 -0
- tooluniverse/logging_config.py +2 -2
- tooluniverse/mcp_client_tool.py +57 -129
- tooluniverse/mcp_integration.py +52 -49
- tooluniverse/mcp_tool_registry.py +147 -528
- tooluniverse/mpd_tool.py +42 -0
- tooluniverse/ncbi_eutils_tool.py +96 -0
- tooluniverse/ols_tool.py +435 -0
- tooluniverse/openalex_tool.py +8 -8
- tooluniverse/openfda_tool.py +2 -2
- tooluniverse/output_hook.py +15 -15
- tooluniverse/package_discovery_tool.py +217 -0
- tooluniverse/package_tool.py +1 -1
- tooluniverse/paleobiology_tool.py +30 -0
- tooluniverse/pmc_tool.py +2 -2
- tooluniverse/pride_tool.py +30 -0
- tooluniverse/pypi_package_inspector_tool.py +593 -0
- tooluniverse/python_executor_tool.py +711 -0
- tooluniverse/regulomedb_tool.py +30 -0
- tooluniverse/remap_tool.py +44 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +1 -1
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +3 -3
- tooluniverse/remote/immune_compass/compass_tool.py +3 -3
- tooluniverse/remote/pinnacle/pinnacle_tool.py +2 -2
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +3 -3
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +3 -3
- tooluniverse/remote_tool.py +4 -4
- tooluniverse/screen_tool.py +44 -0
- tooluniverse/scripts/filter_tool_files.py +2 -2
- tooluniverse/smcp.py +93 -12
- tooluniverse/smcp_server.py +100 -21
- tooluniverse/space/__init__.py +46 -0
- tooluniverse/space/loader.py +133 -0
- tooluniverse/space/validator.py +353 -0
- tooluniverse/tool_finder_embedding.py +5 -3
- tooluniverse/tool_finder_keyword.py +12 -10
- tooluniverse/tool_finder_llm.py +12 -8
- tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
- tooluniverse/tools/BLAST_protein_search.py +63 -0
- tooluniverse/tools/ClinVar_search_variants.py +26 -15
- tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
- tooluniverse/tools/EMDB_get_structure.py +46 -0
- tooluniverse/tools/GtoPdb_get_targets.py +52 -0
- tooluniverse/tools/InterPro_get_domain_details.py +46 -0
- tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
- tooluniverse/tools/InterPro_search_domains.py +52 -0
- tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
- tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
- tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
- tooluniverse/tools/PackageAnalyzer.py +55 -0
- tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
- tooluniverse/tools/PyPIPackageInspector.py +59 -0
- tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
- tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
- tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
- tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
- tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
- tooluniverse/tools/ToolDiscover.py +11 -11
- tooluniverse/tools/UniProt_id_mapping.py +63 -0
- tooluniverse/tools/UniProt_search.py +63 -0
- tooluniverse/tools/UnifiedToolGenerator.py +59 -0
- tooluniverse/tools/WoRMS_search_species.py +49 -0
- tooluniverse/tools/XMLToolOptimizer.py +55 -0
- tooluniverse/tools/__init__.py +119 -29
- tooluniverse/tools/_shared_client.py +3 -3
- tooluniverse/tools/alphafold_get_annotations.py +3 -3
- tooluniverse/tools/alphafold_get_prediction.py +3 -3
- tooluniverse/tools/alphafold_get_summary.py +3 -3
- tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
- tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
- tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
- tooluniverse/tools/clinvar_get_variant_details.py +49 -0
- tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
- tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
- tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
- tooluniverse/tools/download_binary_file.py +66 -0
- tooluniverse/tools/download_file.py +71 -0
- tooluniverse/tools/download_text_content.py +55 -0
- tooluniverse/tools/dynamic_package_discovery.py +59 -0
- tooluniverse/tools/ensembl_get_sequence.py +52 -0
- tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
- tooluniverse/tools/ensembl_lookup_gene.py +46 -0
- tooluniverse/tools/geo_get_dataset_info.py +46 -0
- tooluniverse/tools/geo_get_sample_info.py +46 -0
- tooluniverse/tools/geo_search_datasets.py +67 -0
- tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
- tooluniverse/tools/kegg_find_genes.py +52 -0
- tooluniverse/tools/kegg_get_gene_info.py +46 -0
- tooluniverse/tools/kegg_get_pathway_info.py +46 -0
- tooluniverse/tools/kegg_list_organisms.py +44 -0
- tooluniverse/tools/kegg_search_pathway.py +46 -0
- tooluniverse/tools/ols_find_similar_terms.py +63 -0
- tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
- tooluniverse/tools/ols_get_term_ancestors.py +67 -0
- tooluniverse/tools/ols_get_term_children.py +67 -0
- tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
- tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
- tooluniverse/tools/ols_search_terms.py +71 -0
- tooluniverse/tools/python_code_executor.py +79 -0
- tooluniverse/tools/python_script_runner.py +79 -0
- tooluniverse/tools/web_api_documentation_search.py +63 -0
- tooluniverse/tools/web_search.py +71 -0
- tooluniverse/uniprot_tool.py +219 -16
- tooluniverse/url_tool.py +19 -1
- tooluniverse/uspto_tool.py +1 -1
- tooluniverse/utils.py +12 -12
- tooluniverse/web_search_tool.py +229 -0
- tooluniverse/worms_tool.py +64 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +8 -3
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +184 -92
- tooluniverse/data/genomics_tools.json +0 -174
- tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
- tooluniverse/tools/ToolImplementationGenerator.py +0 -67
- tooluniverse/tools/ToolOptimizer.py +0 -59
- tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
- tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
- tooluniverse/ucsc_tool.py +0 -60
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1343 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"type": "AgenticTool",
|
|
4
|
+
"name": "CodeQualityAnalyzer",
|
|
5
|
+
"description": "Analyzes code quality from multiple dimensions including algorithmic correctness, functional implementation capability, performance characteristics, and best practices. Provides detailed feedback and improvement suggestions.",
|
|
6
|
+
"prompt": "You are an expert software engineer and code quality analyst. Please analyze the following code implementation and provide comprehensive quality assessment.\n\n## CODE TO ANALYZE\nTool Name: {tool_name}\nTool Description: {tool_description}\nTool Parameters: {tool_parameters}\nImplementation Code: {implementation_code}\nTest Cases: {test_cases}\nTest Execution Results: {test_execution_results}\n\n## ANALYSIS REQUIREMENTS\nPlease provide a comprehensive analysis covering the following dimensions:\n\n### 1. ALGORITHMIC CORRECTNESS (0-10)\n- Mathematical accuracy and logical correctness\n- Algorithm efficiency and time/space complexity\n- Edge case handling and boundary conditions\n- Error propagation and numerical stability\n- Correctness of domain-specific calculations\n\n### 2. FUNCTIONAL IMPLEMENTATION CAPABILITY (0-10)\n- Completeness of required functionality\n- Parameter validation and input handling\n- Return value accuracy and format consistency\n- Integration with external libraries/APIs\n- Feature completeness vs. requirements\n\n### 3. PERFORMANCE CHARACTERISTICS (0-10)\n- Time complexity analysis\n- Space complexity analysis\n- Memory usage optimization\n- Computational efficiency\n- Scalability considerations\n\n### 4. CODE QUALITY AND STRUCTURE (0-10)\n- Code readability and maintainability\n- Function and variable naming\n- Code organization and modularity\n- Documentation quality\n- Adherence to coding standards\n\n### 5. ERROR HANDLING AND ROBUSTNESS (0-10)\n- Exception handling coverage\n- Input validation robustness\n- Error message clarity and usefulness\n- Graceful degradation strategies\n- Recovery mechanisms\n\n### 6. TESTING AND VALIDATION (0-10)\n- Test coverage completeness\n- Test case quality and relevance\n- Edge case testing\n- Performance testing\n- Integration testing considerations\n- **IMPORTANT**: When test_execution_results are provided, use them to validate actual code behavior and adjust scoring accordingly\n\n### 7. 
SECURITY AND SAFETY (0-10)\n- Input sanitization and validation\n- Resource usage limits\n- Access control considerations\n- Data privacy protection\n- Security best practices\n\n### 8. MAINTAINABILITY AND EXTENSIBILITY (0-10)\n- Code modularity and reusability\n- Configuration flexibility\n- Future enhancement readiness\n- Dependency management\n- Technical debt assessment\n\n## TEST EXECUTION ANALYSIS\nWhen test_execution_results are provided:\n- Analyze actual test outcomes vs. expected results\n- Identify discrepancies between code behavior and test expectations\n- Use real execution data to validate code correctness\n- Adjust quality scores based on actual performance\n- Provide specific feedback on test failures and their implications\n\n## OUTPUT FORMAT\nProvide your analysis in the following JSON format:\n\n{\n \"overall_score\": <0-10>,\n \"scores\": {\n \"algorithmic_correctness\": <0-10>,\n \"functional_capability\": <0-10>,\n \"performance\": <0-10>,\n \"code_quality\": <0-10>,\n \"error_handling\": <0-10>,\n \"testing\": <0-10>,\n \"security\": <0-10>,\n \"maintainability\": <0-10>\n },\n \"feedback\": {\n \"strengths\": [\"list of code strengths\"],\n \"weaknesses\": [\"list of specific weaknesses\"],\n \"critical_issues\": [\"list of critical issues that must be fixed\"],\n \"improvement_opportunities\": [\"list of areas for improvement\"]\n },\n \"algorithm_analysis\": {\n \"complexity\": \"O(n) analysis\",\n \"correctness_verification\": \"mathematical verification details\",\n \"edge_cases\": \"identified edge cases\",\n \"numerical_stability\": \"numerical computation stability assessment\"\n },\n \"functional_verification\": {\n \"requirements_coverage\": \"percentage of requirements covered\",\n \"missing_features\": [\"list of missing features\"],\n \"integration_points\": [\"external dependencies and integration points\"],\n \"api_consistency\": \"API design consistency assessment\"\n },\n \"test_execution_analysis\": {\n 
\"test_results_summary\": \"summary of test execution outcomes\",\n \"pass_rate\": \"percentage of tests passed\",\n \"failed_tests\": [\"list of failed tests with reasons\"],\n \"actual_vs_expected\": \"analysis of actual vs expected behavior\"\n },\n \"recommendations\": [\n {\n \"priority\": \"high|medium|low\",\n \"category\": \"algorithm|functionality|performance|quality|security\",\n \"description\": \"specific improvement description\",\n \"action\": \"concrete action to take\",\n \"expected_impact\": \"expected improvement impact\"\n }\n ]\n}\n\n## ANALYSIS GUIDELINES\n- Be thorough and objective in your assessment\n- Provide specific examples from the code when possible\n- Focus on actionable feedback and concrete improvements\n- Consider both immediate fixes and long-term improvements\n- Evaluate code from both technical and business perspectives\n- Provide evidence-based scoring with clear justification\n- **When test_execution_results are available, prioritize actual behavior over theoretical analysis**",
|
|
7
|
+
"input_arguments": [
|
|
8
|
+
"tool_name",
|
|
9
|
+
"tool_description",
|
|
10
|
+
"tool_parameters",
|
|
11
|
+
"implementation_code",
|
|
12
|
+
"test_cases",
|
|
13
|
+
"test_execution_results"
|
|
14
|
+
],
|
|
15
|
+
"parameter": {
|
|
16
|
+
"type": "object",
|
|
17
|
+
"properties": {
|
|
18
|
+
"tool_name": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"description": "Name of the tool being analyzed"
|
|
21
|
+
},
|
|
22
|
+
"tool_description": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"description": "Description of what the tool is supposed to do"
|
|
25
|
+
},
|
|
26
|
+
"tool_parameters": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "JSON string of tool parameters and their types"
|
|
29
|
+
},
|
|
30
|
+
"implementation_code": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"description": "The actual implementation code to analyze"
|
|
33
|
+
},
|
|
34
|
+
"test_cases": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"description": "JSON string of test cases for the tool"
|
|
37
|
+
},
|
|
38
|
+
"test_execution_results": {
|
|
39
|
+
"type": "string",
|
|
40
|
+
"description": "JSON string of test execution results including pass/fail status and actual outputs"
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
"required": [
|
|
44
|
+
"tool_name",
|
|
45
|
+
"tool_description",
|
|
46
|
+
"tool_parameters",
|
|
47
|
+
"implementation_code",
|
|
48
|
+
"test_cases"
|
|
49
|
+
]
|
|
50
|
+
},
|
|
51
|
+
"configs": {
|
|
52
|
+
"api_type": "CHATGPT",
|
|
53
|
+
"model_id": "gpt-5",
|
|
54
|
+
"temperature": 1.0,
|
|
55
|
+
"max_new_tokens": 8192,
|
|
56
|
+
"return_json": true
|
|
57
|
+
},
|
|
58
|
+
"return_schema": {
|
|
59
|
+
"type": "object",
|
|
60
|
+
"properties": {
|
|
61
|
+
"overall_score": {
|
|
62
|
+
"type": "number",
|
|
63
|
+
"description": "Overall quality score from 0-10"
|
|
64
|
+
},
|
|
65
|
+
"scores": {
|
|
66
|
+
"type": "object",
|
|
67
|
+
"description": "Detailed scores for each quality dimension with sub-scores and issues",
|
|
68
|
+
"properties": {
|
|
69
|
+
"functionality": {
|
|
70
|
+
"type": "object",
|
|
71
|
+
"properties": {
|
|
72
|
+
"score": {
|
|
73
|
+
"type": "number"
|
|
74
|
+
},
|
|
75
|
+
"details": {
|
|
76
|
+
"type": "object",
|
|
77
|
+
"properties": {
|
|
78
|
+
"correctness": {
|
|
79
|
+
"type": "number"
|
|
80
|
+
},
|
|
81
|
+
"completeness": {
|
|
82
|
+
"type": "number"
|
|
83
|
+
},
|
|
84
|
+
"algorithm_choice": {
|
|
85
|
+
"type": "number"
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
},
|
|
89
|
+
"issues": {
|
|
90
|
+
"type": "array",
|
|
91
|
+
"items": {
|
|
92
|
+
"type": "string"
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
},
|
|
97
|
+
"code_quality": {
|
|
98
|
+
"type": "object",
|
|
99
|
+
"properties": {
|
|
100
|
+
"score": {
|
|
101
|
+
"type": "number"
|
|
102
|
+
},
|
|
103
|
+
"details": {
|
|
104
|
+
"type": "object",
|
|
105
|
+
"properties": {
|
|
106
|
+
"simplicity": {
|
|
107
|
+
"type": "number"
|
|
108
|
+
},
|
|
109
|
+
"readability": {
|
|
110
|
+
"type": "number"
|
|
111
|
+
},
|
|
112
|
+
"organization": {
|
|
113
|
+
"type": "number"
|
|
114
|
+
},
|
|
115
|
+
"idiomaticity": {
|
|
116
|
+
"type": "number"
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
},
|
|
120
|
+
"issues": {
|
|
121
|
+
"type": "array",
|
|
122
|
+
"items": {
|
|
123
|
+
"type": "string"
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
},
|
|
128
|
+
"performance": {
|
|
129
|
+
"type": "object",
|
|
130
|
+
"properties": {
|
|
131
|
+
"score": {
|
|
132
|
+
"type": "number"
|
|
133
|
+
},
|
|
134
|
+
"details": {
|
|
135
|
+
"type": "object",
|
|
136
|
+
"properties": {
|
|
137
|
+
"time_efficiency": {
|
|
138
|
+
"type": "number"
|
|
139
|
+
},
|
|
140
|
+
"space_efficiency": {
|
|
141
|
+
"type": "number"
|
|
142
|
+
},
|
|
143
|
+
"algorithm_optimality": {
|
|
144
|
+
"type": "number"
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
},
|
|
148
|
+
"issues": {
|
|
149
|
+
"type": "array",
|
|
150
|
+
"items": {
|
|
151
|
+
"type": "string"
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
},
|
|
156
|
+
"stability": {
|
|
157
|
+
"type": "object",
|
|
158
|
+
"properties": {
|
|
159
|
+
"score": {
|
|
160
|
+
"type": "number"
|
|
161
|
+
},
|
|
162
|
+
"details": {
|
|
163
|
+
"type": "object",
|
|
164
|
+
"properties": {
|
|
165
|
+
"error_handling": {
|
|
166
|
+
"type": "number"
|
|
167
|
+
},
|
|
168
|
+
"input_validation": {
|
|
169
|
+
"type": "number"
|
|
170
|
+
},
|
|
171
|
+
"edge_cases": {
|
|
172
|
+
"type": "number"
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
},
|
|
176
|
+
"issues": {
|
|
177
|
+
"type": "array",
|
|
178
|
+
"items": {
|
|
179
|
+
"type": "string"
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
},
|
|
184
|
+
"maintainability": {
|
|
185
|
+
"type": "object",
|
|
186
|
+
"properties": {
|
|
187
|
+
"score": {
|
|
188
|
+
"type": "number"
|
|
189
|
+
},
|
|
190
|
+
"details": {
|
|
191
|
+
"type": "object",
|
|
192
|
+
"properties": {
|
|
193
|
+
"documentation": {
|
|
194
|
+
"type": "number"
|
|
195
|
+
},
|
|
196
|
+
"modularity": {
|
|
197
|
+
"type": "number"
|
|
198
|
+
},
|
|
199
|
+
"testability": {
|
|
200
|
+
"type": "number"
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
},
|
|
204
|
+
"issues": {
|
|
205
|
+
"type": "array",
|
|
206
|
+
"items": {
|
|
207
|
+
"type": "string"
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
},
|
|
214
|
+
"improvement_suggestions": {
|
|
215
|
+
"type": "array",
|
|
216
|
+
"description": "Specific improvement suggestions with priorities",
|
|
217
|
+
"items": {
|
|
218
|
+
"type": "object",
|
|
219
|
+
"properties": {
|
|
220
|
+
"priority": {
|
|
221
|
+
"type": "string",
|
|
222
|
+
"enum": [
|
|
223
|
+
"high",
|
|
224
|
+
"medium",
|
|
225
|
+
"low"
|
|
226
|
+
]
|
|
227
|
+
},
|
|
228
|
+
"category": {
|
|
229
|
+
"type": "string",
|
|
230
|
+
"enum": [
|
|
231
|
+
"functionality",
|
|
232
|
+
"code_quality",
|
|
233
|
+
"performance",
|
|
234
|
+
"stability",
|
|
235
|
+
"maintainability"
|
|
236
|
+
]
|
|
237
|
+
},
|
|
238
|
+
"suggestion": {
|
|
239
|
+
"type": "string"
|
|
240
|
+
},
|
|
241
|
+
"location": {
|
|
242
|
+
"type": "string"
|
|
243
|
+
},
|
|
244
|
+
"impact": {
|
|
245
|
+
"type": "string"
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
},
|
|
250
|
+
"feedback": {
|
|
251
|
+
"type": "object",
|
|
252
|
+
"description": "Detailed feedback on code quality",
|
|
253
|
+
"properties": {
|
|
254
|
+
"strengths": {
|
|
255
|
+
"type": "array",
|
|
256
|
+
"items": {
|
|
257
|
+
"type": "string"
|
|
258
|
+
}
|
|
259
|
+
},
|
|
260
|
+
"weaknesses": {
|
|
261
|
+
"type": "array",
|
|
262
|
+
"items": {
|
|
263
|
+
"type": "string"
|
|
264
|
+
}
|
|
265
|
+
},
|
|
266
|
+
"critical_issues": {
|
|
267
|
+
"type": "array",
|
|
268
|
+
"items": {
|
|
269
|
+
"type": "string"
|
|
270
|
+
}
|
|
271
|
+
},
|
|
272
|
+
"improvement_opportunities": {
|
|
273
|
+
"type": "array",
|
|
274
|
+
"items": {
|
|
275
|
+
"type": "string"
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
},
|
|
280
|
+
"recommendations": {
|
|
281
|
+
"type": "array",
|
|
282
|
+
"description": "Specific improvement recommendations",
|
|
283
|
+
"items": {
|
|
284
|
+
"type": "object",
|
|
285
|
+
"properties": {
|
|
286
|
+
"priority": {
|
|
287
|
+
"type": "string",
|
|
288
|
+
"enum": [
|
|
289
|
+
"high",
|
|
290
|
+
"medium",
|
|
291
|
+
"low"
|
|
292
|
+
]
|
|
293
|
+
},
|
|
294
|
+
"category": {
|
|
295
|
+
"type": "string"
|
|
296
|
+
},
|
|
297
|
+
"description": {
|
|
298
|
+
"type": "string"
|
|
299
|
+
},
|
|
300
|
+
"action": {
|
|
301
|
+
"type": "string"
|
|
302
|
+
},
|
|
303
|
+
"expected_impact": {
|
|
304
|
+
"type": "string"
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
},
|
|
310
|
+
"required": [
|
|
311
|
+
"overall_score",
|
|
312
|
+
"scores",
|
|
313
|
+
"improvement_suggestions",
|
|
314
|
+
"feedback"
|
|
315
|
+
]
|
|
316
|
+
}
|
|
317
|
+
},
|
|
318
|
+
{
|
|
319
|
+
"type": "AgenticTool",
|
|
320
|
+
"name": "TestCaseGenerator",
|
|
321
|
+
"description": "Generates diverse and representative ToolUniverse tool call dictionaries for a given tool based on its parameter schema. Each tool call should be a JSON object with 'name' (the tool's name) and 'arguments' (a dict of input arguments), covering different parameter combinations, edge cases, and typical usage. Can generate targeted test cases based on previous optimization feedback.",
|
|
322
|
+
"prompt": "You are an expert software tester. Generate 3-5 diverse ToolUniverse tool call dictionaries for the given tool configuration. Each tool call must be a JSON object with 'name' (tool name) and 'arguments' (input parameters).\n\nFEEDBACK-DRIVEN GENERATION:\nIf tool_config contains '_optimization_feedback' and '_iteration', generate targeted test cases addressing the specific issues mentioned in the feedback. Focus on edge cases, parameter combinations, or usage patterns that need better coverage.\n\nSTANDARD GENERATION:\nCover typical usage, edge cases, and boundary conditions when possible.\n\nTool configuration: {tool_config}\n\nReturn a JSON object with key 'test_cases' containing an array of test case objects. Example format:\n{\"test_cases\": [{\"name\":\"tool_name_with_underscores\",\"arguments\":{\"param\":\"value\"}},{\"name\":\"tool_name_with_underscores\",\"arguments\":{\"param\":123}}]}",
|
|
323
|
+
"input_arguments": [
|
|
324
|
+
"tool_config"
|
|
325
|
+
],
|
|
326
|
+
"parameter": {
|
|
327
|
+
"type": "object",
|
|
328
|
+
"properties": {
|
|
329
|
+
"tool_config": {
|
|
330
|
+
"type": "object",
|
|
331
|
+
"description": "The full configuration of the tool to generate test cases for. May include '_optimization_feedback' and '_iteration' fields for feedback-driven test generation."
|
|
332
|
+
}
|
|
333
|
+
},
|
|
334
|
+
"required": [
|
|
335
|
+
"tool_config"
|
|
336
|
+
]
|
|
337
|
+
},
|
|
338
|
+
"configs": {
|
|
339
|
+
"api_type": "CHATGPT",
|
|
340
|
+
"model_id": "gpt-5",
|
|
341
|
+
"temperature": 1.0,
|
|
342
|
+
"max_new_tokens": 4096,
|
|
343
|
+
"return_json": true,
|
|
344
|
+
"response_format": {
|
|
345
|
+
"type": "json_object"
|
|
346
|
+
}
|
|
347
|
+
},
|
|
348
|
+
"return_schema": {
|
|
349
|
+
"type": "object",
|
|
350
|
+
"properties": {
|
|
351
|
+
"test_cases": {
|
|
352
|
+
"type": "array",
|
|
353
|
+
"description": "Generated test cases for the tool",
|
|
354
|
+
"items": {
|
|
355
|
+
"type": "object",
|
|
356
|
+
"properties": {
|
|
357
|
+
"name": {
|
|
358
|
+
"type": "string",
|
|
359
|
+
"description": "Tool name"
|
|
360
|
+
},
|
|
361
|
+
"arguments": {
|
|
362
|
+
"type": "object",
|
|
363
|
+
"description": "Input arguments"
|
|
364
|
+
}
|
|
365
|
+
},
|
|
366
|
+
"required": [
|
|
367
|
+
"name",
|
|
368
|
+
"arguments"
|
|
369
|
+
]
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
},
|
|
373
|
+
"required": [
|
|
374
|
+
"test_cases"
|
|
375
|
+
]
|
|
376
|
+
}
|
|
377
|
+
},
|
|
378
|
+
{
|
|
379
|
+
"type": "AgenticTool",
|
|
380
|
+
"name": "ArgumentDescriptionOptimizer",
|
|
381
|
+
"description": "Optimizes the descriptions of tool arguments/parameters based on test case results and actual usage patterns. Provides improved descriptions that are more accurate and user-friendly.",
|
|
382
|
+
"prompt": "You are an expert technical writer specializing in API documentation. Given a tool's parameter schema and test case results, analyze how each parameter is used and optimize their descriptions to be clear, accurate, and concise.\n\nCRITICAL CONSTRAINTS - PARAMETER DESCRIPTION SCOPE:\n1. If the parameter schema contains '_previous_feedback', use that feedback to address specific issues and improve the parameter descriptions accordingly.\n2. Parameter descriptions should be HIGHLY SPECIFIC to each individual parameter.\n3. NEVER repeat or reference the main tool functionality - assume the user already knows what the tool does.\n4. Focus EXCLUSIVELY on parameter-specific details: data types, formats, constraints, valid values, required formats, examples when helpful.\n5. Each description should answer: 'What should I put in this specific parameter?' not 'What does the tool do?'\n6. Avoid generic phrases like 'for this tool', 'used by the tool', 'enables functionality' unless they provide specific technical context.\n7. Be precise about technical requirements (e.g., 'JSON string', 'integer between 1-100', 'URL format', etc.)\n8. Every word must serve a purpose - eliminate filler words and redundant phrases.\n\nOriginal parameter schema:\n{parameter_schema}\n\nTest results showing parameter usage:\n{test_results}\n\nFor each parameter, suggest an improved description that:\n1. Is brief but informative (1-2 sentences max)\n2. Accurately reflects the parameter's specific purpose, data type, and constraints\n3. Uses clear, simple language with precise technical details\n4. Avoids redundancy with the parameter name\n5. Addresses any issues mentioned in previous feedback\n6. Contains only essential information about what value should be provided\n\nReturn a JSON object with keys: 'optimized_parameters' (object with parameter names as keys and optimized descriptions as values) and 'rationale' (explaining the key changes made).",
|
|
383
|
+
"input_arguments": [
|
|
384
|
+
"parameter_schema",
|
|
385
|
+
"test_results"
|
|
386
|
+
],
|
|
387
|
+
"parameter": {
|
|
388
|
+
"type": "object",
|
|
389
|
+
"properties": {
|
|
390
|
+
"parameter_schema": {
|
|
391
|
+
"type": "string",
|
|
392
|
+
"description": "JSON string of the original parameter schema with properties and descriptions."
|
|
393
|
+
},
|
|
394
|
+
"test_results": {
|
|
395
|
+
"type": "string",
|
|
396
|
+
"description": "A JSON string containing test case input/output pairs showing parameter usage."
|
|
397
|
+
}
|
|
398
|
+
},
|
|
399
|
+
"required": [
|
|
400
|
+
"parameter_schema",
|
|
401
|
+
"test_results"
|
|
402
|
+
]
|
|
403
|
+
},
|
|
404
|
+
"configs": {
|
|
405
|
+
"api_type": "CHATGPT",
|
|
406
|
+
"model_id": "gpt-5",
|
|
407
|
+
"temperature": 1.0,
|
|
408
|
+
"max_new_tokens": 1536,
|
|
409
|
+
"return_json": true
|
|
410
|
+
},
|
|
411
|
+
"return_schema": {
|
|
412
|
+
"type": "object",
|
|
413
|
+
"properties": {
|
|
414
|
+
"optimized_parameters": {
|
|
415
|
+
"type": "object",
|
|
416
|
+
"description": "Optimized parameter descriptions",
|
|
417
|
+
"additionalProperties": {
|
|
418
|
+
"type": "string"
|
|
419
|
+
}
|
|
420
|
+
},
|
|
421
|
+
"rationale": {
|
|
422
|
+
"type": "string",
|
|
423
|
+
"description": "Explanation of key changes made"
|
|
424
|
+
}
|
|
425
|
+
},
|
|
426
|
+
"required": [
|
|
427
|
+
"optimized_parameters",
|
|
428
|
+
"rationale"
|
|
429
|
+
]
|
|
430
|
+
}
|
|
431
|
+
},
|
|
432
|
+
{
|
|
433
|
+
"type": "AgenticTool",
|
|
434
|
+
"name": "ToolSpecificationGenerator",
|
|
435
|
+
"description": "Generates complete ToolUniverse-compliant tool specifications based on a description and analysis of similar existing tools. Creates comprehensive tool configurations including parameters, prompts, and metadata.",
|
|
436
|
+
"prompt": "You are an expert tool architect. Generate a complete ToolUniverse tool specification.\n\n## REQUIREMENTS\nTool Description: {tool_description}\nReference Information: {reference_info}\nTemplate: {specification_template}\n\n## CRITICAL RULES\n\n🚨 **JSON FORMAT**: Return ONLY valid JSON. No markdown, no explanations.\n\n🚨 **IMPLEMENTATION STRATEGY**: For computational tools, generate implementation STRATEGY as text, NOT source code.\n\n🚨 **PACKAGE PRIORITY**: Use established packages (requests, pandas, numpy, etc.) over custom code.\n\n## TOOL TYPE SELECTION\n- **BioinformaticsTool**: Biological data, genomics, pathways\n- **DataAnalysisTool**: Data processing, analysis, ML\n- **APITool**: Web APIs, REST services\n- **CustomTool**: General purpose (PREFERRED)\n- **AgenticTool**: ONLY for subjective tasks requiring LLM\n\n## TEMPLATE FORMAT\n\n**CustomTool (Preferred):**\n```json\n{\n \"type\": \"ExampleTool\",\n \"name\": \"tool_name\",\n \"description\": \"Tool description\",\n \"implementation\": \"Implementation strategy: Use package X for Y. Steps: 1) Validate input, 2) Process data, 3) Return result. Dependencies: package1, package2. Error handling: try-except for errors.\",\n \"parameter\": {\n \"type\": \"object\",\n \"properties\": {\n \"param\": {\"type\": \"string\", \"description\": \"...\", \"required\": true}\n },\n \"required\": [\"param\"]\n },\n \"return_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"result\": {\"type\": \"string\"}\n }\n },\n \"test_examples\": [\n {\n \"input\": {\"param\": \"test\"},\n \"expected_output_type\": \"object\",\n \"description\": \"Test case\"\n }\n ],\n \"metadata\": {\n \"tags\": [\"tag1\", \"tag2\"],\n \"difficulty_level\": \"intermediate\",\n \"estimated_execution_time\": \"< 1 second\"\n }\n}\n```\n\n## GENERATION REQUIREMENTS\n1. Choose appropriate tool type based on functionality\n2. Generate comprehensive implementation strategy text\n3. 
Use package recommendations from reference_info\n4. Include proper parameter and return schemas\n5. Provide realistic test examples\n6. Return ONLY valid JSON\n\nReturn the complete tool specification as valid JSON.",
|
|
437
|
+
"input_arguments": [
|
|
438
|
+
"tool_description",
|
|
439
|
+
"reference_info",
|
|
440
|
+
"specification_template"
|
|
441
|
+
],
|
|
442
|
+
"parameter": {
|
|
443
|
+
"type": "object",
|
|
444
|
+
"properties": {
|
|
445
|
+
"tool_description": {
|
|
446
|
+
"type": "string",
|
|
447
|
+
"description": "Description of the desired tool functionality"
|
|
448
|
+
},
|
|
449
|
+
"reference_info": {
|
|
450
|
+
"type": "string",
|
|
451
|
+
"description": "JSON string containing all reference information including similar tools, API documentation, and package recommendations"
|
|
452
|
+
},
|
|
453
|
+
"specification_template": {
|
|
454
|
+
"type": "string",
|
|
455
|
+
"description": "Template example showing the expected specification format"
|
|
456
|
+
}
|
|
457
|
+
},
|
|
458
|
+
"required": [
|
|
459
|
+
"tool_description",
|
|
460
|
+
"reference_info",
|
|
461
|
+
"specification_template"
|
|
462
|
+
]
|
|
463
|
+
},
|
|
464
|
+
"configs": {
|
|
465
|
+
"api_type": "CHATGPT",
|
|
466
|
+
"model_id": "gpt-5",
|
|
467
|
+
"temperature": 1.0,
|
|
468
|
+
"max_new_tokens": 2000,
|
|
469
|
+
"return_json": true
|
|
470
|
+
},
|
|
471
|
+
"return_schema": {
|
|
472
|
+
"type": "object",
|
|
473
|
+
"properties": {
|
|
474
|
+
"result": {
|
|
475
|
+
"type": "object",
|
|
476
|
+
"description": "Generated tool specification",
|
|
477
|
+
"properties": {
|
|
478
|
+
"type": {
|
|
479
|
+
"type": "string"
|
|
480
|
+
},
|
|
481
|
+
"name": {
|
|
482
|
+
"type": "string"
|
|
483
|
+
},
|
|
484
|
+
"description": {
|
|
485
|
+
"type": "string"
|
|
486
|
+
},
|
|
487
|
+
"parameter": {
|
|
488
|
+
"type": "object"
|
|
489
|
+
},
|
|
490
|
+
"return_schema": {
|
|
491
|
+
"type": "object"
|
|
492
|
+
},
|
|
493
|
+
"test_examples": {
|
|
494
|
+
"type": "array"
|
|
495
|
+
},
|
|
496
|
+
"metadata": {
|
|
497
|
+
"type": "object"
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
},
|
|
502
|
+
"required": [
|
|
503
|
+
"result"
|
|
504
|
+
]
|
|
505
|
+
}
|
|
506
|
+
},
|
|
507
|
+
{
|
|
508
|
+
"type": "AgenticTool",
|
|
509
|
+
"name": "ToolImplementationGenerator",
|
|
510
|
+
"description": "Generates domain-specific, functional code implementations based on tool descriptions and requirements with intelligent algorithm selection",
|
|
511
|
+
"prompt": "You are an expert software engineer. Generate a complete, production-ready Python tool implementation.\n\n## REQUEST\nTool Specification: {tool_specification}\nReference Information: {reference_info}\n\n## TEMPLATE EXAMPLE\nUse this EXACT structure for your implementation:\n\n{template_example}\n\n## ERROR FEEDBACK\n{error_feedback}\n\n## KEY POINTS TO REMEMBER\n\n### 🎯 Core Requirements\n- Generate COMPLETE Python class with imports, decorators, and full implementation\n- Use @register_tool decorator and inherit from BaseTool (see template above)\n- Include proper __init__(self, tool_config=None) and run(self, arguments) methods\n- Return JSON format: {\"status\": \"success/error\", \"data/error\": ...}\n- Follow the EXACT structure shown in the template example\n\n### 🛡️ Error Handling\n- Always wrap main logic in try-except\n- Use self.validate_parameters(arguments) for parameter validation\n- Provide clear, actionable error messages\n- Handle edge cases gracefully\n\n### 📦 Package Usage\n- Use recommended packages from reference_info when available\n- Include proper import statements\n- Avoid standalone module names (no \"requests\" as separate line)\n- Add packages to dependencies list\n\n### 🏗️ Code Structure\n- Follow Python best practices (PEP 8)\n- Use descriptive variable names\n- Add meaningful docstrings\n- Keep functions focused and readable\n- Ensure syntactically correct Python\n- Follow the template structure EXACTLY\n\n### ⚠️ Common Pitfalls to Avoid\n- Don't output just module names without import\n- Don't create incomplete try blocks\n- Don't forget parameter validation\n- Don't skip error handling\n- Don't make assumptions about input data\n- Use the template example as your guide for correct @register_tool usage\n- LEARN FROM PREVIOUS ERRORS: If this is a retry attempt, carefully avoid the errors mentioned above\n\n## RESPONSE FORMAT\nReturn JSON:\n{\n \"implementation\": {\n \"source_code\": \"<complete Python class code following 
the template>\",\n \"dependencies\": [\"<required packages>\"],\n \"imports\": [\"<import statements>\"]\n }\n}\n\nBe creative, be thorough, and make it work!",
|
|
512
|
+
"input_arguments": [
|
|
513
|
+
"tool_specification",
|
|
514
|
+
"reference_info",
|
|
515
|
+
"template_example",
|
|
516
|
+
"error_feedback"
|
|
517
|
+
],
|
|
518
|
+
"parameter": {
|
|
519
|
+
"type": "object",
|
|
520
|
+
"properties": {
|
|
521
|
+
"tool_specification": {
|
|
522
|
+
"type": "string",
|
|
523
|
+
"description": "Complete tool specification as JSON string"
|
|
524
|
+
},
|
|
525
|
+
"reference_info": {
|
|
526
|
+
"type": "string",
|
|
527
|
+
"description": "Optional reference information including API docs and package recommendations",
|
|
528
|
+
"default": "{}"
|
|
529
|
+
},
|
|
530
|
+
"template_example": {
|
|
531
|
+
"type": "string",
|
|
532
|
+
"description": "Template example showing correct @register_tool usage",
|
|
533
|
+
"default": ""
|
|
534
|
+
},
|
|
535
|
+
"error_feedback": {
|
|
536
|
+
"type": "string",
|
|
537
|
+
"description": "Error feedback from previous generation attempts",
|
|
538
|
+
"default": ""
|
|
539
|
+
}
|
|
540
|
+
},
|
|
541
|
+
"required": [
|
|
542
|
+
"tool_specification"
|
|
543
|
+
]
|
|
544
|
+
},
|
|
545
|
+
"configs": {
|
|
546
|
+
"api_type": "CHATGPT",
|
|
547
|
+
"model_id": "gpt-5",
|
|
548
|
+
"temperature": 1.0,
|
|
549
|
+
"max_new_tokens": 50000,
|
|
550
|
+
"return_json": true
|
|
551
|
+
},
|
|
552
|
+
"return_schema": {
|
|
553
|
+
"type": "object",
|
|
554
|
+
"properties": {
|
|
555
|
+
"implementation": {
|
|
556
|
+
"type": "object",
|
|
557
|
+
"description": "Generated implementation details",
|
|
558
|
+
"properties": {
|
|
559
|
+
"source_code": {
|
|
560
|
+
"type": "string"
|
|
561
|
+
},
|
|
562
|
+
"dependencies": {
|
|
563
|
+
"type": "array",
|
|
564
|
+
"items": {
|
|
565
|
+
"type": "string"
|
|
566
|
+
}
|
|
567
|
+
},
|
|
568
|
+
"imports": {
|
|
569
|
+
"type": "array",
|
|
570
|
+
"items": {
|
|
571
|
+
"type": "string"
|
|
572
|
+
}
|
|
573
|
+
},
|
|
574
|
+
"algorithm_description": {
|
|
575
|
+
"type": "string"
|
|
576
|
+
},
|
|
577
|
+
"complexity": {
|
|
578
|
+
"type": "string"
|
|
579
|
+
},
|
|
580
|
+
"test_cases": {
|
|
581
|
+
"type": "array"
|
|
582
|
+
},
|
|
583
|
+
"package_justification": {
|
|
584
|
+
"type": "string"
|
|
585
|
+
},
|
|
586
|
+
"alternative_packages": {
|
|
587
|
+
"type": "array",
|
|
588
|
+
"items": {
|
|
589
|
+
"type": "string"
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
},
|
|
594
|
+
"quality_metrics": {
|
|
595
|
+
"type": "object",
|
|
596
|
+
"description": "Quality assessment metrics",
|
|
597
|
+
"properties": {
|
|
598
|
+
"estimated_accuracy": {
|
|
599
|
+
"type": "string"
|
|
600
|
+
},
|
|
601
|
+
"performance_characteristics": {
|
|
602
|
+
"type": "string"
|
|
603
|
+
},
|
|
604
|
+
"robustness_level": {
|
|
605
|
+
"type": "string"
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
},
|
|
609
|
+
"documentation": {
|
|
610
|
+
"type": "object",
|
|
611
|
+
"description": "Implementation documentation",
|
|
612
|
+
"properties": {
|
|
613
|
+
"usage_examples": {
|
|
614
|
+
"type": "array",
|
|
615
|
+
"items": {
|
|
616
|
+
"type": "string"
|
|
617
|
+
}
|
|
618
|
+
},
|
|
619
|
+
"parameter_explanations": {
|
|
620
|
+
"type": "string"
|
|
621
|
+
},
|
|
622
|
+
"return_format": {
|
|
623
|
+
"type": "string"
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
},
|
|
628
|
+
"required": [
|
|
629
|
+
"implementation",
|
|
630
|
+
"quality_metrics",
|
|
631
|
+
"documentation"
|
|
632
|
+
]
|
|
633
|
+
}
|
|
634
|
+
},
|
|
635
|
+
{
|
|
636
|
+
"type": "AgenticTool",
|
|
637
|
+
"name": "UnifiedCodeOptimizer",
|
|
638
|
+
"description": "Comprehensive code optimizer that handles simplification, performance, stability, and quality improvements in a single unified agent",
|
|
639
|
+
"prompt": "You are an expert code optimizer. Analyze and improve the implementation based on the optimization context.\n\n## CURRENT TOOL SPECIFICATION\n{tool_spec}\n\n## CURRENT IMPLEMENTATION\n{tool_implementation}\n\n## OPTIMIZATION CONTEXT\n{optimization_context}\n\n## CRITICAL INSTRUCTION CHECK\n\n🚨 **HIGHEST PRIORITY**: Check optimization_context for \"instruction\" field. If present, follow it as TOP PRIORITY.\n\nCommon instructions:\n- User refused dependencies → Use standard library only\n- Installation failed → Use different packages or standard library\n- Reimplementation required → Generate completely new approach\n\n## ERROR ANALYSIS & FIXING\n\n🔍 **CRITICAL**: If optimization_context contains \"detailed_errors\", you MUST fix these specific errors:\n\nFor each error in detailed_errors:\n- **Error Type**: Analyze the specific exception (ImportError, AttributeError, etc.)\n- **Error Message**: Read the exact error message\n- **Error Location**: Check the file and line number\n- **Root Cause**: Understand WHY this error occurs\n- **Fix Strategy**: Determine the EXACT fix needed\n\n### 🚨 **REPEATED ERROR HANDLING**\n\n**CRITICAL RULE**: If you see the SAME error repeated across iterations:\n1. **STOP** using the same approach\n2. **CHANGE** the implementation strategy completely\n3. **USE** different packages or standard library\n4. 
**AVOID** the problematic import/code pattern\n\n**For ImportError specifically:**\n- If `ModuleNotFoundError: No module named 'X.Y'` → Use main package `X` instead\n- If `ImportError: cannot import name 'Y'` → Use different import or alternative package\n- If package doesn't exist → Use standard library or different package\n\n### 🔄 **ITERATION LEARNING**\n\n- **Check improvement_history**: See what was tried before\n- **Avoid repeating**: Don't use the same failing approach\n- **Learn from errors**: Each error tells you what NOT to do\n- **Change strategy**: If same error persists, completely change approach\n\n## REQUIREMENT: ONLY OPTIMIZE IMPLEMENTATION\n\n🚨 **ONLY modify implementation code. DO NOT change:**\n- Tool specification (name, description, parameters)\n- Return schema\n- Test examples\n- Metadata\n\n## OPTIMIZATION GOALS\n\n### 🎯 Core Objectives\n- **FIX SPECIFIC ERRORS**: Address each error in detailed_errors\n- **AVOID REPEATING**: Don't repeat the same failing approach\n- **CHANGE STRATEGY**: If same error persists, use completely different approach\n- Improve efficiency and performance\n- Enhance readability and maintainability\n- Simplify complex logic\n- Strengthen error handling\n\n### 🔧 Quality Focus\n- Remove redundant/dead code\n- Optimize algorithms and data structures\n- Improve naming and comments\n- Ensure consistent style\n- Learn from improvement history\n\n### 🛡️ Stability\n- Add input validation\n- Handle edge cases\n- Improve exception handling\n- Add resource cleanup\n- Make code robust\n\n### ⚡ Performance\n- Replace inefficient patterns\n- Optimize memory usage\n- Cache expensive operations\n- Use appropriate data structures\n- Minimize unnecessary computations\n\n### ⚠️ What NOT to Do\n- Don't break existing functionality\n- Don't add unnecessary complexity\n- Don't optimize prematurely\n- Don't remove important error handling\n- **DON'T REPEAT THE SAME FAILING APPROACH**\n- **DON'T IGNORE SPECIFIC ERRORS**\n- **Don't modify 
tool specification**\n\n## RESPONSE FORMAT\nReturn JSON:\n{\n \"implementation\": {\n \"source_code\": \"<improved Python code>\",\n \"dependencies\": [\"<required packages>\"],\n \"imports\": [\"<import statements>\"]\n }\n}\n\n**REMEMBER**: Fix the specific errors mentioned in detailed_errors. If the same error keeps happening, change your approach completely!\n\n## MANDATORY SELF-CHECK\n\nBefore generating code, you MUST ask yourself:\n1. \"What specific errors am I trying to fix?\"\n2. \"Have I seen these exact errors before?\"\n3. \"Am I using the same approach that failed?\"\n4. \"What completely different approach can I try?\"\n\nIf you answered YES to questions 2 and 3, you MUST use a completely different approach!\n\n**Examples of different approaches:**\n- ImportError → Use different package or standard library\n- AttributeError → Use different method or API\n- ConnectionError → Use different endpoint or offline approach\n- ValidationError → Use different validation logic\n\n**CRITICAL**: If the same error persists across iterations, you MUST change your implementation strategy completely!\n\n## ERROR-SPECIFIC FIXES\n\n**For 'dict' object has no attribute 'to_dict' error:**\n- This suggests the code is trying to call .to_dict() on a dictionary\n- Fix: Remove .to_dict() calls or convert dict to proper object first\n- Alternative: Use json.dumps() instead of .to_dict()\n\n**For 'Missing or empty function name' error:**\n- This suggests tool registration or function call issues\n- Fix: Ensure proper tool registration and function naming\n- Alternative: Use different tool calling mechanism\n\n**For AttributeError in general:**\n- Check if the object has the expected attributes\n- Use hasattr() to check before calling methods\n- Convert objects to proper types before method calls",
|
|
640
|
+
"input_arguments": [
|
|
641
|
+
"tool_spec",
|
|
642
|
+
"tool_implementation",
|
|
643
|
+
"optimization_context"
|
|
644
|
+
],
|
|
645
|
+
"parameter": {
|
|
646
|
+
"type": "object",
|
|
647
|
+
"properties": {
|
|
648
|
+
"tool_spec": {
|
|
649
|
+
"type": "string",
|
|
650
|
+
"description": "Tool specification (name, description, parameter schema, return schema, test examples) as JSON string"
|
|
651
|
+
},
|
|
652
|
+
"tool_implementation": {
|
|
653
|
+
"type": "string",
|
|
654
|
+
"description": "Current tool implementation (source_code, dependencies, imports) as JSON string"
|
|
655
|
+
},
|
|
656
|
+
"optimization_context": {
|
|
657
|
+
"type": "string",
|
|
658
|
+
"description": "JSON string containing comprehensive optimization context including quality report, test results, iteration info, and improvement history"
|
|
659
|
+
}
|
|
660
|
+
},
|
|
661
|
+
"required": [
|
|
662
|
+
"tool_spec",
|
|
663
|
+
"tool_implementation",
|
|
664
|
+
"optimization_context"
|
|
665
|
+
]
|
|
666
|
+
},
|
|
667
|
+
"configs": {
|
|
668
|
+
"api_type": "CHATGPT",
|
|
669
|
+
"model_id": "gpt-5",
|
|
670
|
+
"temperature": 1.0,
|
|
671
|
+
"max_new_tokens": 4096,
|
|
672
|
+
"return_json": true
|
|
673
|
+
},
|
|
674
|
+
"return_schema": {
|
|
675
|
+
"type": "object",
|
|
676
|
+
"properties": {
|
|
677
|
+
"implementation": {
|
|
678
|
+
"type": "object",
|
|
679
|
+
"description": "Optimized implementation",
|
|
680
|
+
"properties": {
|
|
681
|
+
"source_code": {
|
|
682
|
+
"type": "string",
|
|
683
|
+
"description": "Optimized Python source code"
|
|
684
|
+
},
|
|
685
|
+
"dependencies": {
|
|
686
|
+
"type": "array",
|
|
687
|
+
"items": {
|
|
688
|
+
"type": "string"
|
|
689
|
+
},
|
|
690
|
+
"description": "Required packages"
|
|
691
|
+
},
|
|
692
|
+
"imports": {
|
|
693
|
+
"type": "array",
|
|
694
|
+
"items": {
|
|
695
|
+
"type": "string"
|
|
696
|
+
},
|
|
697
|
+
"description": "Import statements"
|
|
698
|
+
},
|
|
699
|
+
"improvements": {
|
|
700
|
+
"type": "array",
|
|
701
|
+
"description": "List of improvements made",
|
|
702
|
+
"items": {
|
|
703
|
+
"type": "object",
|
|
704
|
+
"properties": {
|
|
705
|
+
"category": {
|
|
706
|
+
"type": "string",
|
|
707
|
+
"enum": [
|
|
708
|
+
"simplification",
|
|
709
|
+
"performance",
|
|
710
|
+
"stability",
|
|
711
|
+
"quality"
|
|
712
|
+
]
|
|
713
|
+
},
|
|
714
|
+
"description": {
|
|
715
|
+
"type": "string"
|
|
716
|
+
},
|
|
717
|
+
"impact": {
|
|
718
|
+
"type": "string"
|
|
719
|
+
},
|
|
720
|
+
"lines_changed": {
|
|
721
|
+
"type": "string"
|
|
722
|
+
},
|
|
723
|
+
"before_after": {
|
|
724
|
+
"type": "string"
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
},
|
|
729
|
+
"complexity_analysis": {
|
|
730
|
+
"type": "object",
|
|
731
|
+
"properties": {
|
|
732
|
+
"time_complexity": {
|
|
733
|
+
"type": "string"
|
|
734
|
+
},
|
|
735
|
+
"space_complexity": {
|
|
736
|
+
"type": "string"
|
|
737
|
+
},
|
|
738
|
+
"improvements": {
|
|
739
|
+
"type": "string"
|
|
740
|
+
},
|
|
741
|
+
"optimization_notes": {
|
|
742
|
+
"type": "string"
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
},
|
|
747
|
+
"required": [
|
|
748
|
+
"source_code",
|
|
749
|
+
"dependencies",
|
|
750
|
+
"imports",
|
|
751
|
+
"improvements"
|
|
752
|
+
]
|
|
753
|
+
}
|
|
754
|
+
},
|
|
755
|
+
"required": [
|
|
756
|
+
"implementation"
|
|
757
|
+
]
|
|
758
|
+
}
|
|
759
|
+
},
|
|
760
|
+
{
|
|
761
|
+
"type": "AgenticTool",
|
|
762
|
+
"name": "ToolSpecificationOptimizer",
|
|
763
|
+
"description": "Optimizes tool specifications for clarity, completeness, and usability with comprehensive benchmarking against similar tools",
|
|
764
|
+
"prompt": "You are an expert in tool design and user experience optimization. Analyze and optimize the provided tool specification based on comprehensive optimization context.\n\n## TOOL SPECIFICATION TO OPTIMIZE\nCurrent Configuration:\n{tool_config}\n\n## OPTIMIZATION CONTEXT\n{optimization_context}\n\n## OPTIMIZATION FRAMEWORK\n\n### 1. QUALITY-BASED IMPROVEMENTS\n- Address issues identified in quality report\n- Improve areas with low scores\n- Enhance clarity and completeness\n- Fix parameter validation issues\n\n### 2. TEST-DRIVEN OPTIMIZATION\n- Analyze test failures and edge cases\n- Improve error handling based on test results\n- Add missing test scenarios\n- Optimize for robustness\n\n### 3. ITERATIVE IMPROVEMENTS\n- Consider current iteration and target score\n- Learn from improvement history\n- Focus on areas with most impact\n- Avoid repeating previous attempts\n\n### 4. USABILITY ENHANCEMENTS\n- Simplify complex parameters\n- Add better defaults\n- Improve error messages\n- Enhance documentation\n\n### 5. PERFORMANCE OPTIMIZATION\n- Optimize parameter validation\n- Improve resource efficiency\n- Enhance response time\n- Better error handling\n\n## CRITICAL: RETURN SCHEMA & TEST EXAMPLES\n\nYour optimized_config MUST include:\n\n1. **return_schema** - Detailed JSON schema for the tool's output:\n {\n \"type\": \"object\",\n \"properties\": {\n \"success\": {\"type\": \"boolean\", \"description\": \"Whether operation succeeded\"},\n \"result\": {\"type\": \"object\", \"description\": \"Main result data\"},\n \"error\": {\"type\": \"string\", \"description\": \"Error message if failed\"},\n \"execution_time_ms\": {\"type\": \"integer\", \"description\": \"Execution time in milliseconds\"}\n },\n \"required\": [\"success\"]\n }\n\n2. 
**test_examples** - Comprehensive test cases based on actual test results:\n [\n {\n \"input\": {\"param\": \"value\"},\n \"expected_output\": {\"success\": true, \"result\": {...}},\n \"description\": \"Test case description\"\n }\n ]\n\n## OPTIMIZATION PROCESS\n1. Analyze optimization context comprehensively\n2. Review quality report and test execution results\n3. Consider iteration history and target goals\n4. Identify specific improvement areas\n5. Generate optimized specification\n6. Provide detailed rationale for changes\n\n## RESPONSE FORMAT\nReturn ONLY a valid JSON object (no markdown code blocks, no additional text):\n\n{\n \"optimized_config\": {\n \"name\": \"<optimized name>\",\n \"description\": \"<improved description>\",\n \"parameter\": {\n \"type\": \"object\",\n \"properties\": \"<optimized parameters>\",\n \"required\": \"<updated required fields>\"\n },\n \"return_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"success\": {\"type\": \"boolean\", \"description\": \"Whether operation succeeded\"},\n \"result\": {\"type\": \"object\", \"description\": \"Main result data\"},\n \"error\": {\"type\": \"string\", \"description\": \"Error message if failed\"},\n \"execution_time_ms\": {\"type\": \"integer\", \"description\": \"Execution time in milliseconds\"}\n },\n \"required\": [\"success\"]\n },\n \"test_examples\": [\n {\n \"input\": {\"param\": \"value\"},\n \"expected_output\": {\"success\": true, \"result\": {...}},\n \"description\": \"Test case description\"\n }\n ],\n \"metadata\": {\n \"tags\": [\"<relevant tags>\"],\n \"difficulty_level\": \"<user difficulty>\",\n \"estimated_execution_time\": \"<typical runtime>\"\n }\n },\n \"improvements\": [\n {\n \"area\": \"<improvement area>\",\n \"change\": \"<what was changed>\",\n \"rationale\": \"<why this improves the tool>\",\n \"impact\": \"<expected user impact>\",\n \"based_on\": \"<quality report issue or test failure>\",\n \"iteration_context\": \"<how this fits in 
optimization history>\"\n }\n ],\n \"quality_score\": {\n \"before\": \"<0-10>\",\n \"after\": \"<0-10>\",\n \"improvement\": \"<difference>\",\n \"target_progress\": \"<progress toward target score>\"\n },\n \"recommendations\": [\n {\n \"type\": \"enhancement|fix|optimization\",\n \"description\": \"<recommendation>\",\n \"priority\": \"high|medium|low\",\n \"source\": \"<quality report or test results>\",\n \"next_iteration_focus\": \"<what to focus on next>\"\n }\n ]\n}",
|
|
765
|
+
"input_arguments": [
|
|
766
|
+
"tool_config",
|
|
767
|
+
"optimization_context"
|
|
768
|
+
],
|
|
769
|
+
"parameter": {
|
|
770
|
+
"type": "object",
|
|
771
|
+
"properties": {
|
|
772
|
+
"tool_config": {
|
|
773
|
+
"type": "string",
|
|
774
|
+
"description": "JSON string of the current tool configuration to optimize"
|
|
775
|
+
},
|
|
776
|
+
"optimization_context": {
|
|
777
|
+
"type": "string",
|
|
778
|
+
"description": "JSON string containing comprehensive optimization context including quality report, test results, iteration info, and improvement history"
|
|
779
|
+
}
|
|
780
|
+
},
|
|
781
|
+
"required": [
|
|
782
|
+
"tool_config",
|
|
783
|
+
"optimization_context"
|
|
784
|
+
]
|
|
785
|
+
},
|
|
786
|
+
"configs": {
|
|
787
|
+
"api_type": "CHATGPT",
|
|
788
|
+
"model_id": "gpt-5",
|
|
789
|
+
"temperature": 1.0,
|
|
790
|
+
"max_new_tokens": 2048,
|
|
791
|
+
"return_json": true
|
|
792
|
+
},
|
|
793
|
+
"return_schema": {
|
|
794
|
+
"type": "object",
|
|
795
|
+
"properties": {
|
|
796
|
+
"optimized_config": {
|
|
797
|
+
"type": "object",
|
|
798
|
+
"description": "Optimized tool configuration",
|
|
799
|
+
"properties": {
|
|
800
|
+
"name": {
|
|
801
|
+
"type": "string"
|
|
802
|
+
},
|
|
803
|
+
"description": {
|
|
804
|
+
"type": "string"
|
|
805
|
+
},
|
|
806
|
+
"parameter": {
|
|
807
|
+
"type": "object"
|
|
808
|
+
},
|
|
809
|
+
"return_schema": {
|
|
810
|
+
"type": "object",
|
|
811
|
+
"description": "JSON schema for tool output"
|
|
812
|
+
},
|
|
813
|
+
"test_examples": {
|
|
814
|
+
"type": "array",
|
|
815
|
+
"description": "Test cases for the tool",
|
|
816
|
+
"items": {
|
|
817
|
+
"type": "object",
|
|
818
|
+
"properties": {
|
|
819
|
+
"input": {
|
|
820
|
+
"type": "object"
|
|
821
|
+
},
|
|
822
|
+
"expected_output": {
|
|
823
|
+
"type": "object"
|
|
824
|
+
},
|
|
825
|
+
"description": {
|
|
826
|
+
"type": "string"
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
},
|
|
831
|
+
"examples": {
|
|
832
|
+
"type": "array",
|
|
833
|
+
"items": {
|
|
834
|
+
"type": "string"
|
|
835
|
+
}
|
|
836
|
+
},
|
|
837
|
+
"metadata": {
|
|
838
|
+
"type": "object"
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
},
|
|
842
|
+
"improvements": {
|
|
843
|
+
"type": "array",
|
|
844
|
+
"description": "List of improvements made",
|
|
845
|
+
"items": {
|
|
846
|
+
"type": "object",
|
|
847
|
+
"properties": {
|
|
848
|
+
"area": {
|
|
849
|
+
"type": "string"
|
|
850
|
+
},
|
|
851
|
+
"change": {
|
|
852
|
+
"type": "string"
|
|
853
|
+
},
|
|
854
|
+
"rationale": {
|
|
855
|
+
"type": "string"
|
|
856
|
+
},
|
|
857
|
+
"impact": {
|
|
858
|
+
"type": "string"
|
|
859
|
+
}
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
},
|
|
863
|
+
"quality_score": {
|
|
864
|
+
"type": "object",
|
|
865
|
+
"description": "Quality score comparison",
|
|
866
|
+
"properties": {
|
|
867
|
+
"before": {
|
|
868
|
+
"type": "number"
|
|
869
|
+
},
|
|
870
|
+
"after": {
|
|
871
|
+
"type": "number"
|
|
872
|
+
},
|
|
873
|
+
"improvement": {
|
|
874
|
+
"type": "number"
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
},
|
|
878
|
+
"recommendations": {
|
|
879
|
+
"type": "array",
|
|
880
|
+
"description": "Further recommendations",
|
|
881
|
+
"items": {
|
|
882
|
+
"type": "object",
|
|
883
|
+
"properties": {
|
|
884
|
+
"type": {
|
|
885
|
+
"type": "string",
|
|
886
|
+
"enum": [
|
|
887
|
+
"enhancement",
|
|
888
|
+
"fix",
|
|
889
|
+
"optimization"
|
|
890
|
+
]
|
|
891
|
+
},
|
|
892
|
+
"description": {
|
|
893
|
+
"type": "string"
|
|
894
|
+
},
|
|
895
|
+
"priority": {
|
|
896
|
+
"type": "string",
|
|
897
|
+
"enum": [
|
|
898
|
+
"high",
|
|
899
|
+
"medium",
|
|
900
|
+
"low"
|
|
901
|
+
]
|
|
902
|
+
}
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
}
|
|
906
|
+
},
|
|
907
|
+
"required": [
|
|
908
|
+
"optimized_config",
|
|
909
|
+
"improvements",
|
|
910
|
+
"quality_score",
|
|
911
|
+
"recommendations"
|
|
912
|
+
]
|
|
913
|
+
}
|
|
914
|
+
},
|
|
915
|
+
{
|
|
916
|
+
"type": "AgenticTool",
|
|
917
|
+
"name": "ToolOptimizer",
|
|
918
|
+
"description": "Optimizes tool configurations based on quality feedback. Improves tool specifications and implementations to address identified issues.",
|
|
919
|
+
"prompt": "You are an expert tool optimizer. Improve this tool based on the quality feedback:\n\nOriginal Tool Configuration: {tool_config}\nQuality Feedback: {quality_feedback}\nOptimization Target: {optimization_target}\n\nGenerate an optimized version in this JSON format:\n{\n \"optimized_tool\": {\n \"name\": \"improved_tool_name\",\n \"type\": \"tool_type\",\n \"description\": \"enhanced_description\",\n \"parameter\": {\n \"type\": \"object\",\n \"properties\": {},\n \"required\": []\n },\n \"category\": \"category\",\n \"implementation\": {\n \"source_code\": \"improved_code\",\n \"dependencies\": [],\n \"main_function\": \"execute_tool\"\n }\n },\n \"improvements_made\": [\n \"Added input validation\",\n \"Enhanced error handling\",\n \"Improved parameter descriptions\"\n ],\n \"optimization_notes\": \"Summary of key improvements\"\n}\n\nFocus on:\n1. Fixing identified weaknesses\n2. Enhancing parameter validation\n3. Improving error handling\n4. Adding missing functionality\n5. Optimizing for the specified target\n6. Maintaining backward compatibility where possible",
|
|
920
|
+
"input_arguments": [
|
|
921
|
+
"tool_config",
|
|
922
|
+
"quality_feedback",
|
|
923
|
+
"optimization_target"
|
|
924
|
+
],
|
|
925
|
+
"parameter": {
|
|
926
|
+
"type": "object",
|
|
927
|
+
"properties": {
|
|
928
|
+
"tool_config": {
|
|
929
|
+
"type": "string",
|
|
930
|
+
"description": "JSON string of the original tool configuration"
|
|
931
|
+
},
|
|
932
|
+
"quality_feedback": {
|
|
933
|
+
"type": "string",
|
|
934
|
+
"description": "JSON string of quality evaluation feedback"
|
|
935
|
+
},
|
|
936
|
+
"optimization_target": {
|
|
937
|
+
"type": "string",
|
|
938
|
+
"description": "What to optimize for (improve_quality, enhance_performance, etc.)"
|
|
939
|
+
}
|
|
940
|
+
},
|
|
941
|
+
"required": [
|
|
942
|
+
"tool_config",
|
|
943
|
+
"quality_feedback",
|
|
944
|
+
"optimization_target"
|
|
945
|
+
]
|
|
946
|
+
},
|
|
947
|
+
"configs": {
|
|
948
|
+
"api_type": "CHATGPT",
|
|
949
|
+
"model_id": "gpt-5",
|
|
950
|
+
"temperature": 1.0,
|
|
951
|
+
"max_new_tokens": 3072,
|
|
952
|
+
"return_json": true
|
|
953
|
+
},
|
|
954
|
+
"return_schema": {
|
|
955
|
+
"type": "object",
|
|
956
|
+
"properties": {
|
|
957
|
+
"optimized_tool": {
|
|
958
|
+
"type": "object",
|
|
959
|
+
"description": "Optimized tool configuration",
|
|
960
|
+
"properties": {
|
|
961
|
+
"name": {
|
|
962
|
+
"type": "string"
|
|
963
|
+
},
|
|
964
|
+
"type": {
|
|
965
|
+
"type": "string"
|
|
966
|
+
},
|
|
967
|
+
"description": {
|
|
968
|
+
"type": "string"
|
|
969
|
+
},
|
|
970
|
+
"parameter": {
|
|
971
|
+
"type": "object"
|
|
972
|
+
},
|
|
973
|
+
"category": {
|
|
974
|
+
"type": "string"
|
|
975
|
+
},
|
|
976
|
+
"implementation": {
|
|
977
|
+
"type": "object"
|
|
978
|
+
}
|
|
979
|
+
}
|
|
980
|
+
},
|
|
981
|
+
"improvements_made": {
|
|
982
|
+
"type": "array",
|
|
983
|
+
"description": "List of improvements made",
|
|
984
|
+
"items": {
|
|
985
|
+
"type": "string"
|
|
986
|
+
}
|
|
987
|
+
},
|
|
988
|
+
"optimization_notes": {
|
|
989
|
+
"type": "string",
|
|
990
|
+
"description": "Summary of key improvements"
|
|
991
|
+
}
|
|
992
|
+
},
|
|
993
|
+
"required": [
|
|
994
|
+
"optimized_tool",
|
|
995
|
+
"improvements_made",
|
|
996
|
+
"optimization_notes"
|
|
997
|
+
]
|
|
998
|
+
}
|
|
999
|
+
},
|
|
1000
|
+
{
|
|
1001
|
+
"type": "ComposeTool",
|
|
1002
|
+
"name": "ToolDiscover",
|
|
1003
|
+
"description": "Generates new ToolUniverse-compliant tools based on short descriptions through an intelligent discovery and refinement process. Automatically determines the optimal tool type and category, discovers similar existing tools, generates initial specifications, and iteratively refines the tool configuration using agentic optimization tools until it meets quality standards.",
|
|
1004
|
+
"parameter": {
|
|
1005
|
+
"type": "object",
|
|
1006
|
+
"properties": {
|
|
1007
|
+
"tool_description": {
|
|
1008
|
+
"type": "string",
|
|
1009
|
+
"description": "Short description of the desired tool functionality and purpose. ToolDiscover automatically analyzes this to determine the optimal tool type (PackageTool, RESTTool, XMLTool, or AgenticTool) and an appropriate category."
|
|
1010
|
+
},
|
|
1011
|
+
"max_iterations": {
|
|
1012
|
+
"type": "integer",
|
|
1013
|
+
"description": "Maximum number of refinement iterations to perform.",
|
|
1014
|
+
"default": 20
|
|
1015
|
+
},
|
|
1016
|
+
"save_to_file": {
|
|
1017
|
+
"type": "boolean",
|
|
1018
|
+
"description": "Whether to save the generated tool configuration and report to a file.",
|
|
1019
|
+
"default": true
|
|
1020
|
+
},
|
|
1021
|
+
"output_file": {
|
|
1022
|
+
"type": "string",
|
|
1023
|
+
"description": "Optional file path for saving the generated tool. If not provided, an auto-generated filename is used."
|
|
1024
|
+
}
|
|
1025
|
+
},
|
|
1026
|
+
"required": [
|
|
1027
|
+
"tool_description",
|
|
1028
|
+
"max_iterations",
|
|
1029
|
+
"save_to_file",
|
|
1030
|
+
"output_file"
|
|
1031
|
+
]
|
|
1032
|
+
},
|
|
1033
|
+
"auto_load_dependencies": true,
|
|
1034
|
+
"fail_on_missing_tools": false,
|
|
1035
|
+
"required_tools": [],
|
|
1036
|
+
"composition_file": "tool_discover.py",
|
|
1037
|
+
"composition_function": "compose",
|
|
1038
|
+
"return_schema": {
|
|
1039
|
+
"type": "object",
|
|
1040
|
+
"properties": {
|
|
1041
|
+
"tool_config": {
|
|
1042
|
+
"type": "object",
|
|
1043
|
+
"description": "Generated tool configuration"
|
|
1044
|
+
},
|
|
1045
|
+
"quality_score": {
|
|
1046
|
+
"type": "number",
|
|
1047
|
+
"description": "Final quality score"
|
|
1048
|
+
},
|
|
1049
|
+
"saved_files": {
|
|
1050
|
+
"type": "array",
|
|
1051
|
+
"description": "List of saved files",
|
|
1052
|
+
"items": {
|
|
1053
|
+
"type": "string"
|
|
1054
|
+
}
|
|
1055
|
+
}
|
|
1056
|
+
},
|
|
1057
|
+
"required": [
|
|
1058
|
+
"tool_config",
|
|
1059
|
+
"quality_score",
|
|
1060
|
+
"saved_files"
|
|
1061
|
+
]
|
|
1062
|
+
}
|
|
1063
|
+
},
|
|
1064
|
+
{
|
|
1065
|
+
"type": "ComposeTool",
|
|
1066
|
+
"name": "ToolDescriptionOptimizer",
|
|
1067
|
+
"description": "Optimizes a tool's description and parameter descriptions by generating test cases, executing them, analyzing the results, and suggesting improved descriptions for both the tool and its arguments. Optionally saves a comprehensive optimization report to a file without overwriting the original.",
|
|
1068
|
+
"parameter": {
|
|
1069
|
+
"type": "object",
|
|
1070
|
+
"properties": {
|
|
1071
|
+
"tool_config": {
|
|
1072
|
+
"type": "object",
|
|
1073
|
+
"description": "The full configuration of the tool to optimize."
|
|
1074
|
+
},
|
|
1075
|
+
"save_to_file": {
|
|
1076
|
+
"type": "boolean",
|
|
1077
|
+
"description": "If true, save the optimized description to a file (do not overwrite the original).",
|
|
1078
|
+
"default": false
|
|
1079
|
+
},
|
|
1080
|
+
"output_file": {
|
|
1081
|
+
"type": "string",
|
|
1082
|
+
"description": "Optional file path to save the optimized description. If not provided, use '<tool_name>_optimized_description.txt'."
|
|
1083
|
+
},
|
|
1084
|
+
"max_iterations": {
|
|
1085
|
+
"type": "integer",
|
|
1086
|
+
"description": "Maximum number of optimization rounds to perform.",
|
|
1087
|
+
"default": 3
|
|
1088
|
+
},
|
|
1089
|
+
"satisfaction_threshold": {
|
|
1090
|
+
"type": "number",
|
|
1091
|
+
"description": "Quality score threshold (1-10) to consider optimization satisfactory.",
|
|
1092
|
+
"default": 8
|
|
1093
|
+
}
|
|
1094
|
+
},
|
|
1095
|
+
"required": [
|
|
1096
|
+
"tool_config",
|
|
1097
|
+
"save_to_file",
|
|
1098
|
+
"output_file",
|
|
1099
|
+
"max_iterations",
|
|
1100
|
+
"satisfaction_threshold"
|
|
1101
|
+
]
|
|
1102
|
+
},
|
|
1103
|
+
"auto_load_dependencies": true,
|
|
1104
|
+
"fail_on_missing_tools": false,
|
|
1105
|
+
"required_tools": [
|
|
1106
|
+
"TestCaseGenerator",
|
|
1107
|
+
"DescriptionAnalyzer",
|
|
1108
|
+
"ArgumentDescriptionOptimizer",
|
|
1109
|
+
"DescriptionQualityEvaluator"
|
|
1110
|
+
],
|
|
1111
|
+
"composition_file": "tool_description_optimizer.py",
|
|
1112
|
+
"composition_function": "compose",
|
|
1113
|
+
"return_schema": {
|
|
1114
|
+
"type": "object",
|
|
1115
|
+
"properties": {
|
|
1116
|
+
"optimized_tool": {
|
|
1117
|
+
"type": "object",
|
|
1118
|
+
"description": "Tool with optimized descriptions"
|
|
1119
|
+
},
|
|
1120
|
+
"optimization_report": {
|
|
1121
|
+
"type": "object",
|
|
1122
|
+
"description": "Detailed optimization report",
|
|
1123
|
+
"properties": {
|
|
1124
|
+
"iterations_performed": {
|
|
1125
|
+
"type": "integer"
|
|
1126
|
+
},
|
|
1127
|
+
"final_quality_score": {
|
|
1128
|
+
"type": "number"
|
|
1129
|
+
},
|
|
1130
|
+
"improvements_made": {
|
|
1131
|
+
"type": "array",
|
|
1132
|
+
"items": {
|
|
1133
|
+
"type": "string"
|
|
1134
|
+
}
|
|
1135
|
+
},
|
|
1136
|
+
"saved_files": {
|
|
1137
|
+
"type": "array",
|
|
1138
|
+
"items": {
|
|
1139
|
+
"type": "string"
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
},
|
|
1145
|
+
"required": [
|
|
1146
|
+
"optimized_tool",
|
|
1147
|
+
"optimization_report"
|
|
1148
|
+
]
|
|
1149
|
+
}
|
|
1150
|
+
},
|
|
1151
|
+
{
|
|
1152
|
+
"type": "AgenticTool",
|
|
1153
|
+
"name": "PackageEvaluator",
|
|
1154
|
+
"description": "Evaluates and ranks Python packages based on requirements using LLM analysis",
|
|
1155
|
+
"prompt": "You are an expert Python package evaluator. Analyze packages and recommend the best fit.\n\n## REQUIREMENTS\n{requirements}\n\n## FUNCTIONALITY NEEDED\n{functionality}\n\n## PACKAGE CANDIDATES\n{candidates}\n\n## EVALUATION CRITERIA\n1. **Functionality Match**: Does it provide exactly what's needed?\n2. **Maturity**: Version number, last update, stability\n3. **Popularity**: Downloads, stars, community size\n4. **Maintenance**: Recent commits, active development\n5. **Documentation**: Quality and completeness\n6. **Dependencies**: Minimal, well-maintained dependencies\n7. **License**: Compatible with project needs\n8. **Performance**: Known for speed/efficiency\n9. **Compatibility**: Python version support\n10. **Ease of Use**: API simplicity, learning curve\n\n## ANALYSIS APPROACH\nFor each package:\n1. Match functionality against requirements\n2. Assess quality indicators (version, docs, maintenance)\n3. Identify strengths and weaknesses\n4. Calculate suitability score (0-100)\n5. Provide specific reasoning\n\n## RESPONSE FORMAT\nReturn JSON:\n{\n \"rankings\": [\n {\n \"name\": \"package-name\",\n \"score\": 95,\n \"strengths\": [\"specific strengths\"],\n \"weaknesses\": [\"specific weaknesses\"],\n \"functionality_match\": \"how well it matches requirements\",\n \"recommendation\": \"use|consider|avoid\",\n \"reasoning\": \"detailed explanation\",\n \"usage_example\": \"quick code snippet showing how to use it\"\n }\n ],\n \"top_recommendation\": {\n \"name\": \"best-package\",\n \"why\": \"clear explanation of why this is best\",\n \"alternatives\": [\"backup options\"]\n },\n \"implementation_guidance\": \"how to implement using recommended package\"\n}",
|
|
1156
|
+
"input_arguments": [
|
|
1157
|
+
"requirements",
|
|
1158
|
+
"functionality",
|
|
1159
|
+
"candidates"
|
|
1160
|
+
],
|
|
1161
|
+
"parameter": {
|
|
1162
|
+
"type": "object",
|
|
1163
|
+
"properties": {
|
|
1164
|
+
"requirements": {
|
|
1165
|
+
"type": "string",
|
|
1166
|
+
"description": "What the tool needs to accomplish"
|
|
1167
|
+
},
|
|
1168
|
+
"functionality": {
|
|
1169
|
+
"type": "string",
|
|
1170
|
+
"description": "Specific functions/features required"
|
|
1171
|
+
},
|
|
1172
|
+
"candidates": {
|
|
1173
|
+
"type": "string",
|
|
1174
|
+
"description": "JSON string of candidate packages with metadata"
|
|
1175
|
+
}
|
|
1176
|
+
},
|
|
1177
|
+
"required": [
|
|
1178
|
+
"requirements",
|
|
1179
|
+
"functionality",
|
|
1180
|
+
"candidates"
|
|
1181
|
+
]
|
|
1182
|
+
},
|
|
1183
|
+
"configs": {
|
|
1184
|
+
"api_type": "CHATGPT",
|
|
1185
|
+
"model_id": "gpt-5",
|
|
1186
|
+
"temperature": 1.0,
|
|
1187
|
+
"max_new_tokens": 4096,
|
|
1188
|
+
"return_json": true
|
|
1189
|
+
},
|
|
1190
|
+
"return_schema": {
|
|
1191
|
+
"type": "object",
|
|
1192
|
+
"properties": {
|
|
1193
|
+
"rankings": {
|
|
1194
|
+
"type": "array",
|
|
1195
|
+
"items": {
|
|
1196
|
+
"type": "object",
|
|
1197
|
+
"properties": {
|
|
1198
|
+
"name": {
|
|
1199
|
+
"type": "string"
|
|
1200
|
+
},
|
|
1201
|
+
"score": {
|
|
1202
|
+
"type": "number"
|
|
1203
|
+
},
|
|
1204
|
+
"strengths": {
|
|
1205
|
+
"type": "array",
|
|
1206
|
+
"items": {
|
|
1207
|
+
"type": "string"
|
|
1208
|
+
}
|
|
1209
|
+
},
|
|
1210
|
+
"weaknesses": {
|
|
1211
|
+
"type": "array",
|
|
1212
|
+
"items": {
|
|
1213
|
+
"type": "string"
|
|
1214
|
+
}
|
|
1215
|
+
},
|
|
1216
|
+
"functionality_match": {
|
|
1217
|
+
"type": "string"
|
|
1218
|
+
},
|
|
1219
|
+
"recommendation": {
|
|
1220
|
+
"type": "string"
|
|
1221
|
+
},
|
|
1222
|
+
"reasoning": {
|
|
1223
|
+
"type": "string"
|
|
1224
|
+
},
|
|
1225
|
+
"usage_example": {
|
|
1226
|
+
"type": "string"
|
|
1227
|
+
}
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
},
|
|
1231
|
+
"top_recommendation": {
|
|
1232
|
+
"type": "object",
|
|
1233
|
+
"properties": {
|
|
1234
|
+
"name": {
|
|
1235
|
+
"type": "string"
|
|
1236
|
+
},
|
|
1237
|
+
"why": {
|
|
1238
|
+
"type": "string"
|
|
1239
|
+
},
|
|
1240
|
+
"alternatives": {
|
|
1241
|
+
"type": "array",
|
|
1242
|
+
"items": {
|
|
1243
|
+
"type": "string"
|
|
1244
|
+
}
|
|
1245
|
+
}
|
|
1246
|
+
}
|
|
1247
|
+
},
|
|
1248
|
+
"implementation_guidance": {
|
|
1249
|
+
"type": "string"
|
|
1250
|
+
}
|
|
1251
|
+
},
|
|
1252
|
+
"required": [
|
|
1253
|
+
"rankings",
|
|
1254
|
+
"top_recommendation",
|
|
1255
|
+
"implementation_guidance"
|
|
1256
|
+
]
|
|
1257
|
+
}
|
|
1258
|
+
},
|
|
1259
|
+
{
|
|
1260
|
+
"type": "AgenticTool",
|
|
1261
|
+
"name": "ImplementationDebugger",
|
|
1262
|
+
"description": "Analyzes failed implementation generation and suggests retry strategies",
|
|
1263
|
+
"prompt": "You are a debugging expert. Analyze the error and provide a fix.\n\n## ERROR INFO\nTool Name: {tool_name}\nTool Description: {tool_description}\nTool Parameters: {tool_parameters}\nImplementation Code: {implementation_code}\nRuntime Error: {runtime_error}\nTest Case: {test_case}\n\n## KEY POINTS TO REMEMBER\n\n### 🔍 Error Analysis Process\n- Identify the exact error type (ImportError, NameError, AttributeError, etc.)\n- Trace the error to its root cause\n- Understand the context and execution flow\n- Check for missing dependencies or imports\n- Look for data type mismatches or logic errors\n\n### 🛠️ Common Error Types & Solutions\n- **ImportError**: Missing packages, wrong import paths, circular imports\n- **NameError**: Undefined variables, scope issues, typos in variable names\n- **AttributeError**: Wrong object types, missing methods, API changes\n- **TypeError**: Wrong argument types, missing required parameters\n- **ValueError**: Invalid data values, parsing errors, validation failures\n- **KeyError**: Missing dictionary keys, wrong data structure access\n- **IndexError**: List/array bounds, empty collections\n- **FileNotFoundError**: Missing files, wrong paths, permission issues\n\n### 🎯 Fix Strategy\n- Provide specific, actionable code changes\n- Explain WHY the fix will work\n- Consider edge cases and potential side effects\n- Ensure the fix doesn't break other functionality\n- Test the fix with the given test case\n\n### ⚠️ What NOT to Do\n- Don't give generic advice without specific code changes\n- Don't ignore the context of the error\n- Don't suggest fixes that might break other parts\n- Don't assume the error is always in the obvious place\n- Don't provide fixes without understanding the root cause\n\n## RESPONSE FORMAT\nPlease respond with a valid JSON object:\n\n{\n \"error_analysis\": {\n \"error_type\": \"<type of error>\",\n \"root_cause\": \"<what caused the error>\",\n \"fix_needed\": \"<what needs to be fixed>\"\n },\n 
\"fix_recommendations\": [\n {\n \"description\": \"<what to fix>\",\n \"code_changes\": \"<specific changes>\",\n \"explanation\": \"<why this works>\"\n }\n ]\n}\n\nFix it!",
|
|
1264
|
+
"input_arguments": [
|
|
1265
|
+
"tool_description",
|
|
1266
|
+
"tool_parameters",
|
|
1267
|
+
"previous_attempts",
|
|
1268
|
+
"error_messages",
|
|
1269
|
+
"api_documentation_context"
|
|
1270
|
+
],
|
|
1271
|
+
"parameter": {
|
|
1272
|
+
"type": "object",
|
|
1273
|
+
"properties": {
|
|
1274
|
+
"tool_description": {
|
|
1275
|
+
"type": "string",
|
|
1276
|
+
"description": "Description of the tool that failed to generate"
|
|
1277
|
+
},
|
|
1278
|
+
"tool_parameters": {
|
|
1279
|
+
"type": "string",
|
|
1280
|
+
"description": "JSON string of tool parameters"
|
|
1281
|
+
},
|
|
1282
|
+
"previous_attempts": {
|
|
1283
|
+
"type": "string",
|
|
1284
|
+
"description": "JSON string of previous generation attempts and their issues"
|
|
1285
|
+
},
|
|
1286
|
+
"error_messages": {
|
|
1287
|
+
"type": "string",
|
|
1288
|
+
"description": "Error messages from failed attempts"
|
|
1289
|
+
},
|
|
1290
|
+
"api_documentation_context": {
|
|
1291
|
+
"type": "string",
|
|
1292
|
+
"description": "API documentation context from web search",
|
|
1293
|
+
"default": "{}"
|
|
1294
|
+
}
|
|
1295
|
+
},
|
|
1296
|
+
"required": [
|
|
1297
|
+
"tool_description",
|
|
1298
|
+
"tool_parameters"
|
|
1299
|
+
]
|
|
1300
|
+
},
|
|
1301
|
+
"configs": {
|
|
1302
|
+
"api_type": "CHATGPT",
|
|
1303
|
+
"model_id": "gpt-5",
|
|
1304
|
+
"temperature": 1.0,
|
|
1305
|
+
"max_new_tokens": 3072,
|
|
1306
|
+
"return_json": true
|
|
1307
|
+
},
|
|
1308
|
+
"return_schema": {
|
|
1309
|
+
"type": "object",
|
|
1310
|
+
"properties": {
|
|
1311
|
+
"failure_reason": {
|
|
1312
|
+
"type": "string"
|
|
1313
|
+
},
|
|
1314
|
+
"simplified_description": {
|
|
1315
|
+
"type": "string"
|
|
1316
|
+
},
|
|
1317
|
+
"retry_strategy": {
|
|
1318
|
+
"type": "string"
|
|
1319
|
+
},
|
|
1320
|
+
"better_search_terms": {
|
|
1321
|
+
"type": "array",
|
|
1322
|
+
"items": {
|
|
1323
|
+
"type": "string"
|
|
1324
|
+
}
|
|
1325
|
+
},
|
|
1326
|
+
"requirement_breakdown": {
|
|
1327
|
+
"type": "array",
|
|
1328
|
+
"items": {
|
|
1329
|
+
"type": "string"
|
|
1330
|
+
}
|
|
1331
|
+
},
|
|
1332
|
+
"implementation_hints": {
|
|
1333
|
+
"type": "string"
|
|
1334
|
+
}
|
|
1335
|
+
},
|
|
1336
|
+
"required": [
|
|
1337
|
+
"failure_reason",
|
|
1338
|
+
"simplified_description",
|
|
1339
|
+
"retry_strategy"
|
|
1340
|
+
]
|
|
1341
|
+
}
|
|
1342
|
+
}
|
|
1343
|
+
]
|