tooluniverse 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clait_tools.json +108 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +1 -1
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +1 -1
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/software_tools.json +4954 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
- tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
- tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.0.dist-info/METADATA +377 -0
- tooluniverse-1.0.0.dist-info/RECORD +186 -0
- tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
- tooluniverse/generate_mcp_tools.py +0 -113
- tooluniverse/mcp_server.py +0 -3340
- tooluniverse-0.2.0.dist-info/METADATA +0 -139
- tooluniverse-0.2.0.dist-info/RECORD +0 -21
- tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def compose(arguments, tooluniverse, call_tool):
|
|
7
|
+
tool_config = arguments["tool_config"]
|
|
8
|
+
tool_name = tool_config.get("name", "unnamed_tool")
|
|
9
|
+
arguments.get("save_to_file", False)
|
|
10
|
+
output_file = arguments.get("output_file")
|
|
11
|
+
max_iterations = arguments.get("max_iterations", 3) # Maximum optimization rounds
|
|
12
|
+
satisfaction_threshold = arguments.get(
|
|
13
|
+
"satisfaction_threshold", 8
|
|
14
|
+
) # Quality score threshold (1-10)
|
|
15
|
+
|
|
16
|
+
# 1. Generate test cases
|
|
17
|
+
tc_result = call_tool("TestCaseGenerator", {"tool_config": tool_config})
|
|
18
|
+
print("TestCaseGenerator result:", json.dumps(tc_result, indent=2))
|
|
19
|
+
|
|
20
|
+
# Handle the result - it should be a list of test cases or a dict containing test cases
|
|
21
|
+
test_cases = []
|
|
22
|
+
if isinstance(tc_result, list):
|
|
23
|
+
test_cases = tc_result
|
|
24
|
+
elif isinstance(tc_result, dict):
|
|
25
|
+
# Check if it has a 'result' key (from agentic tool)
|
|
26
|
+
if "result" in tc_result:
|
|
27
|
+
result_data = tc_result["result"]
|
|
28
|
+
if isinstance(result_data, list):
|
|
29
|
+
test_cases = result_data
|
|
30
|
+
elif isinstance(result_data, str):
|
|
31
|
+
# Try to parse JSON string with robust whitespace handling
|
|
32
|
+
try:
|
|
33
|
+
# Multiple parsing strategies for robust handling
|
|
34
|
+
strategies = [
|
|
35
|
+
result_data.strip(), # Simple strip
|
|
36
|
+
re.sub(r"\s+", " ", result_data.strip()), # Collapse whitespace
|
|
37
|
+
re.sub(r"\s", "", result_data), # Remove all whitespace
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
for strategy in strategies:
|
|
41
|
+
try:
|
|
42
|
+
parsed_result = json.loads(strategy)
|
|
43
|
+
if isinstance(parsed_result, list):
|
|
44
|
+
test_cases = parsed_result
|
|
45
|
+
break
|
|
46
|
+
elif isinstance(parsed_result, dict):
|
|
47
|
+
test_cases = parsed_result.get("test_cases", [])
|
|
48
|
+
break
|
|
49
|
+
except json.JSONDecodeError:
|
|
50
|
+
continue
|
|
51
|
+
|
|
52
|
+
# If direct parsing fails, try pattern matching
|
|
53
|
+
if not test_cases:
|
|
54
|
+
json_patterns = [
|
|
55
|
+
r"\[.*?\]", # Array pattern
|
|
56
|
+
r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", # Single object
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
for strategy in strategies:
|
|
60
|
+
for pattern in json_patterns:
|
|
61
|
+
matches = re.findall(pattern, strategy, re.DOTALL)
|
|
62
|
+
for match in matches:
|
|
63
|
+
try:
|
|
64
|
+
parsed_result = json.loads(match)
|
|
65
|
+
if isinstance(parsed_result, list):
|
|
66
|
+
test_cases = parsed_result
|
|
67
|
+
break
|
|
68
|
+
elif isinstance(parsed_result, dict):
|
|
69
|
+
test_cases = [parsed_result]
|
|
70
|
+
break
|
|
71
|
+
except json.JSONDecodeError:
|
|
72
|
+
continue
|
|
73
|
+
if test_cases:
|
|
74
|
+
break
|
|
75
|
+
if test_cases:
|
|
76
|
+
break
|
|
77
|
+
except Exception as e:
|
|
78
|
+
print(f"Failed to parse test cases from result: {e}")
|
|
79
|
+
test_cases = []
|
|
80
|
+
else:
|
|
81
|
+
test_cases = (
|
|
82
|
+
result_data.get("test_cases", [])
|
|
83
|
+
if isinstance(result_data, dict)
|
|
84
|
+
else []
|
|
85
|
+
)
|
|
86
|
+
else:
|
|
87
|
+
test_cases = tc_result.get("test_cases", [])
|
|
88
|
+
|
|
89
|
+
# If we still don't have test cases, generate some basic ones from the tool config
|
|
90
|
+
if not test_cases:
|
|
91
|
+
print("No valid test cases found, generating basic test cases from tool config")
|
|
92
|
+
tool_params = tool_config.get("parameter", {}).get("properties", {})
|
|
93
|
+
required_params = []
|
|
94
|
+
|
|
95
|
+
# Extract required parameters correctly
|
|
96
|
+
if "parameter" in tool_config and "properties" in tool_config["parameter"]:
|
|
97
|
+
properties = tool_config["parameter"]["properties"]
|
|
98
|
+
for param_name, param_info in properties.items():
|
|
99
|
+
if param_info.get("required", False):
|
|
100
|
+
required_params.append(param_name)
|
|
101
|
+
|
|
102
|
+
# If no explicitly required params found, check if there's a 'required' field at the parameter level
|
|
103
|
+
if not required_params and "required" in tool_config["parameter"]:
|
|
104
|
+
required_params = tool_config["parameter"]["required"]
|
|
105
|
+
|
|
106
|
+
# Generate a basic test case with required parameters
|
|
107
|
+
if required_params and tool_params:
|
|
108
|
+
basic_case = {}
|
|
109
|
+
for param in required_params:
|
|
110
|
+
if param in tool_params:
|
|
111
|
+
param_type = tool_params[param].get("type", "string")
|
|
112
|
+
if param_type == "string":
|
|
113
|
+
basic_case[param] = f"test_{param}_value"
|
|
114
|
+
elif param_type == "integer":
|
|
115
|
+
basic_case[param] = 10
|
|
116
|
+
elif param_type == "boolean":
|
|
117
|
+
basic_case[param] = True
|
|
118
|
+
else:
|
|
119
|
+
basic_case[param] = "test_value"
|
|
120
|
+
if basic_case:
|
|
121
|
+
test_cases = [basic_case]
|
|
122
|
+
|
|
123
|
+
# If still no test cases, create a minimal one with available params
|
|
124
|
+
if not test_cases and tool_params:
|
|
125
|
+
basic_case = {}
|
|
126
|
+
for param_name, param_info in list(tool_params.items())[
|
|
127
|
+
:1
|
|
128
|
+
]: # Take first param
|
|
129
|
+
param_type = param_info.get("type", "string")
|
|
130
|
+
if param_type == "string":
|
|
131
|
+
basic_case[param_name] = f"test_{param_name}_value"
|
|
132
|
+
elif param_type == "integer":
|
|
133
|
+
basic_case[param_name] = 10
|
|
134
|
+
elif param_type == "boolean":
|
|
135
|
+
basic_case[param_name] = True
|
|
136
|
+
else:
|
|
137
|
+
basic_case[param_name] = "test_value"
|
|
138
|
+
if basic_case:
|
|
139
|
+
test_cases = [basic_case]
|
|
140
|
+
|
|
141
|
+
if not test_cases:
|
|
142
|
+
return {
|
|
143
|
+
"error": "No test cases generated and could not create basic test cases.",
|
|
144
|
+
"raw_result": tc_result,
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
# 2. Run tool on each test case
|
|
148
|
+
results = []
|
|
149
|
+
for case in test_cases:
|
|
150
|
+
try:
|
|
151
|
+
# If case is a full tool call dict with 'name' and 'arguments', extract arguments
|
|
152
|
+
if isinstance(case, dict) and "arguments" in case:
|
|
153
|
+
arguments = case["arguments"]
|
|
154
|
+
elif isinstance(case, dict):
|
|
155
|
+
# If case is already just the arguments
|
|
156
|
+
arguments = case
|
|
157
|
+
else:
|
|
158
|
+
arguments = case
|
|
159
|
+
|
|
160
|
+
result = tooluniverse.run_one_function(
|
|
161
|
+
{"name": tool_name, "arguments": arguments}
|
|
162
|
+
)
|
|
163
|
+
except Exception as e:
|
|
164
|
+
result = {"error": str(e)}
|
|
165
|
+
results.append({"input": arguments, "output": result})
|
|
166
|
+
|
|
167
|
+
# 3. Multi-round optimization until satisfactory
|
|
168
|
+
current_tool_config = tool_config.copy()
|
|
169
|
+
original_description = tool_config.get("description", "")
|
|
170
|
+
optimization_history = []
|
|
171
|
+
previous_feedback = "" # Track previous round feedback
|
|
172
|
+
all_test_results = results.copy() # Accumulate test results from all rounds
|
|
173
|
+
|
|
174
|
+
for iteration in range(max_iterations):
|
|
175
|
+
print(f"\n=== Optimization Round {iteration + 1}/{max_iterations} ===")
|
|
176
|
+
|
|
177
|
+
current_description = current_tool_config.get("description", "")
|
|
178
|
+
|
|
179
|
+
# 3a. Generate additional test cases based on previous feedback (after first round)
|
|
180
|
+
current_round_results = []
|
|
181
|
+
if iteration > 0 and previous_feedback:
|
|
182
|
+
print("š§Ŗ Generating additional test cases based on previous feedback...")
|
|
183
|
+
try:
|
|
184
|
+
# Create an enhanced TestCaseGenerator prompt that includes previous feedback
|
|
185
|
+
enhanced_tool_config = current_tool_config.copy()
|
|
186
|
+
enhanced_tool_config["_optimization_feedback"] = previous_feedback
|
|
187
|
+
enhanced_tool_config["_iteration"] = iteration + 1
|
|
188
|
+
|
|
189
|
+
new_tc_result = call_tool(
|
|
190
|
+
"TestCaseGenerator", {"tool_config": enhanced_tool_config}
|
|
191
|
+
)
|
|
192
|
+
print(
|
|
193
|
+
f"Additional TestCaseGenerator result: {json.dumps(new_tc_result, indent=2)}"
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
# Parse new test cases with robust whitespace handling
|
|
197
|
+
new_test_cases = []
|
|
198
|
+
if isinstance(new_tc_result, dict) and "result" in new_tc_result:
|
|
199
|
+
result_data = new_tc_result["result"]
|
|
200
|
+
if isinstance(result_data, str):
|
|
201
|
+
# Aggressive cleaning of whitespace and newlines
|
|
202
|
+
cleaned_result = re.sub(r"\s+", " ", result_data.strip())
|
|
203
|
+
# Remove all whitespace and newlines completely for pure JSON detection
|
|
204
|
+
minimal_result = re.sub(r"\s", "", result_data)
|
|
205
|
+
|
|
206
|
+
# Try multiple parsing strategies
|
|
207
|
+
parsing_strategies = [
|
|
208
|
+
cleaned_result, # Whitespace-collapsed version
|
|
209
|
+
minimal_result, # All whitespace removed
|
|
210
|
+
result_data.strip(), # Simple strip
|
|
211
|
+
]
|
|
212
|
+
|
|
213
|
+
# Look for JSON array patterns
|
|
214
|
+
json_patterns = [
|
|
215
|
+
r"\[.*?\]", # Array pattern
|
|
216
|
+
r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", # Single object
|
|
217
|
+
]
|
|
218
|
+
|
|
219
|
+
for strategy in parsing_strategies:
|
|
220
|
+
# Try direct parsing first
|
|
221
|
+
try:
|
|
222
|
+
parsed_result = json.loads(strategy)
|
|
223
|
+
if isinstance(parsed_result, list):
|
|
224
|
+
new_test_cases = parsed_result
|
|
225
|
+
break
|
|
226
|
+
elif isinstance(parsed_result, dict):
|
|
227
|
+
new_test_cases = [parsed_result]
|
|
228
|
+
break
|
|
229
|
+
except json.JSONDecodeError:
|
|
230
|
+
pass
|
|
231
|
+
|
|
232
|
+
# Try pattern matching if direct parsing fails
|
|
233
|
+
if not new_test_cases:
|
|
234
|
+
for pattern in json_patterns:
|
|
235
|
+
matches = re.findall(pattern, strategy, re.DOTALL)
|
|
236
|
+
for match in matches:
|
|
237
|
+
try:
|
|
238
|
+
parsed_result = json.loads(match)
|
|
239
|
+
if isinstance(parsed_result, list):
|
|
240
|
+
new_test_cases = parsed_result
|
|
241
|
+
break
|
|
242
|
+
elif isinstance(parsed_result, dict):
|
|
243
|
+
new_test_cases = [parsed_result]
|
|
244
|
+
break
|
|
245
|
+
except json.JSONDecodeError:
|
|
246
|
+
continue
|
|
247
|
+
if new_test_cases:
|
|
248
|
+
break
|
|
249
|
+
if new_test_cases:
|
|
250
|
+
break
|
|
251
|
+
|
|
252
|
+
if not new_test_cases:
|
|
253
|
+
print(
|
|
254
|
+
f"Failed to parse new test cases from: {result_data[:200]}..."
|
|
255
|
+
)
|
|
256
|
+
elif isinstance(result_data, list):
|
|
257
|
+
new_test_cases = result_data
|
|
258
|
+
|
|
259
|
+
# Run new test cases
|
|
260
|
+
if new_test_cases:
|
|
261
|
+
print(f"š Running {len(new_test_cases)} additional test cases...")
|
|
262
|
+
for case in new_test_cases:
|
|
263
|
+
try:
|
|
264
|
+
if isinstance(case, dict) and "arguments" in case:
|
|
265
|
+
arguments = case["arguments"]
|
|
266
|
+
elif isinstance(case, dict):
|
|
267
|
+
arguments = case
|
|
268
|
+
else:
|
|
269
|
+
arguments = case
|
|
270
|
+
|
|
271
|
+
result = tooluniverse.run_one_function(
|
|
272
|
+
{"name": tool_name, "arguments": arguments}
|
|
273
|
+
)
|
|
274
|
+
except Exception as e:
|
|
275
|
+
result = {"error": str(e)}
|
|
276
|
+
current_round_results.append(
|
|
277
|
+
{"input": arguments, "output": result}
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
# Add new results to accumulated results
|
|
281
|
+
all_test_results.extend(current_round_results)
|
|
282
|
+
print(f"ā
Added {len(current_round_results)} new test results")
|
|
283
|
+
else:
|
|
284
|
+
print("ā ļø No additional test cases generated")
|
|
285
|
+
|
|
286
|
+
except Exception as e:
|
|
287
|
+
print(f"ā Failed to generate additional test cases: {str(e)}")
|
|
288
|
+
|
|
289
|
+
# 3b. Analyze results and suggest optimized description using ALL accumulated test results
|
|
290
|
+
# Include previous feedback for iterative improvement
|
|
291
|
+
analysis_input = {
|
|
292
|
+
"original_description": current_description,
|
|
293
|
+
"test_results": json.dumps(
|
|
294
|
+
all_test_results
|
|
295
|
+
), # Use ALL accumulated test results
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
# Add previous feedback to help guide the next optimization
|
|
299
|
+
if previous_feedback and iteration > 0:
|
|
300
|
+
enhanced_description = f"{current_description}\n\nPrevious optimization feedback: {previous_feedback}"
|
|
301
|
+
analysis_input["original_description"] = enhanced_description
|
|
302
|
+
|
|
303
|
+
analysis = call_tool("DescriptionAnalyzer", analysis_input)
|
|
304
|
+
|
|
305
|
+
# Handle the analysis result
|
|
306
|
+
optimized_description = None
|
|
307
|
+
rationale = None
|
|
308
|
+
|
|
309
|
+
if isinstance(analysis, dict):
|
|
310
|
+
if "result" in analysis:
|
|
311
|
+
# If it's wrapped in a result key
|
|
312
|
+
result_data = analysis["result"]
|
|
313
|
+
if isinstance(result_data, str):
|
|
314
|
+
try:
|
|
315
|
+
parsed_analysis = json.loads(result_data)
|
|
316
|
+
optimized_description = parsed_analysis.get(
|
|
317
|
+
"optimized_description"
|
|
318
|
+
)
|
|
319
|
+
rationale = parsed_analysis.get("rationale")
|
|
320
|
+
except json.JSONDecodeError:
|
|
321
|
+
optimized_description = result_data
|
|
322
|
+
rationale = "Parsed from raw text result"
|
|
323
|
+
elif isinstance(result_data, dict):
|
|
324
|
+
optimized_description = result_data.get("optimized_description")
|
|
325
|
+
rationale = result_data.get("rationale")
|
|
326
|
+
else:
|
|
327
|
+
# Direct dict result
|
|
328
|
+
optimized_description = analysis.get("optimized_description")
|
|
329
|
+
rationale = analysis.get("rationale")
|
|
330
|
+
elif isinstance(analysis, str):
|
|
331
|
+
optimized_description = analysis
|
|
332
|
+
rationale = "Generated from string result"
|
|
333
|
+
|
|
334
|
+
# Fallback if we still don't have an optimized description
|
|
335
|
+
if not optimized_description:
|
|
336
|
+
optimized_description = f"Enhanced description: {current_description} (Based on test results analysis)"
|
|
337
|
+
rationale = (
|
|
338
|
+
"Generated fallback description based on original and test results"
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
# 3c. Optimize argument descriptions using ALL accumulated test results
|
|
342
|
+
optimized_parameters = {}
|
|
343
|
+
argument_rationale = ""
|
|
344
|
+
|
|
345
|
+
if (
|
|
346
|
+
"parameter" in current_tool_config
|
|
347
|
+
and "properties" in current_tool_config["parameter"]
|
|
348
|
+
):
|
|
349
|
+
try:
|
|
350
|
+
# Include previous feedback for parameter optimization too
|
|
351
|
+
arg_analysis_input = {
|
|
352
|
+
"parameter_schema": json.dumps(current_tool_config["parameter"]),
|
|
353
|
+
"test_results": json.dumps(
|
|
354
|
+
all_test_results
|
|
355
|
+
), # Use ALL accumulated test results
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
# Add previous feedback to parameter optimization
|
|
359
|
+
if previous_feedback and iteration > 0:
|
|
360
|
+
# Extract parameter-specific feedback from previous round
|
|
361
|
+
param_feedback = (
|
|
362
|
+
f"Previous feedback for improvement: {previous_feedback}"
|
|
363
|
+
)
|
|
364
|
+
enhanced_schema = current_tool_config["parameter"].copy()
|
|
365
|
+
enhanced_schema["_previous_feedback"] = param_feedback
|
|
366
|
+
arg_analysis_input["parameter_schema"] = json.dumps(enhanced_schema)
|
|
367
|
+
|
|
368
|
+
arg_analysis = call_tool(
|
|
369
|
+
"ArgumentDescriptionOptimizer", arg_analysis_input
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
# Parse argument optimization results
|
|
373
|
+
if isinstance(arg_analysis, dict):
|
|
374
|
+
if "result" in arg_analysis:
|
|
375
|
+
result_data = arg_analysis["result"]
|
|
376
|
+
if isinstance(result_data, str):
|
|
377
|
+
try:
|
|
378
|
+
parsed_arg_analysis = json.loads(result_data)
|
|
379
|
+
raw_params = parsed_arg_analysis.get(
|
|
380
|
+
"optimized_parameters", {}
|
|
381
|
+
)
|
|
382
|
+
# Extract description strings from the result structure
|
|
383
|
+
optimized_parameters = {}
|
|
384
|
+
for param_name, param_data in raw_params.items():
|
|
385
|
+
if (
|
|
386
|
+
isinstance(param_data, dict)
|
|
387
|
+
and "description" in param_data
|
|
388
|
+
):
|
|
389
|
+
optimized_parameters[param_name] = param_data[
|
|
390
|
+
"description"
|
|
391
|
+
]
|
|
392
|
+
elif isinstance(param_data, str):
|
|
393
|
+
optimized_parameters[param_name] = param_data
|
|
394
|
+
else:
|
|
395
|
+
optimized_parameters[param_name] = str(
|
|
396
|
+
param_data
|
|
397
|
+
)
|
|
398
|
+
argument_rationale = parsed_arg_analysis.get(
|
|
399
|
+
"rationale", ""
|
|
400
|
+
)
|
|
401
|
+
except json.JSONDecodeError:
|
|
402
|
+
print("Failed to parse argument optimization result")
|
|
403
|
+
elif isinstance(result_data, dict):
|
|
404
|
+
raw_params = result_data.get("optimized_parameters", {})
|
|
405
|
+
# Extract description strings from the result structure
|
|
406
|
+
optimized_parameters = {}
|
|
407
|
+
for param_name, param_data in raw_params.items():
|
|
408
|
+
if (
|
|
409
|
+
isinstance(param_data, dict)
|
|
410
|
+
and "description" in param_data
|
|
411
|
+
):
|
|
412
|
+
optimized_parameters[param_name] = param_data[
|
|
413
|
+
"description"
|
|
414
|
+
]
|
|
415
|
+
elif isinstance(param_data, str):
|
|
416
|
+
optimized_parameters[param_name] = param_data
|
|
417
|
+
else:
|
|
418
|
+
optimized_parameters[param_name] = str(param_data)
|
|
419
|
+
argument_rationale = result_data.get("rationale", "")
|
|
420
|
+
else:
|
|
421
|
+
raw_params = arg_analysis.get("optimized_parameters", {})
|
|
422
|
+
# Extract description strings from the result structure
|
|
423
|
+
optimized_parameters = {}
|
|
424
|
+
for param_name, param_data in raw_params.items():
|
|
425
|
+
if (
|
|
426
|
+
isinstance(param_data, dict)
|
|
427
|
+
and "description" in param_data
|
|
428
|
+
):
|
|
429
|
+
optimized_parameters[param_name] = param_data[
|
|
430
|
+
"description"
|
|
431
|
+
]
|
|
432
|
+
elif isinstance(param_data, str):
|
|
433
|
+
optimized_parameters[param_name] = param_data
|
|
434
|
+
else:
|
|
435
|
+
optimized_parameters[param_name] = str(param_data)
|
|
436
|
+
argument_rationale = arg_analysis.get("rationale", "")
|
|
437
|
+
|
|
438
|
+
except Exception as e:
|
|
439
|
+
print(f"Failed to optimize argument descriptions: {str(e)}")
|
|
440
|
+
argument_rationale = (
|
|
441
|
+
f"Failed to optimize argument descriptions: {str(e)}"
|
|
442
|
+
)
|
|
443
|
+
|
|
444
|
+
# 3d. Update current tool config with optimizations
|
|
445
|
+
current_tool_config["description"] = optimized_description
|
|
446
|
+
if (
|
|
447
|
+
optimized_parameters
|
|
448
|
+
and "parameter" in current_tool_config
|
|
449
|
+
and "properties" in current_tool_config["parameter"]
|
|
450
|
+
):
|
|
451
|
+
for param_name, new_description in optimized_parameters.items():
|
|
452
|
+
if param_name in current_tool_config["parameter"]["properties"]:
|
|
453
|
+
current_tool_config["parameter"]["properties"][param_name][
|
|
454
|
+
"description"
|
|
455
|
+
] = new_description
|
|
456
|
+
|
|
457
|
+
# 3e. Evaluate quality of current optimization using ALL accumulated test results
|
|
458
|
+
try:
|
|
459
|
+
quality_evaluation = call_tool(
|
|
460
|
+
"DescriptionQualityEvaluator",
|
|
461
|
+
{
|
|
462
|
+
"tool_description": optimized_description,
|
|
463
|
+
"parameter_descriptions": json.dumps(optimized_parameters),
|
|
464
|
+
"test_results": json.dumps(
|
|
465
|
+
all_test_results
|
|
466
|
+
), # Use ALL accumulated test results
|
|
467
|
+
},
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
# Parse quality evaluation result
|
|
471
|
+
quality_score = 0
|
|
472
|
+
is_satisfactory = False
|
|
473
|
+
feedback = ""
|
|
474
|
+
criteria_scores = {}
|
|
475
|
+
|
|
476
|
+
if isinstance(quality_evaluation, dict):
|
|
477
|
+
if "result" in quality_evaluation:
|
|
478
|
+
result_data = quality_evaluation["result"]
|
|
479
|
+
if isinstance(result_data, str):
|
|
480
|
+
try:
|
|
481
|
+
parsed_eval = json.loads(result_data)
|
|
482
|
+
quality_score = parsed_eval.get("overall_score", 0)
|
|
483
|
+
is_satisfactory = parsed_eval.get("is_satisfactory", False)
|
|
484
|
+
feedback = parsed_eval.get("feedback", "")
|
|
485
|
+
criteria_scores = parsed_eval.get("criteria_scores", {})
|
|
486
|
+
except json.JSONDecodeError:
|
|
487
|
+
quality_score = 5 # Default middle score
|
|
488
|
+
feedback = "Failed to parse evaluation result"
|
|
489
|
+
elif isinstance(result_data, dict):
|
|
490
|
+
quality_score = result_data.get("overall_score", 0)
|
|
491
|
+
is_satisfactory = result_data.get("is_satisfactory", False)
|
|
492
|
+
feedback = result_data.get("feedback", "")
|
|
493
|
+
criteria_scores = result_data.get("criteria_scores", {})
|
|
494
|
+
else:
|
|
495
|
+
quality_score = quality_evaluation.get("overall_score", 0)
|
|
496
|
+
is_satisfactory = quality_evaluation.get("is_satisfactory", False)
|
|
497
|
+
feedback = quality_evaluation.get("feedback", "")
|
|
498
|
+
criteria_scores = quality_evaluation.get("criteria_scores", {})
|
|
499
|
+
|
|
500
|
+
except Exception as e:
|
|
501
|
+
print(f"Failed to evaluate quality: {str(e)}")
|
|
502
|
+
quality_score = 5 # Default middle score
|
|
503
|
+
is_satisfactory = quality_score >= satisfaction_threshold
|
|
504
|
+
feedback = f"Quality evaluation failed: {str(e)}"
|
|
505
|
+
criteria_scores = {}
|
|
506
|
+
|
|
507
|
+
# Record this iteration
|
|
508
|
+
iteration_record = {
|
|
509
|
+
"iteration": iteration + 1,
|
|
510
|
+
"description": optimized_description,
|
|
511
|
+
"parameters": optimized_parameters.copy(),
|
|
512
|
+
"description_rationale": rationale,
|
|
513
|
+
"argument_rationale": argument_rationale,
|
|
514
|
+
"quality_score": quality_score,
|
|
515
|
+
"criteria_scores": criteria_scores,
|
|
516
|
+
"feedback": feedback,
|
|
517
|
+
"is_satisfactory": is_satisfactory,
|
|
518
|
+
}
|
|
519
|
+
optimization_history.append(iteration_record)
|
|
520
|
+
|
|
521
|
+
print(f"Quality Score: {quality_score}/10")
|
|
522
|
+
print(f"Satisfactory: {is_satisfactory}")
|
|
523
|
+
print(f"Feedback: {feedback}")
|
|
524
|
+
|
|
525
|
+
# Store current feedback for next iteration
|
|
526
|
+
previous_feedback = str(
|
|
527
|
+
feedback
|
|
528
|
+
) # Convert to string to ensure it's serializable
|
|
529
|
+
|
|
530
|
+
# Check if we've reached satisfactory quality
|
|
531
|
+
if is_satisfactory or quality_score >= satisfaction_threshold:
|
|
532
|
+
print(f"ā
Reached satisfactory quality in round {iteration + 1}")
|
|
533
|
+
break
|
|
534
|
+
elif iteration < max_iterations - 1:
|
|
535
|
+
print(f"š Quality not satisfactory, continuing to round {iteration + 2}")
|
|
536
|
+
feedback_preview = (
|
|
537
|
+
previous_feedback[:100] + "..."
|
|
538
|
+
if len(previous_feedback) > 100
|
|
539
|
+
else previous_feedback
|
|
540
|
+
)
|
|
541
|
+
print(f"š Using feedback for next round: {feedback_preview}")
|
|
542
|
+
else:
|
|
543
|
+
print("ā ļø Reached maximum iterations without achieving satisfactory quality")
|
|
544
|
+
|
|
545
|
+
# Use the final optimized configuration
|
|
546
|
+
final_optimized_tool_config = current_tool_config
|
|
547
|
+
final_description = current_tool_config.get("description", "")
|
|
548
|
+
final_parameters = {}
|
|
549
|
+
final_rationale = (
|
|
550
|
+
optimization_history[-1]["description_rationale"]
|
|
551
|
+
if optimization_history
|
|
552
|
+
else "No optimization performed"
|
|
553
|
+
)
|
|
554
|
+
final_argument_rationale = (
|
|
555
|
+
optimization_history[-1]["argument_rationale"] if optimization_history else ""
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
# Extract final parameter descriptions
|
|
559
|
+
if (
|
|
560
|
+
"parameter" in final_optimized_tool_config
|
|
561
|
+
and "properties" in final_optimized_tool_config["parameter"]
|
|
562
|
+
):
|
|
563
|
+
for param_name, param_info in final_optimized_tool_config["parameter"][
|
|
564
|
+
"properties"
|
|
565
|
+
].items():
|
|
566
|
+
final_parameters[param_name] = param_info.get("description", "")
|
|
567
|
+
|
|
568
|
+
# Print final optimization results
|
|
569
|
+
print("\n" + "=" * 80)
|
|
570
|
+
print("š OPTIMIZATION COMPLETED!")
|
|
571
|
+
print("=" * 80)
|
|
572
|
+
print("\nš Final Results Summary:")
|
|
573
|
+
print(f" ⢠Total optimization rounds: {len(optimization_history)}")
|
|
574
|
+
print(
|
|
575
|
+
f" ⢠Final quality score: {optimization_history[-1]['quality_score'] if optimization_history else 0}/10"
|
|
576
|
+
)
|
|
577
|
+
print(
|
|
578
|
+
f" ⢠Achieved satisfaction: {optimization_history[-1]['is_satisfactory'] if optimization_history else False}"
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
print("\n⨠Final Optimized Tool Configuration:")
|
|
582
|
+
print(json.dumps(final_optimized_tool_config, indent=2, ensure_ascii=False))
|
|
583
|
+
|
|
584
|
+
# 4. Save the optimized description to a file (always save, regardless of save_to_file flag)
|
|
585
|
+
file_path = None
|
|
586
|
+
if final_description:
|
|
587
|
+
if not output_file:
|
|
588
|
+
file_path = f"{tool_name}_optimized_description.txt"
|
|
589
|
+
else:
|
|
590
|
+
file_path = output_file
|
|
591
|
+
|
|
592
|
+
# Create directory if it doesn't exist (only if there's a directory part)
|
|
593
|
+
dir_path = os.path.dirname(file_path)
|
|
594
|
+
if dir_path:
|
|
595
|
+
os.makedirs(dir_path, exist_ok=True)
|
|
596
|
+
|
|
597
|
+
# Do not overwrite if file exists
|
|
598
|
+
if os.path.exists(file_path):
|
|
599
|
+
base, ext = os.path.splitext(file_path)
|
|
600
|
+
file_path = f"{base}_new{ext}"
|
|
601
|
+
|
|
602
|
+
print(f"\nš¾ Saving optimization report to: {file_path}")
|
|
603
|
+
|
|
604
|
+
# Save comprehensive optimization report
|
|
605
|
+
optimization_report = {
|
|
606
|
+
"original_tool_config": tool_config,
|
|
607
|
+
"final_optimized_tool_config": final_optimized_tool_config,
|
|
608
|
+
"optimization_history": optimization_history,
|
|
609
|
+
"optimization_summary": {
|
|
610
|
+
"total_iterations": len(optimization_history),
|
|
611
|
+
"final_description_changed": final_description != original_description,
|
|
612
|
+
"final_parameters_optimized": (
|
|
613
|
+
list(final_parameters.keys()) if final_parameters else []
|
|
614
|
+
),
|
|
615
|
+
"final_description_rationale": final_rationale,
|
|
616
|
+
"final_argument_rationale": final_argument_rationale,
|
|
617
|
+
"final_quality_score": (
|
|
618
|
+
optimization_history[-1]["quality_score"]
|
|
619
|
+
if optimization_history
|
|
620
|
+
else 0
|
|
621
|
+
),
|
|
622
|
+
"achieved_satisfaction": (
|
|
623
|
+
optimization_history[-1]["is_satisfactory"]
|
|
624
|
+
if optimization_history
|
|
625
|
+
else False
|
|
626
|
+
),
|
|
627
|
+
},
|
|
628
|
+
"test_results": results,
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
|
632
|
+
f.write("# Multi-Round Tool Description Optimization Report\n\n")
|
|
633
|
+
f.write(f"## Final Optimized Tool Description\n{final_description}\n\n")
|
|
634
|
+
if final_parameters:
|
|
635
|
+
f.write("## Final Optimized Parameter Descriptions\n")
|
|
636
|
+
for param_name, new_desc in final_parameters.items():
|
|
637
|
+
f.write(f"- **{param_name}**: {new_desc}\n")
|
|
638
|
+
f.write("\n")
|
|
639
|
+
f.write(f"## Final Description Rationale\n{final_rationale}\n\n")
|
|
640
|
+
if final_argument_rationale:
|
|
641
|
+
f.write(
|
|
642
|
+
f"## Final Argument Optimization Rationale\n{final_argument_rationale}\n\n"
|
|
643
|
+
)
|
|
644
|
+
|
|
645
|
+
# Write optimization history
|
|
646
|
+
f.write("## Optimization History\n")
|
|
647
|
+
for _i, record in enumerate(optimization_history):
|
|
648
|
+
f.write(f"### Round {record['iteration']}\n")
|
|
649
|
+
f.write(f"- **Quality Score**: {record['quality_score']}/10\n")
|
|
650
|
+
f.write(f"- **Satisfactory**: {record['is_satisfactory']}\n")
|
|
651
|
+
f.write(f"- **Description**: {record['description']}\n")
|
|
652
|
+
f.write(f"- **Feedback**: {record['feedback']}\n\n")
|
|
653
|
+
|
|
654
|
+
f.write("## Complete Optimization Report\n")
|
|
655
|
+
f.write("```json\n")
|
|
656
|
+
f.write(json.dumps(optimization_report, indent=2))
|
|
657
|
+
f.write("\n```\n")
|
|
658
|
+
|
|
659
|
+
print(f"ā
Optimization report saved successfully to: {file_path}")
|
|
660
|
+
else:
|
|
661
|
+
print("ā ļø No optimized description to save")
|
|
662
|
+
|
|
663
|
+
return {
|
|
664
|
+
"optimized_description": final_description,
|
|
665
|
+
"optimized_parameters": final_parameters,
|
|
666
|
+
"optimized_tool_config": final_optimized_tool_config,
|
|
667
|
+
"rationale": final_rationale,
|
|
668
|
+
"argument_rationale": final_argument_rationale,
|
|
669
|
+
"optimization_history": optimization_history,
|
|
670
|
+
"total_iterations": len(optimization_history),
|
|
671
|
+
"final_quality_score": (
|
|
672
|
+
optimization_history[-1]["quality_score"] if optimization_history else 0
|
|
673
|
+
),
|
|
674
|
+
"achieved_satisfaction": (
|
|
675
|
+
optimization_history[-1]["is_satisfactory"]
|
|
676
|
+
if optimization_history
|
|
677
|
+
else False
|
|
678
|
+
),
|
|
679
|
+
"test_results": results,
|
|
680
|
+
"saved_to": file_path if final_description else None,
|
|
681
|
+
}
|