tooluniverse 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic.

Files changed (150)
  1. tooluniverse/__init__.py +57 -1
  2. tooluniverse/blast_tool.py +132 -0
  3. tooluniverse/boltz_tool.py +2 -2
  4. tooluniverse/cbioportal_tool.py +42 -0
  5. tooluniverse/clinvar_tool.py +268 -74
  6. tooluniverse/compose_scripts/tool_discover.py +1941 -443
  7. tooluniverse/data/agentic_tools.json +0 -370
  8. tooluniverse/data/alphafold_tools.json +6 -6
  9. tooluniverse/data/blast_tools.json +112 -0
  10. tooluniverse/data/cbioportal_tools.json +87 -0
  11. tooluniverse/data/clinvar_tools.json +235 -0
  12. tooluniverse/data/compose_tools.json +0 -89
  13. tooluniverse/data/dbsnp_tools.json +275 -0
  14. tooluniverse/data/emdb_tools.json +61 -0
  15. tooluniverse/data/ensembl_tools.json +259 -0
  16. tooluniverse/data/file_download_tools.json +275 -0
  17. tooluniverse/data/geo_tools.json +200 -48
  18. tooluniverse/data/gnomad_tools.json +109 -0
  19. tooluniverse/data/gtopdb_tools.json +68 -0
  20. tooluniverse/data/gwas_tools.json +32 -0
  21. tooluniverse/data/interpro_tools.json +199 -0
  22. tooluniverse/data/jaspar_tools.json +70 -0
  23. tooluniverse/data/kegg_tools.json +356 -0
  24. tooluniverse/data/mpd_tools.json +87 -0
  25. tooluniverse/data/ols_tools.json +314 -0
  26. tooluniverse/data/package_discovery_tools.json +64 -0
  27. tooluniverse/data/packages/categorized_tools.txt +0 -1
  28. tooluniverse/data/packages/machine_learning_tools.json +0 -47
  29. tooluniverse/data/paleobiology_tools.json +91 -0
  30. tooluniverse/data/pride_tools.json +62 -0
  31. tooluniverse/data/pypi_package_inspector_tools.json +158 -0
  32. tooluniverse/data/python_executor_tools.json +341 -0
  33. tooluniverse/data/regulomedb_tools.json +50 -0
  34. tooluniverse/data/remap_tools.json +89 -0
  35. tooluniverse/data/screen_tools.json +89 -0
  36. tooluniverse/data/tool_discovery_agents.json +428 -0
  37. tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
  38. tooluniverse/data/uniprot_tools.json +77 -0
  39. tooluniverse/data/web_search_tools.json +250 -0
  40. tooluniverse/data/worms_tools.json +55 -0
  41. tooluniverse/dbsnp_tool.py +196 -58
  42. tooluniverse/default_config.py +35 -2
  43. tooluniverse/emdb_tool.py +30 -0
  44. tooluniverse/ensembl_tool.py +140 -47
  45. tooluniverse/execute_function.py +74 -14
  46. tooluniverse/file_download_tool.py +269 -0
  47. tooluniverse/geo_tool.py +81 -28
  48. tooluniverse/gnomad_tool.py +100 -52
  49. tooluniverse/gtopdb_tool.py +41 -0
  50. tooluniverse/interpro_tool.py +72 -0
  51. tooluniverse/jaspar_tool.py +30 -0
  52. tooluniverse/kegg_tool.py +230 -0
  53. tooluniverse/mpd_tool.py +42 -0
  54. tooluniverse/ncbi_eutils_tool.py +96 -0
  55. tooluniverse/ols_tool.py +435 -0
  56. tooluniverse/package_discovery_tool.py +217 -0
  57. tooluniverse/paleobiology_tool.py +30 -0
  58. tooluniverse/pride_tool.py +30 -0
  59. tooluniverse/pypi_package_inspector_tool.py +593 -0
  60. tooluniverse/python_executor_tool.py +711 -0
  61. tooluniverse/regulomedb_tool.py +30 -0
  62. tooluniverse/remap_tool.py +44 -0
  63. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +1 -1
  64. tooluniverse/screen_tool.py +44 -0
  65. tooluniverse/smcp_server.py +3 -3
  66. tooluniverse/tool_finder_embedding.py +3 -1
  67. tooluniverse/tool_finder_keyword.py +3 -1
  68. tooluniverse/tool_finder_llm.py +6 -2
  69. tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
  70. tooluniverse/tools/BLAST_protein_search.py +63 -0
  71. tooluniverse/tools/ClinVar_search_variants.py +26 -15
  72. tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
  73. tooluniverse/tools/EMDB_get_structure.py +46 -0
  74. tooluniverse/tools/GtoPdb_get_targets.py +52 -0
  75. tooluniverse/tools/InterPro_get_domain_details.py +46 -0
  76. tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
  77. tooluniverse/tools/InterPro_search_domains.py +52 -0
  78. tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
  79. tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
  80. tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
  81. tooluniverse/tools/PackageAnalyzer.py +55 -0
  82. tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
  83. tooluniverse/tools/PyPIPackageInspector.py +59 -0
  84. tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
  85. tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
  86. tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
  87. tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
  88. tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
  89. tooluniverse/tools/ToolDiscover.py +11 -11
  90. tooluniverse/tools/UniProt_id_mapping.py +63 -0
  91. tooluniverse/tools/UniProt_search.py +63 -0
  92. tooluniverse/tools/UnifiedToolGenerator.py +59 -0
  93. tooluniverse/tools/WoRMS_search_species.py +49 -0
  94. tooluniverse/tools/XMLToolOptimizer.py +55 -0
  95. tooluniverse/tools/__init__.py +119 -29
  96. tooluniverse/tools/alphafold_get_annotations.py +3 -3
  97. tooluniverse/tools/alphafold_get_prediction.py +3 -3
  98. tooluniverse/tools/alphafold_get_summary.py +3 -3
  99. tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
  100. tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
  101. tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
  102. tooluniverse/tools/clinvar_get_variant_details.py +49 -0
  103. tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
  104. tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
  105. tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
  106. tooluniverse/tools/download_binary_file.py +66 -0
  107. tooluniverse/tools/download_file.py +71 -0
  108. tooluniverse/tools/download_text_content.py +55 -0
  109. tooluniverse/tools/dynamic_package_discovery.py +59 -0
  110. tooluniverse/tools/ensembl_get_sequence.py +52 -0
  111. tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
  112. tooluniverse/tools/ensembl_lookup_gene.py +46 -0
  113. tooluniverse/tools/geo_get_dataset_info.py +46 -0
  114. tooluniverse/tools/geo_get_sample_info.py +46 -0
  115. tooluniverse/tools/geo_search_datasets.py +67 -0
  116. tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
  117. tooluniverse/tools/kegg_find_genes.py +52 -0
  118. tooluniverse/tools/kegg_get_gene_info.py +46 -0
  119. tooluniverse/tools/kegg_get_pathway_info.py +46 -0
  120. tooluniverse/tools/kegg_list_organisms.py +44 -0
  121. tooluniverse/tools/kegg_search_pathway.py +46 -0
  122. tooluniverse/tools/ols_find_similar_terms.py +63 -0
  123. tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
  124. tooluniverse/tools/ols_get_term_ancestors.py +67 -0
  125. tooluniverse/tools/ols_get_term_children.py +67 -0
  126. tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
  127. tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
  128. tooluniverse/tools/ols_search_terms.py +71 -0
  129. tooluniverse/tools/python_code_executor.py +79 -0
  130. tooluniverse/tools/python_script_runner.py +79 -0
  131. tooluniverse/tools/web_api_documentation_search.py +63 -0
  132. tooluniverse/tools/web_search.py +71 -0
  133. tooluniverse/uniprot_tool.py +219 -16
  134. tooluniverse/url_tool.py +18 -0
  135. tooluniverse/utils.py +2 -2
  136. tooluniverse/web_search_tool.py +229 -0
  137. tooluniverse/worms_tool.py +64 -0
  138. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +3 -2
  139. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +143 -54
  140. tooluniverse/data/genomics_tools.json +0 -174
  141. tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
  142. tooluniverse/tools/ToolImplementationGenerator.py +0 -67
  143. tooluniverse/tools/ToolOptimizer.py +0 -59
  144. tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
  145. tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
  146. tooluniverse/ucsc_tool.py +0 -60
  147. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
  148. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
  149. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
  150. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,142 @@
  import json
  import os
+ import time
+
+
+ def _search_api_documentation(tool_description, call_tool):
+     """Search for API documentation and libraries related to the tool description"""
+     api_context = {
+         "packages": [],
+         "documentation_urls": [],
+         "github_repos": [],
+         "search_queries": [],
+     }
+
+     try:
+         # Search for API documentation
+         print("🌐 Searching for API documentation...", flush=True)
+         try:
+             api_search_result = call_tool(
+                 "web_search",
+                 {
+                     "query": f"{tool_description} API documentation official docs",
+                     "max_results": 10,
+                     "search_type": "api_documentation",
+                 },
+             )
+
+             if api_search_result.get("status") == "success":
+                 api_context["documentation_urls"] = [
+                     {"title": r["title"], "url": r["url"], "snippet": r["snippet"]}
+                     for r in api_search_result.get("results", [])
+                 ]
+                 api_context["search_queries"].append(api_search_result.get("query", ""))
+         except Exception as e:
+             print(f"⚠️ API documentation search failed: {e}", flush=True)
+
+         # Search for Python packages
+         print("📦 Searching for Python packages...", flush=True)
+         try:
+             package_search_result = call_tool(
+                 "web_search",
+                 {
+                     "query": f"{tool_description} python package pypi",
+                     "max_results": 10,
+                     "search_type": "python_packages",
+                 },
+             )
+
+             if package_search_result.get("status") == "success":
+                 api_context["packages"] = [
+                     {"title": r["title"], "url": r["url"], "snippet": r["snippet"]}
+                     for r in package_search_result.get("results", [])
+                 ]
+                 api_context["search_queries"].append(
+                     package_search_result.get("query", "")
+                 )
+         except Exception as e:
+             print(f"⚠️ Python packages search failed: {e}", flush=True)
+
+         # Search for GitHub repositories
+         print("🐙 Searching for GitHub repositories...", flush=True)
+         try:
+             github_search_result = call_tool(
+                 "web_search",
+                 {
+                     "query": f"{tool_description} github repository",
+                     "max_results": 3,
+                     "search_type": "github_repos",
+                 },
+             )
+
+             if github_search_result.get("status") == "success":
+                 api_context["github_repos"] = [
+                     {"title": r["title"], "url": r["url"], "snippet": r["snippet"]}
+                     for r in github_search_result.get("results", [])
+                 ]
+                 api_context["search_queries"].append(
+                     github_search_result.get("query", "")
+                 )
+         except Exception as e:
+             print(f"⚠️ GitHub repositories search failed: {e}", flush=True)
+
+         print(
+             f"✅ Found {len(api_context['documentation_urls'])} docs, {len(api_context['packages'])} packages, {len(api_context['github_repos'])} repos"
+         )
+
+     except Exception as e:
+         print(f"⚠️ Web search failed: {e}", flush=True)
+         api_context["error"] = str(e)
+
+     return api_context


  def _discover_similar_tools(tool_description, call_tool):
-     """Discover similar tools"""
+     """Discover similar tools using both web search and internal tool finder"""
      similar_tools = []

+     # First, try web search for additional context
+     try:
+         print("🌐 Performing web search for additional context...")
+         web_search_result = call_tool(
+             "web_search",
+             {
+                 "query": f"{tool_description} python library API",
+                 "max_results": 3,
+                 "search_type": "api_documentation",
+             },
+         )
+
+         if web_search_result.get("status") == "success":
+             # Convert web search results to tool-like format for consistency
+             web_tools = []
+             for i, result in enumerate(web_search_result.get("results", [])):
+                 web_tools.append(
+                     {
+                         "name": f"web_result_{i+1}",
+                         "title": result.get("title", ""),
+                         "url": result.get("url", ""),
+                         "snippet": result.get("snippet", ""),
+                         "source": "web_search",
+                     }
+                 )
+             similar_tools.extend(web_tools)
+             print(f"Found {len(web_tools)} web search results")
+     except Exception as e:
+         print(f"⚠️ Web search failed: {e}")
+
+     # Then use internal tool finder
      discovery_methods = [
          ("Tool_Finder_Keyword", {"description": tool_description, "limit": 5})
      ]

      for method_name, args in discovery_methods:
-         result = call_tool(method_name, args)
-         if result and isinstance(result, list):
-             similar_tools.extend(result)
+         try:
+             result = call_tool(method_name, args)
+             if result and isinstance(result, list):
+                 similar_tools.extend(result)
+         except Exception as e:
+             print(f"⚠️ Internal tool finder failed: {e}")

      # Deduplicate
      seen = set()
@@ -37,117 +160,714 @@ def _discover_similar_tools(tool_description, call_tool):
37
160
  return deduped_tools
38
161
 
39
162
 
40
- def _generate_tool_specification(tool_description, similar_tools, call_tool):
41
- """Generate tool specification"""
163
+ def _discover_packages_dynamically(tool_description, call_tool):
164
+ """Dynamically discover relevant packages using web search and PyPI"""
165
+
166
+ print("๐Ÿ” Discovering packages dynamically...")
167
+
168
+ # Step 0: Use Dynamic_Package_Search tool for intelligent package discovery
169
+ try:
170
+ dynamic_result = call_tool(
171
+ "dynamic_package_discovery",
172
+ {
173
+ "requirements": tool_description,
174
+ "functionality": "API access and data processing",
175
+ "constraints": {"python_version": ">=3.8"},
176
+ },
177
+ )
178
+
179
+ if dynamic_result.get("status") == "success":
180
+ candidates = dynamic_result.get("candidates", [])
181
+ if candidates:
182
+ print(
183
+ f"โœ… Dynamic search found {len(candidates)} package candidates",
184
+ flush=True,
185
+ )
186
+ return candidates
187
+ except Exception as e:
188
+ print(f"โš ๏ธ Dynamic package search failed: {e}", flush=True)
189
+
190
+ # Step 1: Web search for packages and libraries
191
+ web_packages = []
192
+ try:
193
+ search_queries = [
194
+ f"{tool_description} python library",
195
+ f"{tool_description} python package pypi",
196
+ f"{tool_description} python implementation",
197
+ ]
198
+
199
+ for query in search_queries:
200
+ result = call_tool(
201
+ "web_search",
202
+ {"query": query, "max_results": 5, "search_type": "python_packages"},
203
+ )
204
+
205
+ if result.get("status") == "success":
206
+ for item in result.get("results", []):
207
+ # Extract package names from URLs and titles
208
+ if "pypi.org" in item.get("url", ""):
209
+ pkg_name = (
210
+ item["url"].split("/")[-1] or item["url"].split("/")[-2]
211
+ )
212
+ web_packages.append(
213
+ {
214
+ "name": pkg_name,
215
+ "source": "pypi_web",
216
+ "title": item.get("title", ""),
217
+ "snippet": item.get("snippet", ""),
218
+ "url": item.get("url", ""),
219
+ }
220
+ )
221
+ elif "github.com" in item.get("url", ""):
222
+ web_packages.append(
223
+ {
224
+ "name": item.get("title", "").split()[0],
225
+ "source": "github",
226
+ "title": item.get("title", ""),
227
+ "snippet": item.get("snippet", ""),
228
+ "url": item.get("url", ""),
229
+ }
230
+ )
231
+
232
+ except Exception as e:
233
+ print(f"โš ๏ธ Web package search failed: {e}")
234
+
235
+ # Step 2: Use API documentation search
236
+ api_packages = []
237
+ try:
238
+ api_result = call_tool(
239
+ "web_search",
240
+ {
241
+ "query": f"{tool_description} python package pypi",
242
+ "max_results": 5,
243
+ "search_type": "python_packages",
244
+ },
245
+ )
246
+
247
+ if api_result.get("status") == "success":
248
+ api_packages = api_result.get("results", [])
249
+
250
+ except Exception as e:
251
+ print(f"โš ๏ธ API documentation search failed: {e}")
252
+
253
+ # Step 3: Combine and deduplicate
254
+ all_packages = []
255
+ seen_names = set()
256
+
257
+ for pkg in web_packages + api_packages:
258
+ name = pkg.get("name", "").lower().strip()
259
+ if name and name not in seen_names:
260
+ seen_names.add(name)
261
+ all_packages.append(pkg)
262
+
263
+ print(f"โœ… Discovered {len(all_packages)} package candidates")
264
+
265
+ # Step 4: Inspect packages using PyPIPackageInspector for comprehensive metrics
266
+ inspected_packages = []
267
+ for pkg in all_packages[:10]: # Limit to top 10 candidates to save API calls
268
+ try:
269
+ pkg_name = pkg.get("name", "").strip()
270
+ if not pkg_name:
271
+ continue
272
+
273
+ print(f" ๐Ÿ”ฌ Inspecting package: {pkg_name}")
274
+
275
+ # Use PyPIPackageInspector to get comprehensive package information
276
+ inspection_result = call_tool(
277
+ "PyPIPackageInspector",
278
+ {
279
+ "package_name": pkg_name,
280
+ "include_github": True,
281
+ "include_downloads": True,
282
+ },
283
+ )
284
+
285
+ if inspection_result.get("status") == "success":
286
+ # Merge original search data with comprehensive inspection results
287
+ enriched_pkg = pkg.copy()
288
+ enriched_pkg.update(
289
+ {
290
+ "pypi_metadata": inspection_result.get("pypi_metadata", {}),
291
+ "download_stats": inspection_result.get("download_stats", {}),
292
+ "github_stats": inspection_result.get("github_stats", {}),
293
+ "quality_scores": inspection_result.get("quality_scores", {}),
294
+ "recommendation": inspection_result.get("recommendation", ""),
295
+ "overall_score": inspection_result.get(
296
+ "quality_scores", {}
297
+ ).get("overall_score", 0),
298
+ }
299
+ )
300
+ inspected_packages.append(enriched_pkg)
301
+
302
+ # Print summary
303
+ scores = inspection_result.get("quality_scores", {})
304
+ print(
305
+ f" Overall: {scores.get('overall_score', 0)}/100 | "
306
+ f"Popularity: {scores.get('popularity_score', 0)} | "
307
+ f"Maintenance: {scores.get('maintenance_score', 0)} | "
308
+ f"Docs: {scores.get('documentation_score', 0)}"
309
+ )
310
+ else:
311
+ # If inspection fails, keep the basic package info
312
+ enriched_pkg = pkg.copy()
313
+ enriched_pkg["inspection_error"] = inspection_result.get(
314
+ "error", "Unknown error"
315
+ )
316
+ enriched_pkg["overall_score"] = 0
317
+ inspected_packages.append(enriched_pkg)
318
+ print(
319
+ f" โš ๏ธ Inspection failed: {inspection_result.get('error', 'Unknown')}"
320
+ )
321
+
322
+ time.sleep(0.5) # Rate limiting
323
+
324
+ except Exception as e:
325
+ enriched_pkg = pkg.copy()
326
+ enriched_pkg["inspection_error"] = str(e)
327
+ enriched_pkg["overall_score"] = 0
328
+ inspected_packages.append(enriched_pkg)
329
+ print(f" โš ๏ธ Could not inspect package {pkg_name}: {e}")
330
+
331
+ # Sort by overall score (descending)
332
+ inspected_packages.sort(key=lambda x: x.get("overall_score", 0), reverse=True)
333
+
334
+ print("\n๐Ÿ“Š Package inspection summary:")
335
+ for i, pkg in enumerate(inspected_packages[:5], 1):
336
+ score = pkg.get("overall_score", 0)
337
+ name = pkg.get("name", "unknown")
338
+ print(f" {i}. {name}: {score}/100")
339
+
340
+ # Step 5: Evaluate packages using PackageEvaluator with enhanced data
341
+ if inspected_packages:
342
+ try:
343
+ evaluation_result = call_tool(
344
+ "PackageEvaluator",
345
+ {
346
+ "requirements": tool_description,
347
+ "functionality": tool_description,
348
+ "candidates": json.dumps(inspected_packages),
349
+ "evaluation_criteria": json.dumps(
350
+ {
351
+ "popularity": "high_priority", # ไธ‹่ฝฝ้‡ใ€stars
352
+ "maintenance": "high_priority", # ๆœ€่ฟ‘ๆ›ดๆ–ฐๆ—ถ้—ด
353
+ "documentation": "medium_priority", # ๆ–‡ๆกฃๅฎŒๆ•ดๆ€ง
354
+ "compatibility": "high_priority", # Python็‰ˆๆœฌๅ…ผๅฎน
355
+ "security": "medium_priority", # ๅฎ‰ๅ…จๆ€ง
356
+ }
357
+ ),
358
+ },
359
+ )
360
+
361
+ if evaluation_result and "result" in evaluation_result:
362
+ eval_data = evaluation_result["result"]
363
+ if isinstance(eval_data, str):
364
+ eval_data = json.loads(eval_data)
365
+
366
+ print("๐Ÿ“Š Package evaluation completed")
367
+ top_rec = eval_data.get("top_recommendation", {})
368
+ print(f"๐Ÿ† Top recommendation: {top_rec.get('name', 'None')}")
369
+ if "popularity_score" in top_rec:
370
+ print(f" ๐Ÿ“ˆ Popularity: {top_rec.get('popularity_score', 'N/A')}")
371
+ if "maintenance_score" in top_rec:
372
+ print(
373
+ f" ๐Ÿ”ง Maintenance: {top_rec.get('maintenance_score', 'N/A')}"
374
+ )
375
+
376
+ return eval_data
377
+
378
+ except Exception as e:
379
+ print(f"โš ๏ธ Package evaluation failed: {e}")
380
+
381
+ return {
382
+ "rankings": [],
383
+ "top_recommendation": None,
384
+ "candidates": inspected_packages or all_packages,
385
+ }
386
+
387
+
388
+ def _get_specification_template_example():
389
+ """Get a template example for tool specification"""
390
+ return """
391
+ {
392
+ "type": "ExampleTool",
393
+ "name": "example_tool_name",
394
+ "description": "Custom implementation for [specific functionality]",
395
+ "implementation": "Implementation strategy: Based on package evaluation, use the 'top_recommended_package' library (score: 95/100) to handle [X]. Key steps: 1) Validate input parameters for [Y], 2) Call top_recommended_package.method() with [Z], 3) Parse and format response. Recommended packages: top_recommended_package (highly rated), alternative_package (backup). Installation: pip install top_recommended_package. Error handling: wrap API calls in try-except for ConnectionError and TimeoutError. This approach leverages the highest-rated, most maintained libraries for reliability.",
396
+ "parameter": {
397
+ "type": "object",
398
+ "properties": {
399
+ "input_param": {
400
+ "type": "string",
401
+ "description": "Description of input parameter",
402
+ "required": true
403
+ }
404
+ },
405
+ "required": ["input_param"]
406
+ },
407
+ "return_schema": {
408
+ "type": "object",
409
+ "properties": {
410
+ "result": {"type": "string", "description": "Tool output description"}
411
+ }
412
+ },
413
+ "test_examples": [
414
+ {"input_param": "test_value"},
415
+ {"input_param": "test_value2"},
416
+ ],
417
+ "label": [
418
+ "label1", "label2", "label3"
419
+ ]
420
+ }
421
+ """
422
+
423
+
424
+ def _generate_tool_with_xml(tool_description, reference_info, call_tool):
425
+ """Generate complete tool (spec + implementation) using UnifiedToolGenerator with XML format"""
426
+ import xml.etree.ElementTree as ET
427
+
428
+ specification_template = _get_specification_template_example()
429
+ code_template = _get_tool_template_example()
430
+ xml_template = f"""<code><![CDATA[
431
+ {code_template}
432
+ ]]></code>
433
+ <spec><![CDATA[
434
+ {specification_template}
435
+ ]]></spec>
436
+ """
437
+
42
438
  spec_input = {
43
439
  "tool_description": tool_description,
44
- "tool_category": "general",
45
- "tool_type": "CustomTool",
46
- "similar_tools": json.dumps(similar_tools) if similar_tools else "[]",
47
- "existing_tools_summary": "Available tools: standard ToolUniverse tools",
440
+ "reference_info": json.dumps(reference_info),
441
+ "xml_template": xml_template,
48
442
  }
49
443
 
50
- result = call_tool("ToolSpecificationGenerator", spec_input)
51
- if not result or "result" not in result:
52
- raise RuntimeError("ToolSpecificationGenerator returned invalid result")
53
-
54
- tool_config = result["result"]
444
+ result = call_tool("UnifiedToolGenerator", spec_input)
445
+ print(result["result"])
55
446
 
56
- # Ensure tool_config is a dictionary
57
- if isinstance(tool_config, str):
58
- try:
59
- tool_config = json.loads(tool_config)
60
- except json.JSONDecodeError:
61
- raise ValueError(f"Failed to parse tool_config JSON: {tool_config}")
62
- elif not isinstance(tool_config, dict):
63
- raise TypeError(
64
- f"tool_config must be a dictionary, " f"got: {type(tool_config)}"
447
+ # Handle both AgenticTool format (success/result) and standard format (status/data)
448
+ if isinstance(result, dict):
449
+ if result.get("success"):
450
+ xml_content = result.get("result", "")
451
+ elif result.get("status") == "success":
452
+ xml_content = result.get("data", "")
453
+ else:
454
+ raise RuntimeError(
455
+ f"UnifiedToolGenerator returned invalid result: {result}"
456
+ )
457
+ else:
458
+ raise RuntimeError(f"UnifiedToolGenerator returned non-dict result: {result}")
459
+
460
+ # Parse XML to extract spec and code
461
+ # The XML format is: <code>...</code><spec>...</spec> (no root element, no CDATA)
462
+ xml_content = xml_content.strip()
463
+
464
+ # Remove markdown code blocks if present
465
+ if "```xml" in xml_content:
466
+ xml_content = xml_content.split("```xml")[1].split("```")[0].strip()
467
+ elif "```" in xml_content:
468
+ xml_content = xml_content.split("```")[1].split("```")[0].strip()
469
+
470
+ # Wrap in a root element for parsing since the template doesn't have one
471
+ wrapped_xml = f"<root>{xml_content}</root>"
472
+
473
+ try:
474
+ root = ET.fromstring(wrapped_xml)
475
+ except ET.ParseError as e:
476
+ print(f"โŒ XML Parse Error: {e}")
477
+ print(f"๐Ÿ“„ XML Content (first 500 chars):\n{xml_content[:500]}")
478
+ print("๐Ÿ“„ XML Content (around error line):")
479
+ lines = xml_content.split("\n")
480
+ error_line = (
481
+ int(str(e).split("line")[1].split(",")[0].strip())
482
+ if "line" in str(e)
483
+ else 0
65
484
  )
485
+ if error_line > 0 and len(lines) >= error_line:
486
+ for i in range(max(0, error_line - 3), min(len(lines), error_line + 3)):
487
+ print(f"Line {i+1}: {lines[i]}")
488
+ raise RuntimeError(f"Failed to parse XML from UnifiedToolGenerator: {e}")
489
+
490
+ # Extract code
491
+ code_elem = root.find("code")
492
+ implementation_code = (
493
+ code_elem.text.strip() if code_elem is not None and code_elem.text else ""
494
+ )
495
+
496
+ # Extract spec
497
+ spec_elem = root.find("spec")
498
+ spec_text = (
499
+ spec_elem.text.strip() if spec_elem is not None and spec_elem.text else "{}"
500
+ )
501
+ tool_config = json.loads(spec_text)
502
+
503
+ # Add implementation directly to tool_config
504
+ tool_config["implementation"] = {
505
+ "source_code": implementation_code,
506
+ "dependencies": [],
507
+ "imports": [],
508
+ }
509
+
510
+ # Verify type field matches the actual class name in code
511
+ # Extract class name from code using regex
512
+ import re
513
+
514
+ class_match = re.search(r"class\s+(\w+)\s*\(", implementation_code)
515
+ if class_match:
516
+ actual_class_name = class_match.group(1)
517
+ if tool_config.get("type") != actual_class_name:
518
+ print(
519
+ f"โš ๏ธ Fixing type mismatch: '{tool_config.get('type')}' -> '{actual_class_name}'"
520
+ )
521
+ tool_config["type"] = actual_class_name
66
522
 
67
523
  return tool_config
68
524
 
69
525
 
70
- def _generate_implementation(tool_config, call_tool):
71
- """Generate implementation code for all tool types"""
72
- if "implementation" in tool_config:
73
- return tool_config
526
+ def _get_tool_template_example():
527
+ """Get a simple, correct example of @register_tool usage"""
528
+ return '''
529
+ # Example of correct @register_tool usage:
74
530
 
75
- impl_input = {
76
- "tool_description": tool_config.get("description", ""),
77
- "tool_parameters": json.dumps(tool_config.get("parameter", {})),
78
- "domain": "general",
79
- "complexity_level": "intermediate",
80
- }
531
+ from typing import Dict, Any
532
+ from tooluniverse.base_tool import BaseTool
533
+ from tooluniverse.tool_registry import register_tool
81
534
 
82
- # Try multiple times to generate implementation
83
- for attempt in range(3):
535
+ @register_tool("ExampleTool")
536
+ class ExampleTool(BaseTool):
537
+ """Example tool showing correct structure"""
538
+
539
+ def __init__(self, tool_config):
540
+ super().__init__(tool_config)
541
+ # Initialize any required resources here
542
+
543
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
544
+ """
545
+ Main tool execution method
546
+
547
+ Args:
548
+ arguments: Dictionary containing tool parameters
549
+
550
+ Returns:
551
+ Dictionary with tool results (format varies by tool type)
552
+ """
84
553
  try:
554
+ # Extract parameters
555
+ param1 = arguments.get('param1')
556
+ param2 = arguments.get('param2')
557
+
558
+ # Your tool logic here
559
+ result = f"Processed {param1} with {param2}"
560
+
561
+ # Return format can vary - choose what's appropriate for your tool:
562
+ return {
563
+ "status": "success",
564
+ "data": result
565
+ }
566
+
567
+ except Exception as e:
568
+ return {
569
+ "status": "error",
570
+ "error": f"Tool execution failed: {str(e)}"
571
+ }
572
+ '''
573
+
574
+
575
+ def _collect_reference_info(tool_description, call_tool):
+     """Collect all reference information for tool implementation"""
+     print("🌐 Collecting reference information...", flush=True)
+
+     # Search for API documentation and libraries
+     print(" 📚 Searching for API documentation...", flush=True)
+     api_documentation_context = _search_api_documentation(tool_description, call_tool)
+     print(
+         f" ✅ Found {len(api_documentation_context.get('packages', []))} packages, {len(api_documentation_context.get('documentation_urls', []))} docs"
+     )
+
+     # Dynamic package discovery
+     print(" 🔬 Discovering packages dynamically...", flush=True)
+     package_recommendations = _discover_packages_dynamically(
+         tool_description, call_tool
+     )
+     print(f" ✅ Found {len(package_recommendations)} package recommendations")
+
+     # Discover similar tools
+     print(" 📊 Discovering similar tools...", flush=True)
+     similar_tools = _discover_similar_tools(tool_description, call_tool)
+     print(f" ✅ Found {len(similar_tools)} similar tools")
+
+     # Combine all reference information
+     reference_info = {
+         "similar_tools": similar_tools or [],
+         "api_documentation": api_documentation_context or {},
+         "package_recommendations": package_recommendations or {},
+     }
+
+     print(f" 📋 Reference info collected: {list(reference_info.keys())}")
+     return reference_info
607
+
608
+
609
+ def _optimize_tool_with_xml(tool_config, optimization_context, call_tool):
610
+ """Optimize complete tool (spec + implementation) using XMLToolOptimizer with XML format"""
611
+ import xml.etree.ElementTree as ET
612
+
613
+ print("๐Ÿ”ง Optimizing tool...")
614
+
615
+ try:
616
+ # Build XML from current tool_config
617
+ # Format: <code><![CDATA[...]]></code><spec><![CDATA[...]]></spec>
618
+ implementation_data = tool_config.get("implementation", {})
619
+ implementation_code = implementation_data.get("source_code", "")
620
+
621
+ # Save original implementation as backup
622
+ original_implementation = implementation_code
623
+
624
+ # Build spec (without internal fields)
625
+ tool_spec = {
626
+ k: v
627
+ for k, v in tool_config.items()
628
+ if not k.startswith("_") and k != "implementation"
629
+ }
630
+ spec_json = json.dumps(tool_spec, indent=2, ensure_ascii=False)
631
+
632
+ xml_tool = f"""<code><![CDATA[
633
+ {implementation_code}
634
+ ]]></code>
635
+ <spec><![CDATA[
636
+ {spec_json}
637
+ ]]></spec>"""
638
+
639
+ # Enhance optimization context with detailed error information
640
+ enhanced_context = optimization_context.copy()
641
+
642
+ # Extract test results and analyze for errors
643
+ test_results = optimization_context.get("test_results", {})
644
+ if test_results and "test_details" in test_results:
645
+ test_details = test_results["test_details"]
646
+
647
+ # Find all tests with errors in their output
648
+ error_tests = []
649
+ for test in test_details:
650
+ output = test.get("output", {})
651
+ result = output.get("result", {})
652
+
653
+ # Check if result contains an error
654
+ if isinstance(result, dict) and "error" in result:
655
+ error_tests.append(
656
+ {
657
+ "test_id": test.get("test_id"),
658
+ "test_input": test.get("test_input"),
659
+ "error": result.get("error"),
660
+ "error_details": result.get("error_details", {}),
661
+ "error_type": result.get("error_details", {}).get(
662
+ "type", "Unknown"
663
+ ),
664
+ }
665
+ )
666
+
667
+ if error_tests:
668
+ enhanced_context["test_errors"] = error_tests
669
+ enhanced_context["error_summary"] = (
670
+ f"Found {len(error_tests)}/{len(test_details)} tests with errors"
671
+ )
672
+ # Also include raw test details for LLM to analyze
673
+ enhanced_context["raw_test_details"] = test_details
674
+
675
+ # Call XMLToolOptimizer
676
+ result = call_tool(
677
+ "XMLToolOptimizer",
678
+ {
679
+ "xml_tool": xml_tool,
680
+ "optimization_context": json.dumps(enhanced_context),
681
+ },
682
+ )
683
+
684
+ # Handle both AgenticTool format (success/result) and standard format (status/data)
685
+ optimized_xml = None
686
+ if isinstance(result, dict):
687
+ if result.get("success"):
688
+ optimized_xml = result.get("result", "")
689
+ elif result.get("status") == "success":
690
+ optimized_xml = result.get("data", "")
691
+
692
+ if optimized_xml:
693
+
694
+ # Parse optimized XML
695
+ # Format: <code><![CDATA[...]]></code><spec><![CDATA[...]]></spec>
696
+ optimized_xml = optimized_xml.strip()
697
+ if "```xml" in optimized_xml:
698
+ optimized_xml = optimized_xml.split("```xml")[1].split("```")[0].strip()
699
+ elif "```" in optimized_xml:
700
+ optimized_xml = optimized_xml.split("```")[1].split("```")[0].strip()
701
+
702
+ # Wrap in a root element for parsing
703
+ wrapped_xml = f"<root>{optimized_xml}</root>"
704
+ root = ET.fromstring(wrapped_xml)
705
+
706
+ # Extract optimized code
707
+ code_elem = root.find("code")
708
+ optimized_code = (
709
+ code_elem.text.strip()
710
+ if code_elem is not None and code_elem.text
711
+ else implementation_code
712
+ )
713
+
714
+ # Extract optimized spec (if changed)
715
+ spec_elem = root.find("spec")
716
+ if spec_elem is not None and spec_elem.text:
717
+ spec_text = spec_elem.text.strip()
718
+ optimized_spec = json.loads(spec_text)
719
+ # Update ALL fields from optimized spec (except implementation)
720
+ for key, value in optimized_spec.items():
721
+ if key != "implementation": # Don't overwrite implementation dict
722
+ tool_config[key] = value
723
+ print(f" ๐Ÿ“‹ Updated spec fields: {list(optimized_spec.keys())}")
724
+
725
+ # Update implementation
726
+ if "implementation" not in tool_config:
727
+ tool_config["implementation"] = {}
728
+ tool_config["implementation"]["source_code"] = optimized_code
729
+
730
+ # Verify type field matches the actual class name in optimized code
731
+ import re
732
+
733
+ class_match = re.search(r"class\s+(\w+)\s*\(", optimized_code)
734
+ if class_match:
735
+ actual_class_name = class_match.group(1)
736
+ if tool_config.get("type") != actual_class_name:
737
+ print(
738
+ f"โš ๏ธ Fixing type mismatch after optimization: '{tool_config.get('type')}' -> '{actual_class_name}'"
739
+ )
740
+ tool_config["type"] = actual_class_name
741
+
742
+ print("โœ… Tool optimized")
743
+ else:
85
744
  print(
86
- f"๐Ÿ”„ Attempting to generate implementation code "
87
- f"(attempt {attempt + 1}/3)..."
745
+ "โš ๏ธ Optimization failed or returned empty result, keeping original code"
88
746
  )
89
- result = call_tool("ToolImplementationGenerator", impl_input)
747
+ # Restore original code if optimization failed
748
+ tool_config["implementation"]["source_code"] = original_implementation
749
+ except Exception as e:
750
+ print(f"โŒ Error during optimization: {e}")
751
+ print(" Keeping original code due to optimization error")
752
+ import traceback
753
+
754
+ traceback.print_exc()
755
+ # Restore original code on error
756
+ tool_config["implementation"]["source_code"] = original_implementation
757
+
758
+ return tool_config
759
+
760
+
761
+ # Keep old function for backward compatibility
762
+ def _generate_implementation(
763
+ tool_config, call_tool, reference_info=None, max_attempts=3
764
+ ):
765
+ """Legacy function - implementation is now generated together with spec
766
+
767
+ Args:
768
+ tool_config: Tool configuration with implementation already included
769
+ call_tool: Function to call other tools
770
+ reference_info: Optional reference information
771
+ max_attempts: Maximum number of generation attempts (default: 3)
772
+
773
+ Returns:
774
+ dict: Implementation data containing source_code, dependencies, etc.
775
+ """
776
+ if (
777
+ "implementation" in tool_config
778
+ and isinstance(tool_config["implementation"], dict)
779
+ and "source_code" in tool_config["implementation"]
780
+ ):
781
+ # Already has actual code implementation
782
+ return tool_config["implementation"]
783
+
784
+ # Fallback to old generation method if needed
785
+ if reference_info is None:
786
+ reference_info = {}
787
+
788
+ template_example = _get_tool_template_example()
789
+ reference_info["template_example"] = template_example
90
790
 
91
- if result and "result" in result:
92
- result_data = result["result"]
93
- if isinstance(result_data, str):
791
+ print("๐Ÿ”„ Generating initial implementation code...")
792
+
793
+ # Retry loop to ensure we get syntactically valid code
794
+ error_messages = []
795
+
796
+ for attempt in range(max_attempts):
797
+ if attempt > 0:
798
+ print(f" ๐Ÿ”„ Retry attempt {attempt + 1}/{max_attempts}")
799
+ # Add error feedback to reference_info for subsequent attempts
800
+ reference_info["error_feedback"] = {
801
+ "previous_errors": error_messages,
802
+ "instruction": "Previous attempts failed with syntax errors. Please carefully avoid these errors and generate syntactically correct code.",
803
+ }
804
+
805
+ # Prepare input with updated reference_info
806
+ impl_input = {
807
+ "tool_specification": json.dumps(tool_config),
808
+ "reference_info": json.dumps(reference_info),
809
+ "template_example": template_example,
810
+ }
811
+
812
+ result = call_tool("ToolImplementationGenerator", impl_input)
813
+
814
+ if result and "result" in result:
815
+ impl_data = _parse_result(result["result"])
816
+ if impl_data and "implementation" in impl_data:
817
+ impl = impl_data["implementation"]
818
+
819
+ # Basic validation: check syntax only
820
+ source_code = impl.get("source_code", "")
821
+ if source_code:
94
822
  try:
95
- impl_data = json.loads(result_data)
96
- except json.JSONDecodeError as e:
97
- print(f"โš ๏ธ JSON parsing failed: {e}")
823
+ compile(source_code, "<generated>", "exec")
824
+ print("โœ… Initial implementation generated (syntax valid)")
825
+ return impl
826
+ except SyntaxError as e:
827
+ error_msg = f"Attempt {attempt + 1}: Syntax error at line {e.lineno}: {e.msg}"
828
+ print(f" โš ๏ธ {error_msg}")
829
+ error_messages.append(error_msg)
98
830
  continue
99
831
  else:
100
- impl_data = result_data
101
-
102
- if (
103
- "implementation" in impl_data
104
- and "source_code" in impl_data["implementation"]
105
- ):
106
- tool_config["implementation"] = impl_data["implementation"]
107
- print("โœ… Successfully generated implementation code")
108
- return tool_config
109
- else:
110
- missing_fields = list(impl_data.get("implementation", {}).keys())
111
- print(
112
- f"โš ๏ธ Generated implementation missing required "
113
- f"fields: {missing_fields}"
114
- )
115
- else:
116
- print("โš ๏ธ ToolImplementationGenerator returned invalid result")
832
+ error_msg = f"Attempt {attempt + 1}: No source code generated"
833
+ print(f" โš ๏ธ {error_msg}")
834
+ error_messages.append(error_msg)
117
835
 
118
- except Exception as e:
836
+ if attempt == max_attempts - 1:
119
837
  print(
120
- f"โŒ Error generating implementation code "
121
- f"(attempt {attempt + 1}/3): {e}"
838
+ f"โŒ Failed to generate syntactically valid code after {max_attempts} attempts"
122
839
  )
123
- continue
840
+ print(f" Errors encountered: {error_messages}")
124
841
 
125
- return tool_config
842
+ return None
126
843
 
127
844
 
128
845
  def _generate_test_cases(tool_config, call_tool):
-     """Generate test cases"""
-     test_input = {"tool_config": tool_config}
+     """Generate test cases - uses test_examples from tool_config

-     for attempt in range(5):
-         try:
-             result = call_tool("TestCaseGenerator", test_input)
-             if result and "result" in result:
-                 result_data = result["result"]
-                 if isinstance(result_data, str):
-                     test_data = json.loads(result_data)
-                 else:
-                     test_data = result_data
+     Note: Test cases are now generated by UnifiedToolGenerator as part of the spec.
+     This function extracts and formats them for execution.
+     """
+     # Get test_examples from tool_config (already generated by UnifiedToolGenerator)
+     test_examples = tool_config.get("test_examples", [])

-             if "test_cases" in test_data:
-                 test_cases = test_data["test_cases"]
-                 if _validate_test_cases(test_cases, tool_config):
-                     return test_cases
-         except Exception as e:
-             print(f"🔧 TestCaseGenerator attempt #{attempt + 1}/5 failed: {e}")
-             continue
+     if not test_examples:
+         print("⚠️ No test_examples found in tool_config")
+         return []
+
+     # Convert simplified test_examples format to full test case format
+     # test_examples: [{"param1": "value1"}, {"param2": "value2"}]
+     # test_cases: [{"name": "toolName", "arguments": {...}}, ...]
+     tool_name = tool_config.get("name")
+     test_cases = []
+
+     for test_input in test_examples:
+         if isinstance(test_input, dict):
+             test_case = {"name": tool_name, "arguments": test_input}
+             test_cases.append(test_case)

-     return []
+     print(f"📋 Using {len(test_cases)} test cases from tool configuration")
+     return test_cases
151
871
 
152
872
 
153
873
  def _validate_test_cases(test_cases, tool_config):
@@ -171,296 +891,866 @@ def _validate_test_cases(test_cases, tool_config):
171
891
  return True
172
892
 
173
893
 
174
- def _execute_test_cases(tool_config, test_cases):
-     """Execute test cases to validate code functionality"""
-     print("🧪 Executing test cases to validate code functionality...")
+ def _execute_code_safely_with_executor(code_file, tool_name, test_arguments, call_tool):
+     """
+     Safely execute generated tool code via python_code_executor.
+
+     Args:
+         code_file: Path to the generated code file
+         tool_name: Name of the tool
+         test_arguments: Test arguments
+         call_tool: Function used to call other tools
+
+     Returns:
+         dict: {
+             "success": bool,
+             "result": Any,
+             "error": str,
+             "error_type": str,
+             "traceback": str,
+             "stdout": str,
+             "stderr": str,
+             "execution_time_ms": int
+         }
+     """
+     print(" 🔍 Executing code via python_code_executor...")
+
+     # Verify that the code file exists
+     if not os.path.exists(code_file):
+         return {
+             "success": False,
+             "error": f"Code file not found: {code_file}",
+             "error_type": "FileNotFoundError",
+             "traceback": "",
+         }
+
+     # Build the test execution code
+     test_code = f"""
+ import sys
+ import os
+ import importlib.util
+
+ # Add the current directory to the path
+ sys.path.insert(0, os.getcwd())
+
+ # Dynamically load the generated module
+ spec = importlib.util.spec_from_file_location("{tool_name}", "{code_file}")
+ if spec is None:
+     raise ImportError(f"Cannot create spec for {tool_name} from {code_file}")
+
+ module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(module)
+
+ # Get the tool class
+ ToolClass = getattr(module, "{tool_name}", None)
+ if ToolClass is None:
+     raise AttributeError(f"Tool class {tool_name} not found in module")
+
+ # Instantiate the tool
+ tool_instance = ToolClass({{"name": "{tool_name}"}})
+
+ # Run the test
+ test_args = {test_arguments}
+ result = tool_instance.run(test_args)
+ """
+
+     # Call python_code_executor
+     try:
+         execution_result = call_tool(
+             "python_code_executor",
+             {
+                 "code": test_code,
+                 "arguments": {},
+                 "timeout": 30,
+                 "allowed_imports": [
+                     "requests",
+                     "xml",
+                     "json",
+                     "urllib",
+                     "http",
+                     "bs4",
+                     "lxml",
+                     "pandas",
+                     "numpy",
+                     "scipy",
+                     "matplotlib",
+                     "seaborn",
+                     "sys",
+                     "os",
+                     "importlib",
+                     "importlib.util",
+                     "typing",
+                     "Bio",
+                 ],
+             },
+         )
+
+         # Normalize the return format
+         if execution_result.get("success"):
+             return {
+                 "success": True,
+                 "result": execution_result.get("result"),
+                 "stdout": execution_result.get("stdout", ""),
+                 "stderr": execution_result.get("stderr", ""),
+                 "execution_time_ms": execution_result.get("execution_time_ms", 0),
+             }
+         else:
+             return {
+                 "success": False,
+                 "error": execution_result.get("error", "Unknown error"),
+                 "error_type": execution_result.get("error_type", "UnknownError"),
+                 "traceback": execution_result.get("traceback", ""),
+                 "stdout": execution_result.get("stdout", ""),
+                 "stderr": execution_result.get("stderr", ""),
+             }
+
+     except Exception as e:
+         import traceback as tb
+
+         return {
+             "success": False,
+             "error": str(e),
+             "error_type": type(e).__name__,
+             "traceback": tb.format_exc(),
+         }
1016
+
1017
+
1018
+ def _execute_test_cases_with_template(execution_context, call_tool):
1019
+ """Execute the pre-saved execution template and return results
1020
+
1021
+ Args:
1022
+ execution_context: Dict containing execution information:
1023
+ - execution_file: Path to the execution template file
1024
+ - tool_config: Tool configuration (optional)
1025
+ - test_cases: Test cases (optional)
1026
+ - temp_dir: Temporary directory (optional)
1027
+ call_tool: Function to call other tools
1028
+ """
1029
+ execution_file = execution_context.get("execution_file")
1030
+ print(f"๐Ÿš€ Running execution template: {execution_file}")
177
1031
 
178
1032
  test_results = {
179
- "total_tests": len(test_cases),
1033
+ "total_tests": 0,
180
1034
  "passed_tests": 0,
181
1035
  "failed_tests": 0,
182
1036
  "test_details": [],
183
1037
  "overall_success_rate": 0.0,
1038
+ "errors_fixed": 0,
1039
+ "fix_attempts": 0,
184
1040
  }
185
1041
 
186
- if not test_cases:
187
- print("โš ๏ธ No test cases to execute")
1042
+ if not execution_file or not os.path.exists(execution_file):
1043
+ print(f"โŒ Execution template not found: {execution_file}")
188
1044
  return test_results
189
1045
 
190
- # Dynamic import of generated tool code
191
- # try:
192
- # Build tool code file path
193
- tool_name = tool_config.get("name", "UnknownTool")
194
- base_filename = f"generated_tool_{tool_config['name']}"
195
- code_file = f"generated_tool_{tool_name.lower()}_code.py"
196
-
197
- print("๐Ÿ’พ Saving tool files for testing...")
198
-
199
- saved_files = _save_tool_files(tool_config, base_filename)
200
- print(f"Saved: {saved_files}")
201
-
202
- if os.path.exists(code_file):
203
- # ๅŠจๆ€ๅฏผๅ…ฅๅทฅๅ…ท
204
- import importlib.util
205
-
206
- spec = importlib.util.spec_from_file_location(tool_name, code_file)
207
- tool_module = importlib.util.module_from_spec(spec)
208
- spec.loader.exec_module(tool_module)
209
-
210
- # Get tool function
211
- tool_function = getattr(tool_module, tool_name.lower(), None)
212
-
213
- if tool_function:
214
- print(f"โœ… Successfully imported tool: {tool_name}")
215
-
216
- # Execute each test case
217
- for i, test_case in enumerate(test_cases):
218
- test_result = {
219
- "test_id": i + 1,
220
- "test_case": test_case,
221
- "status": "unknown",
222
- "result": None,
223
- "error": None,
224
- "execution_time": 0,
225
- }
226
-
227
- try:
228
- import time
229
-
230
- start_time = time.time()
231
-
232
- # Extract test parameters
233
- if isinstance(test_case, dict) and "input" in test_case:
234
- test_args = test_case["input"]
235
- elif isinstance(test_case, dict) and "arguments" in test_case:
236
- test_args = test_case["arguments"]
237
- else:
238
- test_args = test_case
239
-
240
- # Execute test
241
- result = tool_function(test_args)
242
- print(f"result: {result}")
243
- execution_time = time.time() - start_time
244
-
245
- # Validate result
246
- if result is not None and not isinstance(result, dict):
247
- test_result["status"] = "failed"
248
- test_result["error"] = "Return value is not a dictionary"
249
- elif result is None:
250
- test_result["status"] = "failed"
251
- test_result["error"] = "Return value is None"
252
- else:
253
- test_result["status"] = "passed"
254
- test_result["result"] = result
255
-
256
- test_result["execution_time"] = execution_time
257
-
258
- except Exception as e:
259
- test_result["status"] = "failed"
260
- test_result["error"] = str(e)
261
- test_result["execution_time"] = 0
262
-
263
- # Count results
264
- if test_result["status"] == "passed":
265
- test_results["passed_tests"] += 1
266
- else:
267
- test_results["failed_tests"] += 1
1046
+ # Execute using python_script_runner tool
1047
+ try:
1048
+ import json
268
1049
 
269
- test_results["test_details"].append(test_result)
1050
+ # Use python_script_runner to execute the file
1051
+ working_dir = os.path.dirname(execution_file) if execution_file else "."
270
1052
 
271
- # Print test results
272
- status_emoji = "โœ…" if test_result["status"] == "passed" else "โŒ"
273
- print(f" {status_emoji} Test {i+1}: {test_result['status']}")
274
- if test_result["error"]:
275
- print(f" Error: {test_result['error']}")
1053
+ # Call python_script_runner without validation parameter (default validate=True)
1054
+ execution_result = call_tool(
1055
+ "python_script_runner",
1056
+ {
1057
+ "script_path": execution_file,
1058
+ "timeout": 120,
1059
+ "working_directory": working_dir,
1060
+ },
1061
+ )
276
1062
 
277
- # Calculate success rate
278
- test_results["overall_success_rate"] = (
279
- (test_results["passed_tests"] / test_results["total_tests"])
280
- if test_results["total_tests"] > 0
281
- else 0.0
1063
+ print("๐Ÿ“‹ Execution output:")
1064
+ if execution_result.get("success"):
1065
+ print(execution_result.get("stdout", ""))
1066
+ if execution_result.get("stderr"):
1067
+ print("โš ๏ธ Execution errors:")
1068
+ print(execution_result.get("stderr"))
1069
+ else:
1070
+ print(
1071
+ f"โŒ Execution failed: {execution_result.get('error', 'Unknown error')}"
282
1072
  )
283
1073
 
284
- passed = test_results["passed_tests"]
285
- total = test_results["total_tests"]
286
- print(f"๐Ÿ“Š Test execution completed: {passed}/{total} passed")
287
- print(f"๐ŸŽฏ Success rate: {test_results['overall_success_rate']:.1%}")
1074
+ # Parse execution results directly from stdout
1075
+ stdout = execution_result.get("stdout", "")
288
1076
 
1077
+ # Extract JSON results
1078
+ if "### TEST_RESULTS_JSON ###" in stdout:
1079
+ try:
1080
+ json_start = stdout.index("### TEST_RESULTS_JSON ###") + len(
1081
+ "### TEST_RESULTS_JSON ###\n"
1082
+ )
1083
+ json_end = stdout.index("### END_TEST_RESULTS_JSON ###")
1084
+ json_str = stdout[json_start:json_end].strip()
1085
+ parsed_results = json.loads(json_str)
1086
+
1087
+ # Store raw test results for optimizer to analyze
1088
+ test_results["test_details"] = parsed_results.get("test_cases", [])
1089
+ test_results["total_tests"] = len(test_results["test_details"])
1090
+
1091
+ print(f"๐Ÿ“Š Executed {test_results['total_tests']} test cases")
1092
+
1093
+ except Exception as e:
1094
+ print(f"โš ๏ธ Failed to parse results: {e}")
1095
+ test_results["parse_error"] = str(e)
1096
+ # Fallback to simple counting
1097
+ test_results["total_tests"] = 0
289
1098
  else:
290
- print(f"โŒ Unable to find tool function: {tool_name.lower()}")
291
- test_results["error"] = f"Tool function not found: {tool_name.lower()}"
292
- else:
293
- print(f"โŒ Tool code file does not exist: {code_file}")
294
- test_results["error"] = f"Code file does not exist: {code_file}"
1099
+ print("โš ๏ธ No JSON results found in output, falling back to simple counting")
1100
+ lines = stdout.split("\n")
1101
+ passed_count = sum(1 for line in lines if "โœ… Success:" in line)
1102
+ failed_count = sum(1 for line in lines if "โŒ Error:" in line)
1103
+ test_results["total_tests"] = passed_count + failed_count
1104
+ test_results["passed_tests"] = passed_count
1105
+ test_results["failed_tests"] = failed_count
1106
+
1107
+ except Exception as e:
1108
+ print(f"โŒ Error executing template: {e}")
1109
+ import traceback
1110
+
1111
+ traceback.print_exc()
295
1112
 
296
1113
  return test_results
297
1114
 
298
1115
 
299
- def _evaluate_quality(tool_config, test_cases, call_tool):
-     """Evaluate code quality - uses the enhanced CodeQualityAnalyzer plus actual test execution"""
+ def _evaluate_quality(
+     tool_config,
+     test_cases,
+     call_tool,
+     test_execution_results=None,
+     detailed=True,
+     temp_dir=None,
+ ):
+     """Evaluate code quality - the score is computed from test execution results"""
+
+     # If test results were already provided, use them directly; otherwise run the tests
+     if test_execution_results is None:
+         # Save tool files first
+         base_filename = f"generated_tool_{tool_config['name']}"
+         saved_files = _save_tool_files(
+             tool_config, base_filename, call_tool, temp_dir, test_cases
+         )

-     # First run the test examples to verify functionality
-     test_execution_results = _execute_test_cases(tool_config, test_cases)
+         # Extract execution file
+         execution_file = next(
+             (f for f in saved_files if f.endswith("_execute.py")), None
+         )

-     # Extract the implementation code
+         # Execute tests using the saved file
1140
+ execution_context = {
1141
+ "execution_file": execution_file,
1142
+ "tool_config": tool_config,
1143
+ "test_cases": test_cases,
1144
+ "temp_dir": temp_dir,
1145
+ }
1146
+ test_execution_results = _execute_test_cases_with_template(
1147
+ execution_context, call_tool
1148
+ )
1149
+ else:
1150
+ print(" โ™ป๏ธ Using pre-executed test results")
1151
+
1152
+ # Extract implementation code for analysis
306
1153
  implementation_code = ""
307
1154
  if "implementation" in tool_config:
308
1155
  impl = tool_config["implementation"]
309
- print("impl.keys():", impl.keys())
310
1156
  implementation_code = impl["source_code"]
311
1157
 
312
- # Build analysis input including test execution results
313
- eval_input = {
314
- "tool_name": tool_config.get("name", "UnknownTool"),
315
- "tool_description": tool_config.get("description", ""),
316
- "tool_parameters": json.dumps(tool_config.get("parameter", {})),
317
- "implementation_code": implementation_code,
318
- "test_cases": json.dumps(test_cases),
319
- "test_execution_results": json.dumps(test_execution_results),
320
- }
1158
+ # Extract test details for score calculation
1159
+ parsed_data = {"test_execution": test_execution_results}
1160
+
1161
+ # Calculate overall score based on test execution results
1162
+ if test_execution_results and "test_details" in test_execution_results:
1163
+ test_details = test_execution_results.get("test_details", [])
1164
+ total_tests = len(test_details)
1165
+ passed_tests = sum(
1166
+ 1
1167
+ for t in test_details
1168
+ if t.get("output", {}).get("result", {}).get("error") is None
1169
+ )
1170
+
1171
+ if total_tests > 0:
1172
+ parsed_data["overall_score"] = (passed_tests / total_tests) * 10
1173
+ print(
1174
+ f" ๐Ÿ“Š Score: {parsed_data['overall_score']:.2f}/10 ({passed_tests}/{total_tests})"
1175
+ )
1176
+ else:
1177
+ parsed_data["overall_score"] = 0.0
1178
+ else:
1179
+ parsed_data["overall_score"] = 5.0
1180
+
1181
+ # Try to enrich with CodeQualityAnalyzer analysis (optional, can fail)
1182
+ try:
1183
+ eval_input = {
1184
+ "tool_name": tool_config.get("name", "UnknownTool"),
1185
+ "tool_description": tool_config.get("description", "")[:200],
1186
+ "tool_parameters": json.dumps(tool_config.get("parameter", {})),
1187
+ "implementation_code": implementation_code[:2000],
1188
+ "test_cases": json.dumps(test_cases[:2] if test_cases else []),
1189
+ "test_execution_results": json.dumps(
1190
+ {
1191
+ "total": test_execution_results.get("total_tests", 0),
1192
+ "passed": (
1193
+ passed_tests if "test_details" in test_execution_results else 0
1194
+ ),
1195
+ }
1196
+ ),
1197
+ }
321
1198
 
322
- print("๐Ÿ” Using CodeQualityAnalyzer for deep code quality analysis...")
1199
+ result = call_tool("CodeQualityAnalyzer", eval_input)
323
1200
 
324
- result = call_tool("CodeQualityAnalyzer", eval_input)
325
- print(f"result: {result['result']}")
1201
+ if isinstance(result, dict):
1202
+ if result.get("success"):
1203
+ result_data = result.get("result", "{}")
1204
+ elif result.get("status") == "success":
1205
+ result_data = result.get("data", "{}")
1206
+ else:
1207
+ result_data = "{}"
1208
+ else:
1209
+ result_data = "{}"
326
1210
 
327
- result_data = result["result"]
328
- parsed_data = json.loads(result_data)
329
- parsed_data["test_execution"] = test_execution_results
1211
+ quality_data = _parse_result(result_data)
1212
+ if quality_data and "overall_score" in quality_data:
1213
+ # Use CodeQualityAnalyzer score if available
1214
+ parsed_data["overall_score"] = quality_data["overall_score"]
1215
+ parsed_data["quality_analysis"] = quality_data
1216
+ except Exception as e:
1217
+ print(f" โš ๏ธ CodeQualityAnalyzer skipped: {e}")
330
1218
 
331
1219
  return parsed_data
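The score computed above is simply the test pass rate scaled to 10, with 5.0 as a neutral fallback when no per-test details are available. A minimal standalone sketch of that heuristic (hypothetical helper name, same test_details shape assumed as in the code above):

    def pass_rate_score(test_details):
        # test_details: list like [{"output": {"result": {"error": ...}}}, ...] or None
        if test_details is None:
            return 5.0  # no per-test details available -> neutral default
        if not test_details:
            return 0.0  # details present but empty
        passed = sum(
            1 for t in test_details
            if t.get("output", {}).get("result", {}).get("error") is None
        )
        return passed / len(test_details) * 10

    # e.g. one of two tests failing -> 5.0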
332
1220
 
333
1221
 
334
- def _expand_test_coverage(tool_config, call_tool):
335
- """Expand test coverage"""
336
- test_input = {
337
- "tool_config": tool_config,
338
- "focus_areas": ["edge_cases", "boundary_conditions", "error_scenarios"],
339
- }
340
-
341
- result = call_tool("TestCaseGenerator", test_input)
342
- if result and "result" in result:
343
- result_data = result["result"]
344
- if isinstance(result_data, str):
1222
+ def _check_and_install_dependencies(
1223
+ tool_config, installed_packages, user_confirmed_install, call_tool
1224
+ ):
1225
+ """Check and install dependencies with user confirmation
1226
+
1227
+ Args:
1228
+ tool_config: Tool configuration containing dependencies
1229
+ installed_packages: Set of already installed packages
1230
+ user_confirmed_install: Whether user has confirmed installation
1231
+ call_tool: Function to call other tools
1232
+
1233
+ Returns:
1234
+ tuple: (should_continue, user_confirmed, installed_packages, instruction)
1235
+ - should_continue: True to continue, False to trigger reimplementation
1236
+ - user_confirmed: Updated confirmation status
1237
+ - installed_packages: Updated set of installed packages
1238
+ - instruction: Instruction for optimizer if reimplementation needed, else None
1239
+ """
1240
+ dependencies = tool_config.get("implementation", {}).get("dependencies", [])
1241
+ if not dependencies:
1242
+ return True, user_confirmed_install, installed_packages, None
1243
+
1244
+ # Check missing packages by trying to import them
1245
+ missing_packages = []
1246
+ for dep in dependencies:
1247
+ if dep not in installed_packages:
1248
+ # Extract base package name for import test
1249
+ base_name = (
1250
+ dep.split(".")[0]
1251
+ .split(">=")[0]
1252
+ .split("==")[0]
1253
+ .split("<")[0]
1254
+ .replace("-", "_")
1255
+ )
345
1256
  try:
346
- test_cases = json.loads(result_data)
347
- if "test_cases" in test_cases:
348
- if "testing" not in tool_config:
349
- tool_config["testing"] = {}
350
- tool_config["testing"]["test_cases"] = test_cases["test_cases"]
351
- return tool_config
352
- except json.JSONDecodeError:
353
- pass
354
-
355
- return None
1257
+ result = call_tool(
1258
+ "python_code_executor",
1259
+ {"code": f"import {base_name}", "timeout": 3},
1260
+ )
1261
+ if result.get("success"):
1262
+ installed_packages.add(dep)
1263
+ else:
1264
+ missing_packages.append(dep)
1265
+ except Exception:
1266
+ missing_packages.append(dep)
1267
+
1268
+ if not missing_packages:
1269
+ return True, user_confirmed_install, installed_packages, None
1270
+
1271
+ # Get parent packages to install (extract base package name)
1272
+ packages_to_install = list(
1273
+ set(
1274
+ [
1275
+ pkg.split(".")[0].split(">=")[0].split("==")[0].split("<")[0]
1276
+ for pkg in missing_packages
1277
+ ]
1278
+ )
1279
+ )
356
1280
 
1281
+ # User confirmation (first time only)
1282
+ if not user_confirmed_install:
1283
+ print(f"\n๐Ÿ“ฆ Missing packages: {', '.join(packages_to_install)}")
1284
+ print(" Install these packages to continue?")
357
1285
 
358
- def _optimize_code(tool_config, call_tool, quality_evaluation):
359
- """General code optimization"""
360
- optimization_input = {
361
- "tool_config": json.dumps(tool_config),
362
- "quality_evaluation": json.dumps(quality_evaluation),
363
- }
1286
+ # DEBUG MODE: Auto-accept installation to avoid interactive prompts
1287
+ print("\n๐Ÿ”ง DEBUG MODE: Auto-installing packages...")
1288
+ user_confirmed_install = True
1289
+ else:
1290
+ print(f"๐Ÿ“ฆ Auto-installing: {', '.join(packages_to_install)}")
364
1291
 
365
- result = call_tool("CodeOptimizer", optimization_input)
1292
+ # Install packages
1293
+ import subprocess
1294
+ import sys
366
1295
 
367
- if result and "result" in result:
368
- result_data = result["result"]
369
- optimized = json.loads(result_data)
1296
+ failed = []
370
1297
 
371
- # Check return format, CodeOptimizer now returns {"implementation": {...}}
372
- if "implementation" in optimized:
373
- tool_config["implementation"] = optimized["implementation"]
374
- else:
375
- # Compatible with old format
376
- tool_config["implementation"] = optimized
1298
+ for pkg in packages_to_install:
1299
+ try:
1300
+ print(f" ๐Ÿ“ฅ Installing {pkg}...")
1301
+ result = subprocess.run(
1302
+ [sys.executable, "-m", "pip", "install", pkg],
1303
+ capture_output=True,
1304
+ text=True,
1305
+ timeout=300,
1306
+ )
1307
+ if result.returncode == 0:
1308
+ print(f" โœ… {pkg}")
1309
+ installed_packages.add(pkg)
1310
+ else:
1311
+ print(f" โŒ {pkg}")
1312
+ failed.append({"pkg": pkg, "err": result.stderr[:200]})
1313
+ except Exception as e:
1314
+ print(f" โŒ {pkg}")
1315
+ failed.append({"pkg": pkg, "err": str(e)})
1316
+
1317
+ if failed:
1318
+ print("๐Ÿ”„ Failed. Requesting reimplementation...")
1319
+ errors = "\n".join([f"- {f['pkg']}: {f['err']}" for f in failed])
1320
+ instruction = (
1321
+ f"CRITICAL: FAILED: {[f['pkg'] for f in failed]}\n"
1322
+ f"Errors:\n{errors}\n"
1323
+ f"Use different packages OR standard library OR installed: {list(installed_packages)}"
1324
+ )
1325
+ return False, user_confirmed_install, installed_packages, instruction
377
1326
 
378
- return tool_config
1327
+ return True, user_confirmed_install, installed_packages, None
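The import check above is routed through the python_code_executor tool so it runs in the environment the generated tool will use. A local-only equivalent of the same name-normalisation heuristic, using importlib (hypothetical helper names, illustrative only):

    import importlib

    def base_import_name(requirement):
        # "pandas>=2.0" -> "pandas"; "ruamel.yaml==0.18" -> "ruamel"
        return (
            requirement.split(".")[0]
            .split(">=")[0]
            .split("==")[0]
            .split("<")[0]
            .replace("-", "_")
        )

    def is_importable(requirement):
        try:
            importlib.import_module(base_import_name(requirement))
            return True
        except ImportError:
            return False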
379
1328
 
380
1329
 
381
- def iterative_code_improvement(
382
- tool_config, call_tool, max_iterations=5, target_score=9.5
1330
+ def iterative_comprehensive_optimization(
1331
+ tool_config, call_tool, max_iterations=5, target_score=8.5, temp_dir=None
383
1332
  ):
384
- """Iteratively improve code implementation until target quality score is reached"""
385
- print("\n๐Ÿš€ Starting iterative code improvement process")
386
- print(f"Target quality score: {target_score}/10")
387
- print(f"Maximum iterations: {max_iterations}")
1333
+ """
1334
+ Comprehensive optimization with guaranteed minimum iterations
1335
+ and multi-agent improvement strategy
1336
+ """
1337
+ print("\n๐Ÿš€ Starting comprehensive optimization")
1338
+ print(f"Target: {target_score}/10, Max iterations: {max_iterations}")
388
1339
 
389
- current_score = 0
390
1340
  improvement_history = []
1341
+ user_confirmed_install = False
1342
+ installed_packages = set()
391
1343
 
392
1344
  for iteration in range(max_iterations):
393
- print(f"\n๐Ÿ”„ Iteration {iteration + 1}/{max_iterations}")
394
- print(f"Current quality score: {current_score:.2f}/10")
1345
+ print(f"\n{'='*60}")
1346
+ print(f"๐Ÿ”„ Iteration {iteration + 1}/{max_iterations}")
1347
+
1348
+ # Check and install dependencies
1349
+ should_continue, user_confirmed_install, installed_packages, instruction = (
1350
+ _check_and_install_dependencies(
1351
+ tool_config, installed_packages, user_confirmed_install, call_tool
1352
+ )
1353
+ )
1354
+
1355
+ if not should_continue:
1356
+ # Dependency issue - trigger reimplementation
1357
+ optimization_context = {
1358
+ "quality_report": {"overall_score": 0, "issues": ["Dependency issue"]},
1359
+ "test_results": {"total_tests": 0, "failed_tests": 0},
1360
+ "iteration": iteration,
1361
+ "target_score": target_score,
1362
+ "current_score": 0,
1363
+ "improvement_history": improvement_history,
1364
+ "instruction": instruction,
1365
+ }
1366
+ tool_config = optimize_code(tool_config, optimization_context, call_tool)
1367
+ continue
395
1368
 
396
- # Generate test cases and evaluate quality
1369
+ # Generate and execute tests
397
1370
  test_cases = _generate_test_cases(tool_config, call_tool)
398
- print(f"Generated {len(test_cases)} test cases")
399
1371
 
400
- print(f"test_cases: {test_cases}")
1372
+ base_filename = f"generated_tool_{tool_config['name']}"
1373
+ saved_files = _save_tool_files(
1374
+ tool_config, base_filename, call_tool, temp_dir, test_cases
1375
+ )
1376
+ execution_file = next(
1377
+ (f for f in saved_files if f.endswith("_execute.py")), None
1378
+ )
401
1379
 
402
- quality_evaluation = _evaluate_quality(tool_config, test_cases, call_tool)
403
- new_score = quality_evaluation.get("overall_score", 0)
1380
+ execution_context = {
1381
+ "execution_file": execution_file,
1382
+ "tool_config": tool_config,
1383
+ "test_cases": test_cases,
1384
+ "temp_dir": temp_dir,
1385
+ }
1386
+ test_results = _execute_test_cases_with_template(execution_context, call_tool)
1387
+
1388
+ # Evaluate quality
1389
+ quality_report = _evaluate_quality(
1390
+ tool_config,
1391
+ test_cases,
1392
+ call_tool,
1393
+ test_execution_results=test_results,
1394
+ temp_dir=temp_dir,
1395
+ detailed=True,
1396
+ )
404
1397
 
405
- print(f"Quality evaluation result: {new_score:.2f}/10")
406
- if "scores" in quality_evaluation:
407
- for aspect, score in quality_evaluation["scores"].items():
408
- print(f" - {aspect}: {score:.2f}/10")
1398
+ current_score = quality_report["overall_score"]
1399
+ print(f"๐Ÿ“Š Score: {current_score:.2f}/10")
409
1400
 
410
- # Check if target is reached
411
- if new_score >= target_score:
412
- print(f"๐ŸŽ‰ Target quality score {target_score}/10 reached!")
1401
+ # Early stopping
1402
+ if current_score >= target_score:
1403
+ print("๐ŸŽฏ Target reached!")
413
1404
  improvement_history.append(
414
1405
  {
415
1406
  "iteration": iteration + 1,
416
- "score": new_score,
417
- "improvement": new_score - current_score,
418
- "status": "target_achieved",
1407
+ "score": current_score,
1408
+ "improvements": quality_report.get("improvement_suggestions", []),
1409
+ "early_stop": True,
419
1410
  }
420
1411
  )
421
1412
  break
422
1413
 
423
- # Record improvement
424
- improvement = new_score - current_score
425
- print(f"Improvement: {improvement:+.2f}")
1414
+ # Optimize code
1415
+ optimization_context = {
1416
+ "quality_report": quality_report,
1417
+ "test_results": test_results,
1418
+ "iteration": iteration,
1419
+ "target_score": target_score,
1420
+ "current_score": current_score,
1421
+ "improvement_history": improvement_history,
1422
+ }
1423
+ tool_config = optimize_code(tool_config, optimization_context, call_tool)
426
1424
 
427
1425
  improvement_history.append(
428
1426
  {
429
1427
  "iteration": iteration + 1,
430
- "score": new_score,
431
- "improvement": improvement,
432
- "status": "improved",
1428
+ "score": current_score,
1429
+ "improvements": quality_report.get("improvement_suggestions", []),
433
1430
  }
434
1431
  )
435
- current_score = new_score
436
-
437
- tool_config = _optimize_code(tool_config, call_tool, quality_evaluation)
438
1432
 
439
- # Final quality evaluation
1433
+ # Final evaluation
440
1434
  final_test_cases = _generate_test_cases(tool_config, call_tool)
441
- final_quality = _evaluate_quality(tool_config, final_test_cases, call_tool)
442
- final_score = final_quality.get("overall_score", current_score)
443
1435
 
444
- print("๐Ÿ Iterative improvement completed")
445
- print(f"Final quality score: {final_score:.2f}/10")
446
- print(f"Total iterations: {len(improvement_history)}")
1436
+ # Save final tool files
1437
+ final_base_filename = f"generated_tool_{tool_config['name']}_final"
1438
+ saved_files = _save_tool_files(
1439
+ tool_config, final_base_filename, call_tool, temp_dir
1440
+ )
447
1441
 
448
- print("\n๐Ÿ“ˆ Improvement history:")
449
- for record in improvement_history:
450
- status_emoji = "๐ŸŽฏ" if record["status"] == "target_achieved" else "๐Ÿ“ˆ"
451
- print(
452
- f" {status_emoji} Round {record['iteration']}: {record['score']:.2f}/10 (improvement: {record['score']:+.2f})"
1442
+ # Extract execution file
1443
+ execution_file = next((f for f in saved_files if f.endswith("_execute.py")), None)
1444
+
1445
+ # Execute final tests using the saved file
1446
+ execution_context = {
1447
+ "execution_file": execution_file,
1448
+ "tool_config": tool_config,
1449
+ "test_cases": final_test_cases,
1450
+ "temp_dir": temp_dir,
1451
+ }
1452
+ final_test_results = _execute_test_cases_with_template(execution_context, call_tool)
1453
+ final_quality = _evaluate_quality(
1454
+ tool_config,
1455
+ final_test_cases,
1456
+ call_tool,
1457
+ test_execution_results=final_test_results,  # newly added parameter
1458
+ detailed=True,
1459
+ temp_dir=temp_dir,
1460
+ )
1461
+
1462
+ print(f"\n๐Ÿ Optimization completed after {max_iterations} iterations")
1463
+ print(f"Final score: {final_quality['overall_score']:.2f}/10")
1464
+
1465
+ return tool_config, final_quality, improvement_history
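The function returns a 3-tuple. For illustration, the call made from compose() further below unpacks it like this (the values shown are just the defaults used there):

    tool_config, final_quality, history = iterative_comprehensive_optimization(
        tool_config, call_tool, max_iterations=2, target_score=8.5, temp_dir=temp_dir
    )
    print(final_quality["overall_score"], [h["score"] for h in history])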
1466
+
1467
+
1468
+ def _optimize_specification_existing(tool_config, optimization_context, call_tool):
1469
+ """Use existing ToolSpecificationOptimizer with comprehensive optimization context"""
1470
+ result = call_tool(
1471
+ "ToolSpecificationOptimizer",
1472
+ {
1473
+ "tool_config": json.dumps(tool_config),
1474
+ "optimization_context": json.dumps(optimization_context),
1475
+ },
1476
+ )
1477
+
1478
+ if result and "result" in result:
1479
+ opt_data = _parse_result(result["result"])
1480
+ if "optimized_config" in opt_data:
1481
+ # Merge optimized spec
1482
+ merged = tool_config.copy()
1483
+ opt_config = opt_data["optimized_config"]
1484
+
1485
+ spec_fields = [
1486
+ "name",
1487
+ "description",
1488
+ "parameter",
1489
+ "return_schema",
1490
+ "test_examples",
1491
+ ]
1492
+ merged.update({k: v for k, v in opt_config.items() if k in spec_fields})
1493
+
1494
+ print(" โœ… Specification optimized")
1495
+ return merged
1496
+ else:
1497
+ return tool_config
1498
+
1499
+
1500
+ def _parse_result(result_data):
1501
+ """Parse result data from agent calls"""
1502
+ if isinstance(result_data, str):
1503
+ # ๆธ…็†ๅฏ่ƒฝ็š„ markdown ไปฃ็ ๅ—ๅฐ่ฃ…
1504
+ cleaned_data = result_data.strip()
1505
+
1506
+ # Remove ```json / ``` code-block fences
1507
+ if cleaned_data.startswith("```json"):
1508
+ cleaned_data = cleaned_data[7:]  # strip leading ```json
1509
+ if cleaned_data.startswith("```"):
1510
+ cleaned_data = cleaned_data[3:]  # strip leading ```
1511
+ if cleaned_data.endswith("```"):
1512
+ cleaned_data = cleaned_data[:-3]  # strip trailing ```
1513
+
1514
+ cleaned_data = cleaned_data.strip()
1515
+
1516
+ try:
1517
+ return json.loads(cleaned_data)
1518
+ except json.JSONDecodeError as e:
1519
+ print(f"โš ๏ธ JSON ่งฃๆžๅคฑ่ดฅ: {e}")
1520
+ print(f"ๅŽŸๅง‹ๆ•ฐๆฎๅ‰200ๅญ—็ฌฆ: {result_data[:200]}")
1521
+ print(f"ๆธ…็†ๅŽๆ•ฐๆฎๅ‰200ๅญ—็ฌฆ: {cleaned_data[:200]}")
1522
+ return {}
1523
+ return result_data
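For illustration, a fenced agent reply round-trips through _parse_result like this (the payload is an example only):

    raw = '```json\n{"overall_score": 8.5, "issues": []}\n```'
    assert _parse_result(raw) == {"overall_score": 8.5, "issues": []}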
1524
+
1525
+
1526
+ # Keep the old function for backward compatibility
1527
+ def _generate_execution_template(
1528
+ tool_config, base_filename, test_cases=None, temp_dir=None
1529
+ ):
1530
+ """Generate execution template script for testing the tool"""
1531
+ class_name = tool_config.get("name", "CustomTool")
1532
+ tool_config.get("type", "CustomTool")
1533
+
1534
+ execution_template = f'''#!/usr/bin/env python3
1535
+ """
1536
+ Execution template for {class_name}
1537
+ Generated by ToolDiscover
1538
+ """
1539
+
1540
+ import sys
1541
+ import json
1542
+ import os
1543
+ import traceback
1544
+ import subprocess
1545
+ from pathlib import Path
1546
+
1547
+ # Add the current directory to Python path
1548
+ current_dir = Path(__file__).parent
1549
+ sys.path.insert(0, str(current_dir))
1550
+
1551
+ def load_tool_config(config_file):
1552
+ """Load tool configuration from JSON file"""
1553
+ try:
1554
+ with open(config_file, 'r', encoding='utf-8') as f:
1555
+ return json.load(f)
1556
+ except Exception as e:
1557
+ print(f"โŒ Error loading config: {{e}}")
1558
+ return None
1559
+
1560
+ def load_test_cases(tool_config):
1561
+ """Extract test cases from tool configuration"""
1562
+ return tool_config.get("test_examples", [])
1563
+
1564
+ def execute_tool_test(client, tool_name, test_input):
1565
+ """Execute a single test case and return raw result"""
1566
+ try:
1567
+ # Build tool call in ToolUniverse format
1568
+ tool_call = {{
1569
+ "name": tool_name,
1570
+ "arguments": test_input
1571
+ }}
1572
+ # Execute the tool using tooluniverse.run method
1573
+ result = client.run(tool_call)
1574
+ return {{"status": "executed", "result": result}}
1575
+ except Exception as e:
1576
+ return {{"status": "exception", "exception_type": type(e).__name__, "exception_message": str(e)}}
1577
+
1578
+ def main():
1579
+ """Main execution function"""
1580
+ print("๐Ÿš€ Starting tool execution...")
1581
+
1582
+ # Load configuration
1583
+ config_file = f"{base_filename}_config.json"
1584
+ tool_config = load_tool_config(config_file)
1585
+ if not tool_config:
1586
+ return
1587
+
1588
+ print(f"โœ… Loaded tool config: {{tool_config.get('name', 'Unknown')}}")
1589
+
1590
+ # Load test cases
1591
+ test_cases = load_test_cases(tool_config)
1592
+ print(f"๐Ÿ“‹ Found {{len(test_cases)}} test cases")
1593
+
1594
+ # Import the tool class
1595
+ try:
1596
+ # Import the generated tool module using importlib
1597
+ import importlib.util
1598
+ import sys
1599
+
1600
+ code_file = f"{base_filename}_code.py"
1601
+ spec = importlib.util.spec_from_file_location("tool_module", code_file)
1602
+ if spec is None:
1603
+ raise ImportError(f"Cannot create spec for {{code_file}}")
1604
+
1605
+ tool_module = importlib.util.module_from_spec(spec)
1606
+ spec.loader.exec_module(tool_module)
1607
+
1608
+ # Get the tool class
1609
+ tool_type = tool_config.get("type")
1610
+ if not tool_type:
1611
+ raise ValueError("Tool config missing required 'type' field")
1612
+ tool_class = getattr(tool_module, tool_type)
1613
+
1614
+ # Initialize ToolUniverse and register the tool
1615
+ from tooluniverse import ToolUniverse
1616
+ client = ToolUniverse()
1617
+ client.register_custom_tool(
1618
+ tool_class=tool_class,
1619
+ tool_name=tool_type,
1620
+ tool_config=tool_config,
1621
+ instantiate=True
453
1622
  )
454
1623
 
455
- return tool_config, final_score, improvement_history
1624
+ # Get the instantiated tool
1625
+ print(f"โœ… Successfully loaded tool.")
1626
+
1627
+ except Exception as e:
1628
+ print(f"โŒ Error importing tool: {{e}}")
1629
+ print(f"Traceback: {{traceback.format_exc()}}")
1630
+ return
1631
+
1632
+ # Execute test cases and collect ALL results
1633
+ all_results = []
1634
+ tool_name = tool_config.get("name")
1635
+ for i, test_input in enumerate(test_cases, 1):
1636
+ print(f"\\n๐Ÿงช Test case {{i}}/{{len(test_cases)}}: {{test_input}}")
1637
+ test_result = execute_tool_test(client, tool_name, test_input)
1638
+ all_results.append({{"test_id": i, "test_input": test_input, "output": test_result}})
1639
+
1640
+ # Just print what we got, no interpretation
1641
+ print(f" ๐Ÿ“ค Result: {{test_result}}")
1642
+
1643
+ # Output everything as JSON
1644
+ print("\\n### TEST_RESULTS_JSON ###")
1645
+ print(json.dumps({{"test_cases": all_results}}, indent=2))
1646
+ print("### END_TEST_RESULTS_JSON ###")
1647
+
1648
+ if __name__ == "__main__":
1649
+ main()
1650
+ '''
1651
+
1652
+ # Save execution template
1653
+ execution_file = f"{base_filename}_execute.py"
1654
+ if temp_dir:
1655
+ execution_file = os.path.join(temp_dir, os.path.basename(execution_file))
1656
+
1657
+ # Ensure absolute path
1658
+ execution_file = os.path.abspath(execution_file)
1659
+
1660
+ with open(execution_file, "w", encoding="utf-8") as f:
1661
+ f.write(execution_template)
1662
+
1663
+ print(f" ๐Ÿ“œ Execution template saved: {execution_file}")
1664
+ return execution_file
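The template prints its results between two marker lines; the code that harvests them from the subprocess output is not shown in this hunk, but one plausible way to recover the payload from captured stdout (hypothetical helper, regex-based) is:

    import json
    import re

    def extract_test_results(stdout_text):
        # Pull the JSON block printed between the marker lines by the template above
        match = re.search(
            r"### TEST_RESULTS_JSON ###\s*(\{.*?\})\s*### END_TEST_RESULTS_JSON ###",
            stdout_text,
            re.DOTALL,
        )
        return json.loads(match.group(1)) if match else None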
1665
+
1666
+
1667
+ def _extract_imports_from_code(code_content):
1668
+ """Extract import statements from generated code"""
1669
+ import re
1670
+
1671
+ imports = []
1672
+
1673
+ # Find all import statements
1674
+ import_patterns = [
1675
+ r"^import\s+([a-zA-Z_][a-zA-Z0-9_.]*)", # import module
1676
+ r"^from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import", # from module import
1677
+ ]
1678
+
1679
+ for line in code_content.split("\n"):
1680
+ line = line.strip()
1681
+ for pattern in import_patterns:
1682
+ match = re.match(pattern, line)
1683
+ if match:
1684
+ module_name = match.group(1)
1685
+ # Skip standard library modules
1686
+ if not _is_standard_library_module(module_name):
1687
+ imports.append(module_name)
1688
+
1689
+ return list(set(imports)) # Remove duplicates
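For example (module names chosen to exercise the standard-library filter below):

    code = "import requests\nfrom tooluniverse import BaseTool\nimport os"
    print(_extract_imports_from_code(code))  # -> ['requests']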
1690
+
1691
+
1692
+ def _is_standard_library_module(module_name):
1693
+ """Check if a module is part of Python standard library"""
1694
+ standard_modules = {
1695
+ "os",
1696
+ "sys",
1697
+ "json",
1698
+ "math",
1699
+ "datetime",
1700
+ "time",
1701
+ "random",
1702
+ "re",
1703
+ "collections",
1704
+ "itertools",
1705
+ "functools",
1706
+ "operator",
1707
+ "urllib",
1708
+ "http",
1709
+ "xml",
1710
+ "csv",
1711
+ "io",
1712
+ "pathlib",
1713
+ "glob",
1714
+ "shutil",
1715
+ "tempfile",
1716
+ "subprocess",
1717
+ "threading",
1718
+ "multiprocessing",
1719
+ "queue",
1720
+ "logging",
1721
+ "warnings",
1722
+ "traceback",
1723
+ "inspect",
1724
+ "abc",
1725
+ "enum",
1726
+ "dataclasses",
1727
+ "typing_extensions",
1728
+ "xml.etree.ElementTree",
1729
+ "tooluniverse",
1730
+ }
1731
+
1732
+ # Check if it's a standard module or starts with standard module
1733
+ base_module = module_name.split(".")[0]
1734
+ return base_module in standard_modules
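On Python 3.10+ this hand-maintained set could be replaced or cross-checked with the interpreter's own list; note that the set above also deliberately filters tooluniverse and typing_extensions, which are not standard library. An alternative sketch:

    import sys

    def is_stdlib(module_name):
        # sys.stdlib_module_names is available since Python 3.10
        return module_name.split(".")[0] in sys.stdlib_module_names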
456
1735
 
457
1736
 
458
- def _save_tool_files(tool_config, base_filename):
459
- """Save tool files"""
1737
+ def _save_tool_files(
1738
+ tool_config, base_filename, call_tool=None, temp_dir=None, test_cases=None
1739
+ ):
1740
+ """Save tool files to temporary directory"""
1741
+ print(" ๐Ÿ“ Preparing to save tool files...")
1742
+ print(f" ๐Ÿ“ Base filename: {base_filename}")
1743
+
1744
+ # Use temporary directory if provided
1745
+ if temp_dir:
1746
+ base_filename = os.path.join(temp_dir, os.path.basename(base_filename))
1747
+ print(f" ๐Ÿ“ Saving to temp directory: {temp_dir}")
1748
+
460
1749
  # Update configuration
461
1750
  config_to_save = tool_config.copy()
462
- class_name = config_to_save.get("name", "CustomTool")
463
- config_to_save["type"] = class_name
1751
+ tool_name = config_to_save.get("name", "CustomTool")
1752
+ # Keep the original type field (class name), don't overwrite it with the name
1753
+ print(f" ๐Ÿท๏ธ Tool name: {tool_name}")
464
1754
 
465
1755
  # Extract dependency information
466
1756
  dependencies = []
@@ -469,24 +1759,74 @@ def _save_tool_files(tool_config, base_filename):
469
1759
  and "dependencies" in tool_config["implementation"]
470
1760
  ):
471
1761
  dependencies = tool_config["implementation"]["dependencies"]
1762
+ print(f" ๐Ÿ“ฆ Dependencies: {dependencies}")
472
1763
 
473
1764
  # Add dependencies field to configuration
474
1765
  config_to_save["dependencies"] = dependencies
475
1766
 
1767
+ # Merge test cases if provided
1768
+ if test_cases:
1769
+ existing_test_examples = config_to_save.get("test_examples", [])
1770
+ # Combine provided test cases with existing ones
1771
+ combined_test_cases = list(test_cases) # Start with provided test cases
1772
+ # Add existing ones that are not duplicates
1773
+ for existing in existing_test_examples:
1774
+ if existing not in combined_test_cases:
1775
+ combined_test_cases.append(existing)
1776
+ config_to_save["test_examples"] = combined_test_cases
1777
+ print(f" ๐Ÿ“‹ Merged test cases: {len(combined_test_cases)} total")
1778
+
476
1779
  # Remove implementation code
477
1780
  if "implementation" in config_to_save:
478
1781
  del config_to_save["implementation"]
1782
+ print(" ๐Ÿ—‘๏ธ Removed implementation from config")
479
1783
 
480
1784
  # Save configuration file
481
1785
  config_file = f"{base_filename}_config.json"
1786
+ print(f" ๐Ÿ’พ Saving config file: {config_file}")
482
1787
  with open(config_file, "w", encoding="utf-8") as f:
483
1788
  json.dump(config_to_save, f, indent=2, ensure_ascii=False)
1789
+ print(f" โœ… Config file saved: {os.path.getsize(config_file)} bytes")
484
1790
 
485
1791
  # Generate code file
486
1792
  code_file = f"{base_filename}_code.py"
487
- _generate_tool_code(tool_config, code_file)
1793
+ print(f" ๐Ÿ”ง Generating code file: {code_file}")
1794
+ _generate_tool_code(tool_config, code_file, call_tool)
1795
+ print(f" โœ… Code file generated: {os.path.getsize(code_file)} bytes")
1796
+
1797
+ # Extract actual imports from generated code and update dependencies
1798
+ try:
1799
+ with open(code_file, "r", encoding="utf-8") as f:
1800
+ code_content = f.read()
1801
+
1802
+ actual_imports = _extract_imports_from_code(code_content)
1803
+ if actual_imports:
1804
+ print(f" ๐Ÿ” Extracted imports from code: {actual_imports}")
1805
+ # Update dependencies with actual imports
1806
+ dependencies = list(set(dependencies + actual_imports))
1807
+ config_to_save["dependencies"] = dependencies
1808
+ print(f" ๐Ÿ“ฆ Updated dependencies: {dependencies}")
1809
+
1810
+ # Update config file with new dependencies
1811
+ with open(config_file, "w", encoding="utf-8") as f:
1812
+ json.dump(config_to_save, f, indent=2, ensure_ascii=False)
1813
+ except Exception as e:
1814
+ print(f" โš ๏ธ Could not extract imports from code: {e}")
1815
+
1816
+ # Generate execution template
1817
+ execution_file = _generate_execution_template(
1818
+ tool_config, base_filename, test_cases, temp_dir
1819
+ )
1820
+ print(
1821
+ f" โœ… Execution template generated: {os.path.getsize(execution_file)} bytes"
1822
+ )
1823
+
1824
+ # Ensure all paths are absolute
1825
+ config_file = os.path.abspath(config_file)
1826
+ code_file = os.path.abspath(code_file)
1827
+ execution_file = os.path.abspath(execution_file)
488
1828
 
489
- return [config_file, code_file]
1829
+ return [config_file, code_file, execution_file]
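With a hypothetical tool named MyTool, the three absolute paths returned follow the generated_tool_<name> naming used by the callers above:

    files = _save_tool_files(tool_config, "generated_tool_MyTool", call_tool, temp_dir, test_cases)
    # -> [<temp_dir>/generated_tool_MyTool_config.json,   # spec, dependencies, merged test_examples
    #     <temp_dir>/generated_tool_MyTool_code.py,       # the implementation source_code
    #     <temp_dir>/generated_tool_MyTool_execute.py]    # template from _generate_execution_template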
490
1830
 
491
1831
 
492
1832
  def _convert_json_to_python(obj):
@@ -517,131 +1857,222 @@ def _convert_json_to_python(obj):
517
1857
 
518
1858
 
519
1859
  def _convert_python_types_to_strings(obj):
520
- """Recursively convert Python type objects to string representations for JSON serialization"""
1860
+ """Convert Python type objects to JSON schema standard types consistently"""
521
1861
  if isinstance(obj, dict):
522
1862
  result = {}
523
1863
  for key, value in obj.items():
524
- result[key] = _convert_python_types_to_strings(value)
1864
+ if key == "type":
1865
+ if isinstance(value, str):
1866
+ # Normalize to JSON schema standard
1867
+ type_mapping = {
1868
+ "str": "string",
1869
+ "int": "integer",
1870
+ "float": "number",
1871
+ "bool": "boolean",
1872
+ "dict": "object",
1873
+ "list": "array",
1874
+ "none": "null",
1875
+ # Already correct JSON schema types
1876
+ "string": "string",
1877
+ "integer": "integer",
1878
+ "number": "number",
1879
+ "boolean": "boolean",
1880
+ "object": "object",
1881
+ "array": "array",
1882
+ "null": "null",
1883
+ }
1884
+ result[key] = type_mapping.get(value.lower(), value)
1885
+ elif isinstance(value, type):
1886
+ # Handle Python type objects
1887
+ type_name = value.__name__.lower()
1888
+ type_mapping = {
1889
+ "str": "string",
1890
+ "int": "integer",
1891
+ "float": "number",
1892
+ "bool": "boolean",
1893
+ "dict": "object",
1894
+ "list": "array",
1895
+ "none": "null",
1896
+ }
1897
+ result[key] = type_mapping.get(type_name, "string")
1898
+ else:
1899
+ result[key] = value
1900
+ else:
1901
+ result[key] = _convert_python_types_to_strings(value)
525
1902
  return result
526
1903
  elif isinstance(obj, list):
527
1904
  return [_convert_python_types_to_strings(item) for item in obj]
528
1905
  elif obj is True:
529
- return "True"
1906
+ return True # Keep boolean values as booleans
530
1907
  elif obj is False:
531
- return "False"
1908
+ return False
532
1909
  elif obj is str:
533
- return "str"
1910
+ return "string"
534
1911
  elif obj is float:
535
- return "float"
1912
+ return "number"
536
1913
  elif obj is int:
537
- return "int"
1914
+ return "integer"
538
1915
  elif obj is dict:
539
- return "dict"
1916
+ return "object"
540
1917
  elif obj is list:
541
- return "list"
1918
+ return "array"
542
1919
  else:
543
1920
  return obj
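An illustrative round-trip of the normalisation above (schema values are examples only):

    schema = {"type": "object", "properties": {"limit": {"type": int}, "query": {"type": "str"}}}
    print(_convert_python_types_to_strings(schema))
    # {'type': 'object', 'properties': {'limit': {'type': 'integer'}, 'query': {'type': 'string'}}}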
544
1921
 
545
1922
 
546
- def _generate_tool_code(tool_config, code_file):
547
- """Generate Python code for all tool types using correct register_tool method"""
548
- tool_name = tool_config["name"]
1923
+ def _validate_generated_code(code_file, code_content=None):
1924
+ """Validate the generated code for syntax and structure"""
1925
+ print(" ๐Ÿ” Validating code syntax...")
549
1926
 
550
- with open(code_file, "w", encoding="utf-8") as f:
551
- # Add dependency instructions comment
552
- if (
553
- "implementation" in tool_config
554
- and "dependencies" in tool_config["implementation"]
555
- ):
556
- dependencies = tool_config["implementation"]["dependencies"]
557
- if dependencies:
558
- f.write("# Required packages:\n")
559
- for dep in dependencies:
560
- f.write(f"# pip install {dep}\n")
561
- f.write("\n")
562
-
563
- f.write("from typing import Dict, Any\n")
564
- f.write("from src.tooluniverse import register_tool\n\n")
565
-
566
- # Import dependencies
567
- if (
568
- "implementation" in tool_config
569
- and "imports" in tool_config["implementation"]
570
- ):
571
- for imp in tool_config["implementation"]["imports"]:
572
- f.write(f"{imp}\n")
573
-
574
- f.write("\n")
575
-
576
- # Generate function implementation directly, no classes
577
- f.write("@register_tool(\n")
578
- f.write(f' "{tool_name}",\n')
579
- f.write(" {\n")
580
- f.write(f' "name": "{tool_name}",\n')
581
- f.write(f' "type": "{tool_name}",\n')
582
- f.write(f' "description": "{tool_config.get("description", "")}",\n')
583
-
584
- # Use helper functions to convert JSON booleans and types to Python format
585
- parameter_json = _convert_json_to_python(tool_config.get("parameter", {}))
586
- # Convert Python type objects to string representations
587
- parameter_json_str = _convert_python_types_to_strings(parameter_json)
588
- f.write(f' "parameter": {json.dumps(parameter_json_str, indent=8)},\n')
589
-
590
- return_schema_json = _convert_json_to_python(
591
- tool_config.get("return_schema", {})
592
- )
593
- # Convert Python type objects to string representations
594
- return_schema_json_str = _convert_python_types_to_strings(return_schema_json)
595
- f.write(
596
- f' "return_schema": {json.dumps(return_schema_json_str, indent=8)},\n'
597
- )
1927
+ # Use provided content or read from file
1928
+ if code_content is None:
1929
+ with open(code_file, "r", encoding="utf-8") as f:
1930
+ code_content = f.read()
1931
+
1932
+ try:
1933
+ compile(code_content, code_file, "exec")
1934
+ print(f" โœ… Generated code syntax validated: {code_file}")
1935
+ return True, code_content
1936
+ except SyntaxError as e:
1937
+ print(f" โŒ Syntax error in generated code: {e}")
1938
+ print(f" Line {e.lineno}: {e.text}")
1939
+ print(f" Error type: {type(e).__name__}")
1940
+ return False, str(e)
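The check above compiles the source to bytecode without executing it; a stripped-down standalone version of the same idea (hypothetical helper name):

    def has_valid_syntax(source, filename="<generated>"):
        try:
            compile(source, filename, "exec")
            return True, None
        except SyntaxError as err:
            return False, f"line {err.lineno}: {err.msg}"

    # has_valid_syntax("def f(:\n    pass")[0]  -> False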
598
1941
 
599
- # Add dependency information
600
- if (
601
- "implementation" in tool_config
602
- and "dependencies" in tool_config["implementation"]
603
- ):
604
- dependencies = tool_config["implementation"]["dependencies"]
605
- f.write(f' "dependencies": {json.dumps(dependencies, indent=8)}\n')
606
- else:
607
- f.write(' "dependencies": []\n')
608
1942
 
609
- f.write(" }\n")
610
- f.write(")\n")
611
- f.write(
612
- f"def {tool_name.lower()}(arguments: Dict[str, Any]) -> Dict[str, Any]:\n"
1943
+ def _fix_syntax_errors(tool_config, code_file, syntax_error, call_tool):
1944
+ """Attempt to fix syntax errors using agents"""
1945
+ if not call_tool:
1946
+ return False
1947
+
1948
+ print(" ๐Ÿ”ง Attempting to fix syntax error using ImplementationDebugger...")
1949
+ try:
1950
+ # Create a quality report for the syntax error
1951
+ quality_report = {
1952
+ "overall_score": 0.0,
1953
+ "scores": {"syntax_correctness": 0.0, "code_quality": 0.0},
1954
+ "issues": [f"Syntax error: {syntax_error}"],
1955
+ "improvement_suggestions": [
1956
+ "Fix syntax errors",
1957
+ "Ensure proper Python syntax",
1958
+ ],
1959
+ }
1960
+
1961
+ # Try to fix using UnifiedCodeOptimizer
1962
+ result = call_tool(
1963
+ "UnifiedCodeOptimizer",
1964
+ {
1965
+ "tool_config": json.dumps(tool_config),
1966
+ "quality_report": json.dumps(quality_report),
1967
+ "iteration": 0,
1968
+ "improvement_focus": json.dumps(["syntax_fix", "stability"]),
1969
+ },
613
1970
  )
614
- f.write(f' """{tool_config.get("description", "")}"""\n')
615
- f.write(" try:\n")
616
-
617
- # Add source code
618
- if (
619
- "implementation" in tool_config
620
- and "source_code" in tool_config["implementation"]
621
- ):
622
- source_code = tool_config["implementation"]["source_code"]
623
- f.write(" # Generated implementation:\n")
624
- for line in source_code.split("\n"):
625
- if line.strip(): # Skip empty lines
626
- f.write(f" {line}\n")
1971
+
1972
+ if result and "result" in result:
1973
+ opt_data = _parse_result(result["result"])
1974
+ if (
1975
+ "implementation" in opt_data
1976
+ and "source_code" in opt_data["implementation"]
1977
+ ):
1978
+ # Try to regenerate the code with the fixed implementation
1979
+ tool_config["implementation"] = opt_data["implementation"]
1980
+ print(" ๐Ÿ”„ Regenerating code with fixed implementation...")
1981
+
1982
+ # Regenerate the code file
1983
+ with open(code_file, "w", encoding="utf-8") as f:
1984
+ f.write(opt_data["implementation"]["source_code"])
1985
+
1986
+ # Validate the fixed code
1987
+ is_valid, _ = _validate_generated_code(code_file)
1988
+ if is_valid:
1989
+ print(" โœ… Syntax error fixed successfully!")
1990
+ return True
627
1991
  else:
628
- f.write("\n")
1992
+ print(" โš ๏ธ Fixed code still has syntax errors")
629
1993
 
630
- # Ensure execute_tool is called and result is returned
631
- f.write(" \n")
632
- f.write(" # Execute the tool and return result\n")
633
- f.write(" return execute_tool(arguments)\n")
634
- else:
635
- # Default implementation
636
- f.write(" result = {\n")
637
- f.write(' "status": "success",\n')
638
- f.write(' "message": "Tool executed successfully",\n')
639
- f.write(' "input": arguments\n')
640
- f.write(" }\n")
641
- f.write(" return result\n")
1994
+ except Exception as fix_error:
1995
+ print(f" โš ๏ธ Failed to fix syntax error: {fix_error}")
1996
+
1997
+ return False
642
1998
 
643
- f.write(" except Exception as e:\n")
644
- f.write(' return {"error": str(e)}\n')
1999
+
2000
+ def _validate_class_structure(code_content):
2001
+ """Validate that the generated code has the required class structure"""
2002
+ print(" ๐Ÿ” Validating class structure...")
2003
+
2004
+ required_elements = [
2005
+ ("@register_tool", "Generated code missing @register_tool decorator"),
2006
+ ("class", "Generated code missing class definition"),
2007
+ ("def run(self, arguments", "Generated code missing run method"),
2008
+ ("BaseTool", "Generated code missing BaseTool inheritance"),
2009
+ ("def __init__(self, tool_config", "Generated code missing __init__ method"),
2010
+ ]
2011
+
2012
+ for element, error_msg in required_elements:
2013
+ if element not in code_content:
2014
+ raise ValueError(error_msg)
2015
+
2016
+ print(" โœ… Generated code structure validated")
2017
+
2018
+
2019
+ def _generate_tool_code(tool_config, code_file, call_tool=None):
2020
+ """Generate Python code for all tool types using correct register_tool method"""
2021
+ tool_name = tool_config["name"]
2022
+ print(f" ๐Ÿท๏ธ Tool name: {tool_name}")
2023
+
2024
+ # Clean tool name to be a valid Python class name
2025
+ import re
2026
+
2027
+ clean_tool_name = re.sub(r"[^a-zA-Z0-9_]", "", tool_name)
2028
+ if not clean_tool_name or clean_tool_name[0].isdigit():
2029
+ clean_tool_name = "Tool" + clean_tool_name
2030
+ print(f" ๐Ÿงน Cleaned class name: {clean_tool_name}")
2031
+
2032
+ print(f" ๐Ÿ“ Writing code to file: {code_file}")
2033
+
2034
+ # Write code to file
2035
+ try:
2036
+ source_code = tool_config["implementation"]["source_code"]
2037
+ with open(code_file, "w", encoding="utf-8") as f:
2038
+ f.write(source_code)
2039
+ print(" โœ… Code written successfully")
2040
+ except Exception as e:
2041
+ print(f" โŒ Code writing failed: {e}")
2042
+ import traceback
2043
+
2044
+ traceback.print_exc()
2045
+
2046
+ # Read the generated code once
2047
+ with open(code_file, "r", encoding="utf-8") as f:
2048
+ code_content = f.read()
2049
+
2050
+ # Validate generated code
2051
+ is_valid, error_info = _validate_generated_code(code_file, code_content)
2052
+
2053
+ if not is_valid:
2054
+ # Try to fix syntax errors
2055
+ if not _fix_syntax_errors(tool_config, code_file, error_info, call_tool):
2056
+ # Save fallback file
2057
+ fallback_file = code_file.replace(".py", "_fallback.py")
2058
+ with open(fallback_file, "w", encoding="utf-8") as f:
2059
+ f.write(
2060
+ "# Fallback file - contains syntax errors that need manual fixing\n"
2061
+ )
2062
+ f.write(f"# Original error: {error_info}\n\n")
2063
+ f.write(code_content)
2064
+ print(f" ๐Ÿ“„ Fallback file saved: {fallback_file}")
2065
+ print(
2066
+ f" โš ๏ธ Syntax error could not be automatically fixed. Please review {fallback_file}"
2067
+ )
2068
+
2069
+ raise SyntaxError(
2070
+ f"Generated code has syntax error: {error_info}. "
2071
+ f"Fallback file saved to {fallback_file} for manual review."
2072
+ )
2073
+
2074
+ # Validate class structure using cached content
2075
+ _validate_class_structure(code_content)
645
2076
 
646
2077
 
647
2078
  def compose(arguments, tooluniverse, call_tool):
@@ -649,57 +2080,124 @@ def compose(arguments, tooluniverse, call_tool):
649
2080
  tool_description = arguments["tool_description"]
650
2081
  max_iterations = arguments.get("max_iterations", 2)
651
2082
  arguments.get("save_to_file", True)
2083
+ save_dir = arguments.get("save_dir", None)
652
2084
 
653
- print(f"๐Ÿ” Starting tool discovery: {tool_description}")
2085
+ # Determine where to save files
2086
+ import tempfile
2087
+ import shutil
654
2088
 
655
- # 1. Discover similar tools
656
- print("๐Ÿ“Š Discovering similar tools...")
657
- similar_tools = _discover_similar_tools(tool_description, call_tool)
658
- print(f"Found {len(similar_tools)} similar tools")
2089
+ # If save_dir is provided, use it; otherwise use current working directory
2090
+ if save_dir:
2091
+ output_dir = os.path.abspath(save_dir)
2092
+ else:
2093
+ output_dir = os.getcwd()
659
2094
 
660
- # 2. Generate initial tool specification
661
- print("๐Ÿ—๏ธ Generating tool specification...")
662
- tool_config = _generate_tool_specification(
663
- tool_description, similar_tools, call_tool
664
- )
2095
+ # Also create a temp directory for intermediate files during optimization
2096
+ temp_dir = tempfile.mkdtemp(prefix="tool_discover_")
2097
+ print(f"๐Ÿ“ Created temporary folder: {temp_dir}", flush=True)
2098
+ print(f"๐Ÿ“ Files will be saved to: {output_dir}", flush=True)
665
2099
 
666
- # 3. Generate implementation for all tools
667
- print("๐Ÿ’ป Generating code implementation...")
668
- tool_config = _generate_implementation(tool_config, call_tool)
2100
+ try:
2101
+ print(f"๐Ÿ” Starting tool discovery: {tool_description}", flush=True)
669
2102
 
670
- # 4. Iterative optimization
671
- print("\n๐Ÿš€ Starting enhanced iterative improvement system...")
2103
+ # 1. Collect reference information
2104
+ reference_info = _collect_reference_info(tool_description, call_tool)
672
2105
 
673
- target_quality_score = arguments.get("target_quality_score", 8.5)
2106
+ # 2. Generate tool specification AND implementation together (XML format)
2107
+ print("๐Ÿ—๏ธ Generating tool (specification + implementation)...", flush=True)
2108
+ tool_config = _generate_tool_with_xml(
2109
+ tool_description, reference_info, call_tool
2110
+ )
674
2111
 
675
- print(
676
- f"๐ŸŽฏ Enabling iterative improvement, target quality score: {target_quality_score}/10"
677
- )
2112
+ # Display results
2113
+ print("\033[92mTool specification:\033[0m")
2114
+ config_display = {k: v for k, v in tool_config.items() if k != "implementation"}
2115
+ print(json.dumps(config_display, indent=4))
678
2116
 
679
- tool_config, final_quality_score, improvement_history = iterative_code_improvement(
680
- tool_config,
681
- call_tool,
682
- max_iterations=max_iterations,
683
- target_score=target_quality_score,
684
- )
2117
+ print("\n๐Ÿ’ป Implementation code:")
2118
+ print(
2119
+ "################################################################################"
2120
+ )
2121
+ print(tool_config["implementation"]["source_code"])
2122
+ print(
2123
+ "################################################################################"
2124
+ )
685
2125
 
686
- print(
687
- f"๐ŸŽ‰ Iterative improvement completed! Final quality score: {final_quality_score:.2f}/10"
688
- )
2126
+ # 4. Iterative optimization (handles runtime validation, testing, error fixing, and optimization)
2127
+ print("\n๐Ÿš€ Phase: Iterative Optimization")
2128
+ target_quality_score = arguments.get("target_quality_score", 8.5)
2129
+ tool_config, final_quality_score, improvement_history = (
2130
+ iterative_comprehensive_optimization(
2131
+ tool_config,
2132
+ call_tool,
2133
+ max_iterations=max_iterations,
2134
+ target_score=target_quality_score,
2135
+ temp_dir=temp_dir,
2136
+ )
2137
+ )
2138
+
2139
+ # Display final results
2140
+ if isinstance(final_quality_score, dict):
2141
+ score = final_quality_score.get("overall_score", 0)
2142
+ else:
2143
+ score = final_quality_score
2144
+ print(
2145
+ f"๐ŸŽ‰ Implementation and optimization completed! Final quality score: {score:.2f}/10"
2146
+ )
689
2147
 
690
- # 5. Save tool files
691
- print("๐Ÿ’พ Saving tool files...")
692
- base_filename = f"generated_tool_{tool_config['name']}"
693
- saved_files = _save_tool_files(tool_config, base_filename)
694
- print(f"Saved: {saved_files}")
2148
+ # 5. Save final tool files to output directory
2149
+ print("๐Ÿ’พ Saving tool files...")
2150
+ base_filename = f"generated_tool_{tool_config['name']}"
695
2151
 
696
- print("\n๐ŸŽ‰ Tool generation completed!")
697
- print(f"Tool name: {tool_config['name']}")
698
- print(f"Tool type: {tool_config['type']}")
699
- print(f"Final quality: {final_quality_score:.1f}/10")
2152
+ # First save to temp directory
2153
+ temp_saved_files = _save_tool_files(
2154
+ tool_config, base_filename, call_tool, temp_dir, None
2155
+ )
2156
+ print(f"Saved to temp: {temp_saved_files}")
2157
+
2158
+ # Then copy to output directory
2159
+ saved_files = []
2160
+ os.makedirs(output_dir, exist_ok=True)
2161
+
2162
+ for temp_file in temp_saved_files:
2163
+ filename = os.path.basename(temp_file)
2164
+ output_file = os.path.join(output_dir, filename)
2165
+ shutil.copy2(temp_file, output_file)
2166
+ saved_files.append(output_file)
2167
+ print(f"๐Ÿ’พ Copied to output directory: {output_file}")
2168
+
2169
+ print(f"\nโœ… Saved files: {saved_files}")
2170
+
2171
+ print("\n๐ŸŽ‰ Tool generation completed!")
2172
+ print(f"Tool name: {tool_config['name']}")
2173
+ print(f"Tool type: {tool_config.get('type', 'Unknown')}")
2174
+ if isinstance(final_quality_score, dict):
2175
+ score = final_quality_score.get("overall_score", 0)
2176
+ else:
2177
+ score = final_quality_score
2178
+ print(f"Final quality: {score:.1f}/10")
2179
+
2180
+ return {
2181
+ "tool_config": tool_config,
2182
+ "quality_score": final_quality_score,
2183
+ "saved_files": saved_files,
2184
+ "output_directory": output_dir,
2185
+ }
2186
+
2187
+ finally:
2188
+ # Clean up temporary directory
2189
+ try:
2190
+ shutil.rmtree(temp_dir)
2191
+ print(f"๐Ÿงน Cleaned up temporary directory: {temp_dir}")
2192
+ except Exception as e:
2193
+ print(f"โš ๏ธ Warning: Could not clean up temporary directory {temp_dir}: {e}")
700
2194
 
701
- return {
702
- "tool_config": tool_config,
703
- "quality_score": final_quality_score,
704
- "saved_files": saved_files,
705
- }
2195
+
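For illustration, the arguments dict consumed at the top of compose() above uses these keys (values are examples; the defaults shown are the ones read via .get()):

    arguments = {
        "tool_description": "Fetch variant annotations from a public REST API",  # required
        "max_iterations": 2,              # default 2
        "target_quality_score": 8.5,      # default 8.5
        "save_dir": "./generated_tools",  # default: current working directory
        "save_to_file": True,             # read but currently unused
    }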
2196
+ # ============================================================================
2197
+ # NEW CORE FUNCTIONS FOR REFACTORED SYSTEM
2198
+ # ============================================================================
2199
+
2200
+
2201
+ def optimize_code(tool_config, optimization_context, call_tool):
2202
+ """Wrapper function that calls the XML-based optimizer"""
2203
+ return _optimize_tool_with_xml(tool_config, optimization_context, call_tool)