tooluniverse 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the changes between these publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/__init__.py +71 -57
- tooluniverse/alphafold_tool.py +51 -20
- tooluniverse/compose_scripts/tool_graph_generation.py +249 -0
- tooluniverse/compose_scripts/tool_metadata_generator.py +369 -0
- tooluniverse/data/agentic_tools.json +143 -28
- tooluniverse/data/alphafold_tools.json +203 -61
- tooluniverse/data/compose_tools.json +63 -0
- tooluniverse/data/special_tools.json +2 -0
- tooluniverse/test/test_alphafold_tool.py +66 -29
- {tooluniverse-1.0.0.dist-info → tooluniverse-1.0.2.dist-info}/METADATA +115 -173
- {tooluniverse-1.0.0.dist-info → tooluniverse-1.0.2.dist-info}/RECORD +16 -18
- tooluniverse/data/packages/software_tools.json +0 -4954
- tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +0 -1611
- tooluniverse/remote/expert_feedback_mcp/simple_test.py +0 -34
- tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +0 -91
- /tooluniverse/data/{clait_tools.json → adverse_event_tools.json} +0 -0
- {tooluniverse-1.0.0.dist-info → tooluniverse-1.0.2.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.0.dist-info → tooluniverse-1.0.2.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.0.dist-info → tooluniverse-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.0.dist-info → tooluniverse-1.0.2.dist-info}/top_level.txt +0 -0
tooluniverse/__init__.py
CHANGED
@@ -1,4 +1,5 @@
 from importlib.metadata import version
+import warnings
 from typing import Any, Optional, List
 from .execute_function import ToolUniverse
 from .base_tool import BaseTool
@@ -148,65 +149,76 @@ MCPClientTool: Any
 MCPAutoLoaderTool: Any
 ADMETAITool: Any
 AlphaFoldRESTTool: Any
+ComposeTool: Any
 if not LAZY_LOADING_ENABLED:
-    # Import all tool classes immediately (old behavior)
-    [old lines 153-204: the previous top-level import block; content not captured in this view]
+    # Import all tool classes immediately (old behavior) with warning suppression  # noqa: E501
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=DeprecationWarning)
+        warnings.filterwarnings("ignore", category=RuntimeWarning)
+        warnings.filterwarnings("ignore", category=UserWarning)
+        warnings.filterwarnings("ignore", category=FutureWarning)
+
+        from .restful_tool import MonarchTool, MonarchDiseasesForMultiplePhenoTool
+        from .ctg_tool import ClinicalTrialsSearchTool, ClinicalTrialsDetailsTool
+        from .graphql_tool import (
+            OpentargetTool,
+            OpentargetGeneticsTool,
+            OpentargetToolDrugNameMatch,
+            DiseaseTargetScoreTool,
+        )
+        from .openfda_tool import (
+            FDADrugLabelTool,
+            FDADrugLabelSearchTool,
+            FDADrugLabelSearchIDTool,
+            FDADrugLabelGetDrugGenericNameTool,
+        )
+        from .openfda_adv_tool import (
+            FDADrugAdverseEventTool,
+            FDACountAdditiveReactionsTool,
+        )
+        from .chem_tool import ChEMBLTool
+        from .compose_tool import ComposeTool
+        from .europe_pmc_tool import EuropePMCTool
+        from .semantic_scholar_tool import SemanticScholarTool
+        from .pubtator_tool import PubTatorTool
+        from .efo_tool import EFOTool
+        from .agentic_tool import AgenticTool
+        from .dataset_tool import DatasetTool
+        from .dailymed_tool import SearchSPLTool, GetSPLBySetIDTool
+        from .hpa_tool import HPAGetGeneJSONTool, HPAGetGeneXMLTool
+        from .reactome_tool import ReactomeRESTTool
+        from .pubchem_tool import PubChemRESTTool
+        from .url_tool import URLHTMLTagTool, URLToPDFTextTool
+        from .medlineplus_tool import MedlinePlusRESTTool
+        from .uniprot_tool import UniProtRESTTool
+        from .package_tool import PackageTool
+        from .uspto_tool import USPTOOpenDataPortalTool
+        from .xml_tool import XMLDatasetTool
+        from .tool_finder_embedding import ToolFinderEmbedding
+        from .tool_finder_keyword import ToolFinderKeyword
+        from .tool_finder_llm import ToolFinderLLM
+        from .embedding_database import EmbeddingDatabase
+        from .embedding_sync import EmbeddingSync
+        from .rcsb_pdb_tool import RCSBTool
+        from .gwas_tool import (
+            GWASAssociationSearch,
+            GWASStudySearch,
+            GWASSNPSearch,
+            GWASAssociationByID,
+            GWASStudyByID,
+            GWASSNPByID,
+            GWASVariantsForTrait,
+            GWASAssociationsForTrait,
+            GWASAssociationsForSNP,
+            GWASStudiesForTrait,
+            GWASSNPsForGene,
+            GWASAssociationsForStudy,
+        )
 
-    [old lines 206-209: removed import lines; content not captured in this view]
+        # from .admetai_tool import ADMETAITool
+        from .mcp_client_tool import MCPClientTool, MCPAutoLoaderTool
+        from .admetai_tool import ADMETAITool
+        from .alphafold_tool import AlphaFoldRESTTool
 else:
     # With lazy loading, create lazy import proxies that import modules only when accessed
     MonarchTool = _LazyImportProxy("restful_tool", "MonarchTool")
@@ -238,6 +250,7 @@ else:
         "openfda_adv_tool", "FDACountAdditiveReactionsTool"
     )
     ChEMBLTool = _LazyImportProxy("chem_tool", "ChEMBLTool")
+    ComposeTool = _LazyImportProxy("compose_tool", "ComposeTool")
     EuropePMCTool = _LazyImportProxy("europe_pmc_tool", "EuropePMCTool")
     SemanticScholarTool = _LazyImportProxy(
         "semantic_scholar_tool", "SemanticScholarTool"
@@ -306,6 +319,7 @@ __all__ = [
     "FDADrugAdverseEventTool",
     "FDACountAdditiveReactionsTool",
     "ChEMBLTool",
+    "ComposeTool",
     "EuropePMCTool",
     "SemanticScholarTool",
     "PubTatorTool",
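Note on the lazy branch above: _LazyImportProxy is defined elsewhere in __init__.py and is unchanged by this release, so its body does not appear in the diff. As a rough, hypothetical sketch (not the package's actual implementation), a proxy with the two-argument (module, attribute) form used above can defer the real import until first use:

import importlib

class _LazyImportProxy:
    """Hypothetical sketch; tooluniverse's real proxy is not shown in this diff."""

    def __init__(self, module_name: str, attr_name: str):
        self._module_name = module_name  # e.g. "compose_tool"
        self._attr_name = attr_name      # e.g. "ComposeTool"
        self._resolved = None            # cached real class after first use

    def _resolve(self):
        if self._resolved is None:
            module = importlib.import_module(f"tooluniverse.{self._module_name}")
            self._resolved = getattr(module, self._attr_name)
        return self._resolved

    def __call__(self, *args, **kwargs):
        # Instantiating the proxy instantiates the real class.
        return self._resolve()(*args, **kwargs)

    def __getattr__(self, item):
        return getattr(self._resolve(), item)

Under this scheme, `import tooluniverse` stays cheap: a module such as compose_tool is only imported when ComposeTool is first called or inspected, which is why the eager branch needs the warnings.catch_warnings() wrapper while the lazy branch does not.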
tooluniverse/alphafold_tool.py
CHANGED
@@ -11,7 +11,7 @@ ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"
 class AlphaFoldRESTTool(BaseTool):
     """
     AlphaFold Protein Structure Database API tool.
-
+    Generic wrapper for AlphaFold API endpoints defined in alphafold_tools.json.
     """
 
     def __init__(self, tool_config):
@@ -20,28 +20,38 @@ class AlphaFoldRESTTool(BaseTool):
         parameter = tool_config.get("parameter", {})
 
         self.endpoint_template: str = fields["endpoint"]
-        self.param_schema: Dict[str, Any] = parameter.get("properties", {})
         self.required: List[str] = parameter.get("required", [])
         self.output_format: str = fields.get("return_format", "JSON")
 
-    def _build_url(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+    def _build_url(self, arguments: Dict[str, Any]) -> str | Dict[str, Any]:
+        # Example: endpoint_template = "/annotations/{qualifier}.json"
         url_path = self.endpoint_template
+        # Find placeholders like {qualifier} in the path
         placeholders = re.findall(r"\{([^{}]+)\}", url_path)
+        used = set()
+
+        # Replace placeholders with provided arguments
+        # ex. if arguments = {"qualifier": "P69905", "type": "MUTAGEN"}
         for ph in placeholders:
             if ph not in arguments or arguments[ph] is None:
                 return {"error": f"Missing required parameter '{ph}'"}
             url_path = url_path.replace(f"{{{ph}}}", str(arguments[ph]))
-
+            used.add(ph)
+        # Now url_path = "/annotations/P69905.json"
 
-        [old lines 36-40: removed code not fully captured in this view; only "#" and "if" fragments survive]
+        # Treat all remaining args as query parameters
+        # "type" wasn't a placeholder, so it becomes a query param
+        query_args = {k: v for k, v in arguments.items() if k not in used}
+        if query_args:
+            from urllib.parse import urlencode
 
-        [old lines 42-44: removed code not captured in this view]
+            url_path += "?" + urlencode(query_args)
+
+        # Final result = "https://alphafold.ebi.ac.uk/api/annotations/P69905.json?type=MUTAGEN"
+        return ALPHAFOLD_BASE_URL + url_path
+
+    def _make_request(self, url: str) -> Dict[str, Any]:
+        """Perform a GET request and handle common errors."""
         try:
             resp = requests.get(
                 url,
@@ -54,18 +64,36 @@ class AlphaFoldRESTTool(BaseTool):
         except Exception as e:
             return {"error": "Request to AlphaFold API failed", "detail": str(e)}
 
-        # Handle HTTP errors cleanly
         if resp.status_code == 404:
-            return {
-                "error": "No AlphaFold prediction found",
-                "uniprot_id": arguments.get("uniprot_id"),
-            }
+            return {"error": "Not found", "endpoint": url}
         if resp.status_code != 200:
             return {
                 "error": f"AlphaFold API returned {resp.status_code}",
                 "detail": resp.text,
+                "endpoint": url,
             }
 
+        return {"response": resp}
+
+    def run(self, arguments: Dict[str, Any]):
+        """Execute the tool with provided arguments."""
+        # Validate required params
+        missing = [k for k in self.required if k not in arguments]
+        if missing:
+            return {"error": f"Missing required parameter(s): {', '.join(missing)}"}
+
+        # Build URL
+        url = self._build_url(arguments)
+        if isinstance(url, dict) and "error" in url:
+            return {**url, "query": arguments}
+
+        # Make request
+        result = self._make_request(url)
+        if "error" in result:
+            return {**result, "query": arguments}
+
+        resp = result["response"]
+
         # Parse JSON
         if self.output_format.upper() == "JSON":
             try:
@@ -73,7 +101,8 @@ class AlphaFoldRESTTool(BaseTool):
                 if not data:
                     return {
                         "error": "AlphaFold returned an empty response",
-                        [old line 76: truncated in this view]
+                        "endpoint": url,
+                        "query": arguments,
                     }
 
                 return {
@@ -90,7 +119,9 @@ class AlphaFoldRESTTool(BaseTool):
                     "error": "Failed to parse JSON response",
                     "raw": resp.text,
                     "detail": str(e),
+                    "endpoint": url,
+                    "query": arguments,
                 }
 
-        # Fallback
-        return {"data": resp.text, "metadata": {"endpoint": url}}
+        # Fallback for non-JSON output
+        return {"data": resp.text, "metadata": {"endpoint": url, "query": arguments}}
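To make the new URL-building path concrete, here is a hypothetical invocation. The "fields"/"parameter" config layout is inferred from the constructor lines visible above, and the endpoint is illustrative only; the real tool definitions live in tooluniverse/data/alphafold_tools.json:

# Hypothetical tool config, for illustration only.
config = {
    "fields": {
        "endpoint": "/annotations/{qualifier}.json",  # path placeholder in braces
        "return_format": "JSON",
    },
    "parameter": {
        "properties": {"qualifier": {"type": "string"}},
        "required": ["qualifier"],
    },
}

tool = AlphaFoldRESTTool(config)
# "qualifier" fills the path placeholder; "type" is not a placeholder, so it
# is appended as a query parameter, yielding
# https://alphafold.ebi.ac.uk/api/annotations/P69905.json?type=MUTAGEN
result = tool.run({"qualifier": "P69905", "type": "MUTAGEN"})

Note that every error path in the refactored run() now echoes the failing "endpoint" and the original arguments under "query", which makes failures much easier to trace than in 1.0.0.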
tooluniverse/compose_scripts/tool_graph_generation.py
ADDED
@@ -0,0 +1,249 @@
+"""Tool Graph Generation Compose Script
+
+Efficiently evaluates directional data-flow relationships between all unique pairs
+of provided tool configs using one agentic tool:
+- ToolRelationshipDetector
+
+Outputs a graph structure with edges representing valid directional relationships.
+Each edge stores: source, target, rationale.
+
+Performance considerations:
+- Iterates i<j once (O(N^2/2) pairs)
+- Lightweight JSON serialization of minimal fields
+- Optional batching hook (currently sequential because call_tool likely remote)
+
+Arguments:
+    tool_configs (list[dict]) REQUIRED
+    max_tools (int) optional limit for debugging
+    output_path (str) path to write resulting graph JSON (default './tool_relationship_graph.json')
+    save_intermediate_every (int) checkpoint frequency (default 5000 pairs processed)
+
+Return:
+    dict with keys: nodes, edges, stats
+"""
+from __future__ import annotations
+
+import json
+import math
+import os
+import time
+from typing import Any, Dict, List, Tuple
+
+
+DETECTOR_NAME = "ToolRelationshipDetector"
+
+
+def compose(arguments, tooluniverse, call_tool):  # noqa: D401
+    tool_configs: List[dict] = arguments.get("tool_configs") or []
+    if not tool_configs:
+        return {"status": "error", "message": "tool_configs empty"}
+
+    max_tools = arguments.get("max_tools")
+    if isinstance(max_tools, int) and max_tools > 0:
+        tool_configs = tool_configs[:max_tools]
+
+    output_path = arguments.get("output_path", "./tool_relationship_graph.json")
+    checkpoint_every = int(arguments.get("save_intermediate_every", 5000))
+
+    # Prepare nodes list (unique tool names)
+    nodes = []
+    minimal_tool_map: Dict[str, dict] = {}
+    for cfg in tool_configs:
+        name = cfg.get("name")
+        if not name:
+            continue
+        if name in minimal_tool_map:
+            continue
+        minimal_tool = {
+            "name": name,
+            "description": cfg.get("description", ""),
+            "parameter": cfg.get("parameter", {}),
+            "type": cfg.get("type", cfg.get("toolType", "unknown")),
+        }
+        minimal_tool_map[name] = minimal_tool
+        nodes.append({"id": name, "name": name, "type": minimal_tool["type"]})
+
+    names = list(minimal_tool_map.keys())
+    n = len(names)
+    total_pairs = n * (n - 1) // 2
+
+    edges: List[dict] = []
+    processed_pairs = 0
+    llm_calls = 0
+    start_time = time.time()
+    batch_size = 100
+
+    # --- Resume from checkpoint ---
+    checkpoint_path = output_path + ".checkpoint.json"
+    load_path = None
+
+    # Prefer checkpoint file, otherwise use the main output file
+    if os.path.exists(checkpoint_path):
+        load_path = checkpoint_path
+    elif os.path.exists(output_path):
+        load_path = output_path
+
+    if load_path:
+        print(f"Attempting to resume from {load_path}")
+        try:
+            with open(load_path, "r", encoding="utf-8") as f:
+                existing_graph = json.load(f)
+
+            # Re-hydrate edges and find processed source tools
+            if "edges" in existing_graph and isinstance(existing_graph["edges"], list):
+                edges = existing_graph["edges"]
+
+            # Align the 'names' list order with the loaded graph to ensure correct loop continuation
+            if "nodes" in existing_graph and isinstance(existing_graph["nodes"], list):
+                loaded_node_order = [node.get("name") for node in existing_graph.get("nodes", [])]
+                if names == loaded_node_order:
+                    print("Current tool order matches the loaded graph.")
+                else:
+                    print("Reordering tools to match the loaded graph for correct resume.")
+                    # Create a map for quick lookup of current tool positions
+                    current_name_pos = {name: i for i, name in enumerate(names)}
+                    # Build the new 'names' list and 'minimal_tool_map' based on the loaded order
+                    new_names = [name for name in loaded_node_order if name in current_name_pos]
+                    # Find any new tools not in the original graph and append them
+                    new_tools_from_config = [name for name in names if name not in loaded_node_order]
+                    if new_tools_from_config:
+                        print(f"Appending {len(new_tools_from_config)} new tools to the list.")
+                        new_names.extend(new_tools_from_config)
+
+                    names = new_names
+                    assert(n == len(names))  # n should remain the same
+                    print("Tool order successfully realigned.")
+
+        except Exception as e:
+            print(f"Warning: Could not load or parse existing graph at {load_path}. Starting fresh. Error: {e}")
+            edges = []  # Reset edges if loading failed
+
+    # Core loop over unique unordered pairs (i<j). We'll batch the 'j' tools.
+    for i in range(n):
+        tool_a = minimal_tool_map[names[i]]
+        a_json = json.dumps(tool_a, ensure_ascii=False)
+        # This logic is to skip all tools until a specific one is found,
+        # skip that one, and then process all subsequent tools.
+        # This is useful for debugging or resuming from a specific point.
+        start_processing_flag_name = 'get_em_3d_fitting_and_reconstruction_details'
+
+        # Find the index of the tool to start after
+        try:
+            start_index = names.index(start_processing_flag_name)
+        except ValueError:
+            start_index = -1  # Flag tool not found, process all
+
+        if start_index != -1 and i <= start_index:
+            print(f"Skipping tool {tool_a['name']} with index {i} (target index is {start_index}).")
+            continue
+
+        # Batch the remaining tools to compare against tool_a
+        for j_batch_start in range(i + 1, n, batch_size):
+            j_batch_end = min(j_batch_start + batch_size, n)
+            other_tools_batch_names = names[j_batch_start:j_batch_end]
+
+            if not other_tools_batch_names:
+                continue
+
+            other_tools_list = [minimal_tool_map[name] for name in other_tools_batch_names]
+            other_tools_json = json.dumps(other_tools_list, ensure_ascii=False)
+
+            # Call detector with the batch
+            detector_args = {"tool_a": a_json, "other_tools": other_tools_json}
+            detector_res = {}
+            for _ in range(5):  # Retry up to 5 times
+                detector_raw = call_tool(DETECTOR_NAME, detector_args)
+                llm_calls += 1
+                detector_res = _parse_json(detector_raw)
+                if detector_res and "relationships" in detector_res:
+                    break
+
+            processed_pairs += len(other_tools_list)
+
+            relationships = detector_res.get("relationships", [])
+            if not isinstance(relationships, list):
+                relationships = []
+
+            print(f"Tool A: {tool_a['name']} vs {len(other_tools_list)} others => Found {len(relationships)} relationships")
+
+            for rel in relationships:
+                tool_b_name = rel.get("tool_b_name")
+                direction = rel.get("direction")
+                rationale = rel.get("rationale")
+
+                if not tool_b_name or tool_b_name not in minimal_tool_map:
+                    continue
+
+                if direction in ("A->B", "both"):
+                    edges.append({
+                        "source": tool_a["name"],
+                        "target": tool_b_name,
+                        "rationale": rationale,
+                    })
+                if direction in ("B->A", "both"):
+                    edges.append({
+                        "source": tool_b_name,
+                        "target": tool_a["name"],
+                        "rationale": rationale,
+                    })
+
+            # Progress reporting and checkpointing
+            if processed_pairs % 1000 < len(other_tools_list):  # Heuristic to report near the thousand marks
+                elapsed = time.time() - start_time
+                rate = processed_pairs / elapsed if elapsed > 0 else 0
+                print(f"[progress] pairs={processed_pairs}/{total_pairs} edges={len(edges)} llm_calls={llm_calls} rate={rate:.2f} pairs/s")
+            if processed_pairs // checkpoint_every > (processed_pairs - len(other_tools_list)) // checkpoint_every:
+                _maybe_checkpoint(output_path, nodes, edges)
+
+    graph = {
+        "nodes": nodes,
+        "edges": edges,
+        "stats": {
+            "tools": n,
+            "pairs_evaluated": processed_pairs,
+            "edges": len(edges),
+            "llm_calls": llm_calls,
+            "runtime_sec": round(time.time() - start_time, 2)
+        }
+    }
+
+    # Final save
+    try:
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(graph, f, indent=2)
+    except Exception as e:
+        return {"status": "error", "message": f"Failed to write output: {e}", "graph": graph}
+
+    return {"status": "success", "output_file": output_path, "graph": graph}
+
+
+def _maybe_checkpoint(base_path: str, nodes: List[dict], edges: List[dict]):
+    ck_path = base_path + ".checkpoint_new.json"
+    try:
+        with open(ck_path, "w", encoding="utf-8") as f:
+            json.dump({"nodes": nodes, "edges": edges}, f)
+        print(f"[checkpoint] saved {ck_path} nodes={len(nodes)} edges={len(edges)}")
+    except Exception as e:
+        print(f"[checkpoint] failed: {e}")
+
+
+def _parse_json(obj: Any) -> dict:
+    if isinstance(obj, dict):
+        # may be wrapped
+        if "result" in obj and isinstance(obj["result"], str):
+            try:
+                return json.loads(obj["result"])
+            except Exception:
+                return {}
+        if "content" in obj and isinstance(obj["content"], str):
+            try:
+                return json.loads(obj["content"])
+            except Exception:
+                return {}
+        return obj
+    if isinstance(obj, str):
+        try:
+            return json.loads(obj)
+        except Exception:
+            return {}
+    return {}
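To illustrate the contract this script expects from ToolRelationshipDetector, here is a minimal, hypothetical driver with a stubbed call_tool. The relationships payload shape (tool_b_name, direction, rationale, with directions "A->B", "B->A", or "both") mirrors how compose() consumes the detector output above; the stub itself is not part of the package:

from tooluniverse.compose_scripts.tool_graph_generation import compose

def fake_call_tool(tool_name, args):
    # A real deployment routes this to the ToolRelationshipDetector agentic
    # tool; here we hard-code one directional edge for demonstration.
    return {"relationships": [{
        "tool_b_name": "tool_b",
        "direction": "A->B",
        "rationale": "tool_a's output feeds tool_b's input",
    }]}

result = compose(
    {
        "tool_configs": [
            {"name": "tool_a", "description": "produces X", "parameter": {}},
            {"name": "tool_b", "description": "consumes X", "parameter": {}},
        ],
        "output_path": "./tool_relationship_graph.json",
    },
    tooluniverse=None,        # unused by this script
    call_tool=fake_call_tool,
)
print(result["graph"]["stats"])  # e.g. {'tools': 2, 'pairs_evaluated': 1, ...}

One behavior worth noting: the hard-coded start_processing_flag_name skip is a debugging leftover. When that tool name is absent from the input (as here), names.index raises ValueError, start_index stays -1, and every pair is processed.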