tooluniverse 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tooluniverse might be problematic.

@@ -0,0 +1,369 @@
+ from __future__ import annotations
+ from typing import Any, Dict, List, Optional
+ import os
+ import time
+ import json as _json
+
+
+ class BaseLLMClient:
+     def test_api(self) -> None:
+         raise NotImplementedError
+
+     def infer(
+         self,
+         messages: List[Dict[str, str]],
+         temperature: Optional[float],
+         max_tokens: Optional[int],
+         return_json: bool,
+         custom_format: Any = None,
+         max_retries: int = 5,
+         retry_delay: int = 5,
+     ) -> Optional[str]:
+         raise NotImplementedError
+
+
+ class AzureOpenAIClient(BaseLLMClient):
+     # Built-in defaults for model families (can be overridden by env)
+     DEFAULT_MODEL_LIMITS: Dict[str, Dict[str, int]] = {
+         # GPT-4.1 series
+         "gpt-4.1": {"max_output": 32768, "context_window": 1_047_576},
+         "gpt-4.1-mini": {"max_output": 32768, "context_window": 1_047_576},
+         "gpt-4.1-nano": {"max_output": 32768, "context_window": 1_047_576},
+         # GPT-4o series
+         "gpt-4o-1120": {"max_output": 16384, "context_window": 128_000},
+         "gpt-4o-0806": {"max_output": 16384, "context_window": 128_000},
+         "gpt-4o-mini-0718": {"max_output": 16384, "context_window": 128_000},
+         "gpt-4o": {"max_output": 16384, "context_window": 128_000},  # general prefix
+         # O-series
+         "o4-mini-0416": {"max_output": 100_000, "context_window": 200_000},
+         "o3-mini-0131": {"max_output": 100_000, "context_window": 200_000},
+         "o4-mini": {"max_output": 100_000, "context_window": 200_000},
+         "o3-mini": {"max_output": 100_000, "context_window": 200_000},
+         # Embeddings (for completeness)
+         "embedding-ada": {"max_output": 8192, "context_window": 8192},
+         "text-embedding-3-small": {"max_output": 8192, "context_window": 8192},
+         "text-embedding-3-large": {"max_output": 8192, "context_window": 8192},
+     }
+
+     def __init__(self, model_id: str, api_version: Optional[str], logger):
+         try:
+             from openai import AzureOpenAI as _AzureOpenAI  # type: ignore
+             import openai as _openai  # type: ignore
+         except Exception as e:  # pragma: no cover
+             raise RuntimeError("openai AzureOpenAI client is not available") from e
+         self._AzureOpenAI = _AzureOpenAI
+         self._openai = _openai
+
+         self.model_name = model_id
+         self.logger = logger
+
+         resolved_version = api_version or self._resolve_api_version(model_id)
+         self.logger.debug(
+             f"Resolved Azure API version for {model_id}: {resolved_version}"
+         )
+
+         api_key = os.getenv("AZURE_OPENAI_API_KEY")
+         if not api_key:
+             raise ValueError("AZURE_OPENAI_API_KEY not set")
+         endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "https://azure-ai.hms.edu")
+         self.client = self._AzureOpenAI(
+             azure_endpoint=endpoint, api_key=api_key, api_version=resolved_version
+         )
+         self.api_version = resolved_version
+
+         # Load env overrides for model limits (JSON dict of {prefix: {max_output, context_window}})
+         env_limits_raw = os.getenv("AZURE_DEFAULT_MODEL_LIMITS")
+         self._default_limits: Dict[str, Dict[str, int]] = (
+             self.DEFAULT_MODEL_LIMITS.copy()
+         )
+         if env_limits_raw:
+             try:
+                 env_limits = _json.loads(env_limits_raw)
+                 # shallow merge by keys
+                 for k, v in env_limits.items():
+                     if isinstance(v, dict):
+                         base = self._default_limits.get(k, {}).copy()
+                         base.update(
+                             {
+                                 kk: int(vv)
+                                 for kk, vv in v.items()
+                                 if isinstance(vv, (int, float, str))
+                             }
+                         )
+                         self._default_limits[k] = base
+             except Exception:
+                 # ignore bad env format
+                 pass
+
+     # --------- helpers (Azure specific) ---------
+     def _resolve_api_version(self, model_id: str) -> str:
+         mapping_raw = os.getenv("AZURE_OPENAI_API_VERSION_BY_MODEL")
+         mapping: Dict[str, str] = {}
+         if mapping_raw:
+             try:
+                 mapping = _json.loads(mapping_raw)
+             except Exception:
+                 mapping = {}
+         if model_id in mapping:
+             return mapping[model_id]
+         for k, v in mapping.items():
+             try:
+                 if model_id.startswith(k):
+                     return v
+             except Exception:
+                 continue
+         try:
+             if model_id.startswith("o3-mini") or model_id.startswith("o4-mini"):
+                 return "2024-12-01-preview"
+         except Exception:
+             pass
+         return os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
+
+     def _resolve_default_max_tokens(self, model_id: str) -> Optional[int]:
+         # Highest priority: explicit env per-model tokens mapping
+         mapping_raw = os.getenv("AZURE_MAX_TOKENS_BY_MODEL")
+         mapping: Dict[str, Any] = {}
+         if mapping_raw:
+             try:
+                 mapping = _json.loads(mapping_raw)
+             except Exception:
+                 mapping = {}
+         if model_id in mapping:
+             try:
+                 return int(mapping[model_id])
+             except Exception:
+                 pass
+         for k, v in mapping.items():
+             try:
+                 if model_id.startswith(k):
+                     return int(v)
+             except Exception:
+                 continue
+         # Next: built-in/default-limits map (with env merged)
+         if model_id in self._default_limits:
+             return int(self._default_limits[model_id].get("max_output", 0)) or None
+         for k, v in self._default_limits.items():
+             try:
+                 if model_id.startswith(k):
+                     return int(v.get("max_output", 0)) or None
+             except Exception:
+                 continue
+         return None
+
+     def _normalize_temperature(
+         self, model_id: str, temperature: Optional[float]
+     ) -> Optional[float]:
+         if isinstance(model_id, str) and (
+             model_id.startswith("o3-mini") or model_id.startswith("o4-mini")
+         ):
+             if temperature is not None:
+                 self.logger.warning(
+                     f"Model {model_id} does not support 'temperature'; ignoring provided value."
+                 )
+             return None
+         return temperature
+
+     # --------- public API ---------
+     def test_api(self) -> None:
+         test_messages = [{"role": "user", "content": "ping"}]
+         token_attempts = [1, 4, 16, 32]
+         last_error: Optional[Exception] = None
+         for tok in token_attempts:
+             try:
+                 try:
+                     self.client.chat.completions.create(
+                         model=self.model_name,
+                         messages=test_messages,
+                         max_tokens=tok,
+                         temperature=0,
+                     )
+                     return
+                 except self._openai.BadRequestError:  # type: ignore[attr-defined]
+                     self.client.chat.completions.create(
+                         model=self.model_name,
+                         messages=test_messages,
+                         max_completion_tokens=tok,
+                     )
+                     return
+             except Exception as e:  # noqa: BLE001
+                 last_error = e
+                 msg = str(e).lower()
+                 if (
+                     "max_tokens" in msg
+                     or "model output limit" in msg
+                     or "finish the message" in msg
+                 ) and tok != token_attempts[-1]:
+                     continue
+                 break
+         if last_error:
+             raise ValueError(f"ChatGPT API test failed: {last_error}")
+         raise ValueError("ChatGPT API test failed: unknown error")
+
+     def infer(
+         self,
+         messages: List[Dict[str, str]],
+         temperature: Optional[float],
+         max_tokens: Optional[int],
+         return_json: bool,
+         custom_format: Any = None,
+         max_retries: int = 5,
+         retry_delay: int = 5,
+     ) -> Optional[str]:
+         retries = 0
+         call_fn = (
+             self.client.chat.completions.parse
+             if custom_format is not None
+             else self.client.chat.completions.create
+         )
+         response_format = (
+             custom_format
+             if custom_format is not None
+             else ({"type": "json_object"} if return_json else None)
+         )
+         eff_temp = self._normalize_temperature(self.model_name, temperature)
+         eff_max = (
+             max_tokens
+             if max_tokens is not None
+             else self._resolve_default_max_tokens(self.model_name)
+         )
+         while retries < max_retries:
+             try:
+                 kwargs: Dict[str, Any] = {
+                     "model": self.model_name,
+                     "messages": messages,
+                 }
+                 if response_format is not None:
+                     kwargs["response_format"] = response_format
+                 if eff_temp is not None:
+                     kwargs["temperature"] = eff_temp
+                 try:
+                     if eff_max is not None:
+                         resp = call_fn(max_tokens=eff_max, **kwargs)
+                     else:
+                         resp = call_fn(**kwargs)
+                 except self._openai.BadRequestError as be:  # type: ignore[attr-defined]
+                     if eff_max is not None:
+                         resp = call_fn(max_completion_tokens=eff_max, **kwargs)
+                     else:
+                         be_msg = str(be).lower()
+                         fallback_limits = [
+                             8192,
+                             4096,
+                             2048,
+                             1024,
+                             512,
+                             256,
+                             128,
+                             64,
+                             32,
+                         ]
+                         if any(
+                             k in be_msg
+                             for k in [
+                                 "max_tokens",
+                                 "output limit",
+                                 "finish the message",
+                                 "max_completion_tokens",
+                             ]
+                         ):
+                             last_exc: Optional[Exception] = be
+                             for lim in fallback_limits:
+                                 try:
+                                     try:
+                                         resp = call_fn(
+                                             max_completion_tokens=lim, **kwargs
+                                         )
+                                         last_exc = None
+                                         break
+                                     except Exception as inner_e:  # noqa: BLE001
+                                         last_exc = inner_e
+                                         resp = call_fn(max_tokens=lim, **kwargs)
+                                         last_exc = None
+                                         break
+                                 except Exception as inner2:  # noqa: BLE001
+                                     last_exc = inner2
+                                     continue
+                             if last_exc is not None:
+                                 raise last_exc
+                         else:
+                             raise be
+                 if custom_format is not None:
+                     return resp.choices[0].message.parsed.model_dump()
+                 return resp.choices[0].message.content
+             except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                 self.logger.warning(
+                     f"Rate limit exceeded. Retrying in {retry_delay} seconds..."
+                 )
+                 retries += 1
+                 time.sleep(retry_delay * retries)
+             except Exception as e:  # noqa: BLE001
+                 self.logger.error(f"An error occurred: {e}")
+                 import traceback
+
+                 traceback.print_exc()
+                 break
+         self.logger.error("Max retries exceeded. Unable to complete the request.")
+         return None
+
+
+ class GeminiClient(BaseLLMClient):
+     def __init__(self, model_name: str, logger):
+         try:
+             import google.generativeai as genai  # type: ignore
+         except Exception as e:  # pragma: no cover
+             raise RuntimeError("google.generativeai not available") from e
+         api_key = os.getenv("GEMINI_API_KEY")
+         if not api_key:
+             raise ValueError("GEMINI_API_KEY not found")
+         self._genai = genai
+         self._genai.configure(api_key=api_key)
+         self.model_name = model_name
+         self.logger = logger
+
+     def _build_model(self):
+         return self._genai.GenerativeModel(self.model_name)
+
+     def test_api(self) -> None:
+         model = self._build_model()
+         model.generate_content(
+             "ping",
+             generation_config={
+                 "max_output_tokens": 8,
+                 "temperature": 0,
+             },
+         )
+
+     def infer(
+         self,
+         messages: List[Dict[str, str]],
+         temperature: Optional[float],
+         max_tokens: Optional[int],
+         return_json: bool,
+         custom_format: Any = None,
+         max_retries: int = 5,
+         retry_delay: int = 5,
+     ) -> Optional[str]:
+         if return_json:
+             raise ValueError("Gemini JSON mode not supported here")
+         contents = ""
+         for m in messages:
+             if m["role"] in ("user", "system"):
+                 contents += f"{m['content']}\n"
+         retries = 0
+         while retries < max_retries:
+             try:
+                 gen_cfg: Dict[str, Any] = {
+                     "temperature": (temperature if temperature is not None else 0)
+                 }
+                 if max_tokens is not None:
+                     gen_cfg["max_output_tokens"] = max_tokens
+                 model = self._build_model()
+                 resp = model.generate_content(contents, generation_config=gen_cfg)
+                 return getattr(resp, "text", None) or getattr(resp, "candidates", [{}])[
+                     0
+                 ].get("content")
+             except Exception as e:  # noqa: BLE001
+                 self.logger.error(f"Gemini error: {e}")
+                 retries += 1
+                 time.sleep(retry_delay * retries)
+         return None
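
Note: the Azure client above resolves API versions and token limits from environment variables before falling back to its built-in table. AZURE_OPENAI_API_VERSION_BY_MODEL and AZURE_MAX_TOKENS_BY_MODEL are JSON mappings matched by exact model name and then by prefix, and AZURE_DEFAULT_MODEL_LIMITS is shallow-merged over DEFAULT_MODEL_LIMITS. A minimal sketch of setting those overrides; the variable names and JSON shapes follow the parsing code above, while the concrete values are illustrative assumptions:

    # Illustrative values only; the shapes mirror the env parsing in AzureOpenAIClient.
    import json
    import os

    os.environ["AZURE_OPENAI_API_VERSION_BY_MODEL"] = json.dumps(
        {"gpt-4o": "2024-10-21", "o4-mini": "2024-12-01-preview"}
    )
    os.environ["AZURE_MAX_TOKENS_BY_MODEL"] = json.dumps({"gpt-4o": 8192})
    os.environ["AZURE_DEFAULT_MODEL_LIMITS"] = json.dumps(
        {"gpt-4o": {"max_output": 8192, "context_window": 128_000}}
    )
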
@@ -226,6 +226,9 @@ class SummarizationHook(OutputHook):
          self.chunk_size = hook_config.get("chunk_size", 2000)
          self.focus_areas = hook_config.get("focus_areas", "key_findings_and_results")
          self.max_summary_length = hook_config.get("max_summary_length", 3000)
+         # Optional timeout to prevent hangs in composer / LLM calls
+         # If the composer does not return within this window, we gracefully fall back
+         self.composer_timeout_sec = hook_config.get("composer_timeout_sec", 20)

      def process(
          self,
@@ -252,6 +255,19 @@ class SummarizationHook(OutputHook):
              Any: The summarized output, or original output if summarization fails
          """
          try:
+             # Debug: basic context
+             try:
+                 _len = len(str(result))
+             except Exception:
+                 _len = -1
+             import sys as _sys
+
+             print(
+                 f"[SummarizationHook] process: tool={tool_name}, result_len={_len}, "
+                 f"chunk_size={self.chunk_size}, max_summary_length={self.max_summary_length}",
+                 file=_sys.stderr,
+                 flush=True,
+             )
              # Check if the required tools are available
              if (
                  self.composer_tool_name not in self.tooluniverse.callable_functions
@@ -277,9 +293,49 @@ class SummarizationHook(OutputHook):
              }

              # Call Compose Summarizer Tool through ToolUniverse
-             composer_result = self.tooluniverse.run_one_function(
-                 {"name": self.composer_tool_name, "arguments": composer_args}
+             print(
+                 f"[SummarizationHook] calling composer tool: {self.composer_tool_name} (timeout={self.composer_timeout_sec}s)",
+                 file=_sys.stderr,
+                 flush=True,
              )
+             # Run composer with timeout to avoid hangs
+             try:
+                 from concurrent.futures import (
+                     ThreadPoolExecutor,
+                 )
+
+                 def _call_composer():
+                     return self.tooluniverse.run_one_function(
+                         {"name": self.composer_tool_name, "arguments": composer_args}
+                     )
+
+                 with ThreadPoolExecutor(max_workers=1) as _pool:
+                     _future = _pool.submit(_call_composer)
+                     composer_result = _future.result(timeout=self.composer_timeout_sec)
+             except Exception as _e_timeout:
+                 # Timeout or execution error; log and fall back to original output
+                 print(
+                     f"[SummarizationHook] composer execution failed/timeout: {_e_timeout}",
+                     file=_sys.stderr,
+                     flush=True,
+                 )
+                 return result
+             # Debug: show composer result meta
+             try:
+                 if isinstance(composer_result, dict):
+                     success = composer_result.get("success", False)
+                     summary_len = len(composer_result.get("summary", ""))
+                     print(
+                         f"[SummarizationHook] composer_result: success={success} summary_len={summary_len}",
+                         file=_sys.stderr,
+                         flush=True,
+                     )
+             except Exception as _e_dbg:
+                 print(
+                     f"[SummarizationHook] debug error inspecting composer_result: {_e_dbg}",
+                     file=_sys.stderr,
+                     flush=True,
+                 )

              # Process Compose Tool result
              if isinstance(composer_result, dict) and composer_result.get("success"):
@@ -294,7 +350,13 @@ class SummarizationHook(OutputHook):

          except Exception as e:
              error_msg = str(e)
-             print(f"Error in summarization hook: {error_msg}")
+             import sys as _sys
+
+             print(
+                 f"Error in summarization hook: {error_msg}",
+                 file=_sys.stderr,
+                 flush=True,
+             )

              # Check if the error is due to missing tools
              if "not found" in error_msg.lower() or "ToolOutputSummarizer" in error_msg:
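
SummarizationHook now reads composer_timeout_sec from its hook_config and runs the composer call in a single-worker ThreadPoolExecutor, falling back to the unsummarized result when _future.result() times out. A hedged sketch of a config dict exercising the new knob; the key names and defaults come from the hook code above, while wiring it into hook_config.json this way is an assumption:

    # Key names/defaults from SummarizationHook above; the file layout is assumed.
    hook_config = {
        "chunk_size": 2000,
        "focus_areas": "key_findings_and_results",
        "max_summary_length": 3000,
        "composer_timeout_sec": 20,  # new in this release: composer falls back after 20s
    }
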
@@ -365,6 +427,16 @@ class HookManager:
          self.config_path = config.get("config_path", "template/hook_config.json")
          self._pending_tools_to_load: List[str] = []
          self._load_hook_config()
+
+         # Validate LLM API keys before loading hooks
+         if not self._validate_llm_api_keys():
+             print("⚠️ Warning: LLM API keys not available. Hooks will be disabled.")
+             print(
+                 "   To enable hooks, please set AZURE_OPENAI_API_KEY environment variable."
+             )
+             self.enabled = False
+             return
+
          self._load_hooks()

      def apply_hooks(
@@ -415,6 +487,23 @@ class HookManager:

          return result

+     def _validate_llm_api_keys(self) -> bool:
+         """
+         Validate that LLM API keys are available for hook tools.
+
+         Returns:
+             bool: True if API keys are available, False otherwise
+         """
+         from .agentic_tool import AgenticTool
+
+         if AgenticTool.has_any_api_keys():
+             print("✅ LLM API keys validated successfully")
+             return True
+         else:
+             print("❌ LLM API key validation failed: No API keys available")
+             print("   To enable hooks, please set API key environment variables.")
+             return False
+
      def enable_hook(self, hook_name: str):
          """
          Enable a specific hook by name.
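
HookManager now disables itself when no LLM credentials are present, gating on AgenticTool.has_any_api_keys(). A small sketch of that pre-flight check used standalone; the import path matches the relative import above, but calling it outside HookManager is an assumed usage, not documented API:

    # Assumed standalone usage of the check HookManager performs internally.
    from tooluniverse.agentic_tool import AgenticTool

    if not AgenticTool.has_any_api_keys():
        print("No LLM API keys set (e.g. AZURE_OPENAI_API_KEY); hooks would be disabled.")
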
@@ -0,0 +1,194 @@
+ #!/usr/bin/env python3
+ """
+ Script to filter tool files by removing tools that don't exist in the current tool universe.
+
+ This script:
+ 1. Gets all valid tool names from ToolUniverse using scan_all=True
+ 2. Filters tool_relationship_graph_FINAL.json to keep only valid tools
+ 3. Filters v4_all_tools_final.json to keep only valid tools
+ 4. Preserves all other data structure and content
+ """
+
+ import json
+ from pathlib import Path
+
+ # Import after modifying sys.path
+ from tooluniverse import ToolUniverse
+
+
+ def load_json_file(file_path):
+     """Load JSON file and return the data."""
+     try:
+         with open(file_path, "r", encoding="utf-8") as f:
+             return json.load(f)
+     except Exception as e:
+         print(f"Error loading {file_path}: {e}")
+         return None
+
+
+ def save_json_file(file_path, data):
+     """Save data to JSON file."""
+     try:
+         with open(file_path, "w", encoding="utf-8") as f:
+             json.dump(data, f, ensure_ascii=False, indent=2)
+         print(f"Successfully saved filtered data to {file_path}")
+         return True
+     except Exception as e:
+         print(f"Error saving {file_path}: {e}")
+         return False
+
+
+ def filter_tool_relationship_graph(data, valid_tool_names):
+     """
+     Filter tool_relationship_graph_FINAL.json to keep only valid tools.
+
+     Args:
+         data: The loaded JSON data
+         valid_tool_names: Set of valid tool names
+
+     Returns:
+         Filtered data
+     """
+     if not isinstance(data, dict):
+         print("Warning: tool_relationship_graph data is not a dict")
+         return data
+
+     filtered_data = {}
+
+     # Handle nodes array
+     if "nodes" in data and isinstance(data["nodes"], list):
+         filtered_nodes = []
+         for node in data["nodes"]:
+             if isinstance(node, dict) and "name" in node:
+                 if node["name"] in valid_tool_names:
+                     filtered_nodes.append(node)
+                 else:
+                     print(f"Removing node from relationship graph: {node['name']}")
+             else:
+                 # Keep non-tool nodes (if any)
+                 filtered_nodes.append(node)
+         filtered_data["nodes"] = filtered_nodes
+         print(
+             f"Nodes: {len(data['nodes'])} -> {len(filtered_nodes)} ({len(data['nodes']) - len(filtered_nodes)} removed)"
+         )
+
+     # Handle edges array
+     if "edges" in data and isinstance(data["edges"], list):
+         filtered_edges = []
+         for edge in data["edges"]:
+             if isinstance(edge, dict) and "source" in edge and "target" in edge:
+                 # Keep edge if both source and target are valid tools
+                 if (
+                     edge["source"] in valid_tool_names
+                     and edge["target"] in valid_tool_names
+                 ):
+                     filtered_edges.append(edge)
+                 else:
+                     print(
+                         f"Removing edge from relationship graph: {edge.get('source', 'unknown')} -> {edge.get('target', 'unknown')}"
+                     )
+             else:
+                 # Keep non-tool edges (if any)
+                 filtered_edges.append(edge)
+         filtered_data["edges"] = filtered_edges
+         print(
+             f"Edges: {len(data['edges'])} -> {len(filtered_edges)} ({len(data['edges']) - len(filtered_edges)} removed)"
+         )
+
+     # Keep other fields as-is (like stats, metadata, etc.)
+     for key, value in data.items():
+         if key not in ["nodes", "edges"]:
+             filtered_data[key] = value
+
+     return filtered_data
+
+
+ def filter_v4_all_tools(data, valid_tool_names):
+     """
+     Filter v4_all_tools_final.json to keep only valid tools.
+
+     Args:
+         data: The loaded JSON data
+         valid_tool_names: Set of valid tool names
+
+     Returns:
+         Filtered data
+     """
+     if not isinstance(data, list):
+         print("Warning: v4_all_tools data is not a list")
+         return data
+
+     filtered_data = []
+
+     for tool in data:
+         if isinstance(tool, dict) and "name" in tool:
+             if tool["name"] in valid_tool_names:
+                 filtered_data.append(tool)
+             else:
+                 print(f"Removing tool from v4_all_tools: {tool['name']}")
+         else:
+             # Keep non-tool entries (if any)
+             filtered_data.append(tool)
+
+     return filtered_data
+
+
+ def main():
+     """Main function to filter the tool files."""
+     print("Starting tool file filtering process...")
+
+     # Initialize ToolUniverse and get all valid tool names
+     print("Getting all valid tool names from ToolUniverse...")
+     tu = ToolUniverse()
+     all_tool_names = tu.list_built_in_tools(mode="list_name", scan_all=True)
+     valid_tool_names = set(all_tool_names)
+     print(f"Found {len(valid_tool_names)} valid tools")
+
+     # Define file paths
+     project_root = Path(__file__).parent.parent.parent.parent
+     web_dir = project_root / "web"
+
+     relationship_graph_file = web_dir / "tool_relationship_graph_FINAL.json"
+     v4_tools_file = web_dir / "v4_all_tools_final.json"
+
+     # Check if files exist
+     if not relationship_graph_file.exists():
+         print(f"Error: {relationship_graph_file} not found")
+         return
+
+     if not v4_tools_file.exists():
+         print(f"Error: {v4_tools_file} not found")
+         return
+
+     # Process tool_relationship_graph_FINAL.json
+     print(f"\nProcessing {relationship_graph_file.name}...")
+     relationship_data = load_json_file(relationship_graph_file)
+     if relationship_data is not None:
+         len(relationship_data.get("nodes", []))
+         len(relationship_data.get("edges", []))
+         filtered_relationship_data = filter_tool_relationship_graph(
+             relationship_data, valid_tool_names
+         )
+
+         # Save filtered data
+         save_json_file(relationship_graph_file, filtered_relationship_data)
+
+     # Process v4_all_tools_final.json
+     print(f"\nProcessing {v4_tools_file.name}...")
+     v4_data = load_json_file(v4_tools_file)
+     if v4_data is not None:
+         original_count = len(v4_data)
+         filtered_v4_data = filter_v4_all_tools(v4_data, valid_tool_names)
+         filtered_count = len(filtered_v4_data)
+         print(
+             f"V4 tools: {original_count} -> {filtered_count} tools ({original_count - filtered_count} removed)"
+         )
+
+         # Save filtered data
+         save_json_file(v4_tools_file, filtered_v4_data)
+
+     print("\nTool file filtering completed!")
+
+
+ if __name__ == "__main__":
+     main()
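
Because the filtering helpers are plain module-level functions, they can also be reused outside main(). A minimal sketch, under the assumption that the JSON files sit in a local web/ directory as in main():

    # Reuses load_json_file / filter_tool_relationship_graph / save_json_file
    # from the script above; the relative path is an illustrative assumption.
    from pathlib import Path

    graph_path = Path("web/tool_relationship_graph_FINAL.json")
    data = load_json_file(graph_path)
    if data is not None:
        valid = set(ToolUniverse().list_built_in_tools(mode="list_name", scan_all=True))
        save_json_file(graph_path, filter_tool_relationship_graph(data, valid))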