tooluniverse 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic. Click here for more details.

Files changed (190) hide show
  1. tooluniverse/__init__.py +340 -4
  2. tooluniverse/admetai_tool.py +84 -0
  3. tooluniverse/agentic_tool.py +563 -0
  4. tooluniverse/alphafold_tool.py +96 -0
  5. tooluniverse/base_tool.py +129 -6
  6. tooluniverse/boltz_tool.py +207 -0
  7. tooluniverse/chem_tool.py +192 -0
  8. tooluniverse/compose_scripts/__init__.py +1 -0
  9. tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
  10. tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
  11. tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
  12. tooluniverse/compose_scripts/literature_tool.py +34 -0
  13. tooluniverse/compose_scripts/output_summarizer.py +279 -0
  14. tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
  15. tooluniverse/compose_scripts/tool_discover.py +705 -0
  16. tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
  17. tooluniverse/compose_tool.py +371 -0
  18. tooluniverse/ctg_tool.py +1002 -0
  19. tooluniverse/custom_tool.py +81 -0
  20. tooluniverse/dailymed_tool.py +108 -0
  21. tooluniverse/data/admetai_tools.json +155 -0
  22. tooluniverse/data/agentic_tools.json +1156 -0
  23. tooluniverse/data/alphafold_tools.json +87 -0
  24. tooluniverse/data/boltz_tools.json +9 -0
  25. tooluniverse/data/chembl_tools.json +16 -0
  26. tooluniverse/data/clait_tools.json +108 -0
  27. tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
  28. tooluniverse/data/compose_tools.json +202 -0
  29. tooluniverse/data/dailymed_tools.json +70 -0
  30. tooluniverse/data/dataset_tools.json +646 -0
  31. tooluniverse/data/disease_target_score_tools.json +712 -0
  32. tooluniverse/data/efo_tools.json +17 -0
  33. tooluniverse/data/embedding_tools.json +319 -0
  34. tooluniverse/data/enrichr_tools.json +31 -0
  35. tooluniverse/data/europe_pmc_tools.json +22 -0
  36. tooluniverse/data/expert_feedback_tools.json +10 -0
  37. tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
  38. tooluniverse/data/fda_drug_labeling_tools.json +1 -1
  39. tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
  40. tooluniverse/data/finder_tools.json +209 -0
  41. tooluniverse/data/gene_ontology_tools.json +113 -0
  42. tooluniverse/data/gwas_tools.json +1082 -0
  43. tooluniverse/data/hpa_tools.json +333 -0
  44. tooluniverse/data/humanbase_tools.json +47 -0
  45. tooluniverse/data/idmap_tools.json +74 -0
  46. tooluniverse/data/mcp_client_tools_example.json +113 -0
  47. tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
  48. tooluniverse/data/medlineplus_tools.json +141 -0
  49. tooluniverse/data/monarch_tools.json +1 -1
  50. tooluniverse/data/openalex_tools.json +36 -0
  51. tooluniverse/data/opentarget_tools.json +1 -1
  52. tooluniverse/data/output_summarization_tools.json +101 -0
  53. tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
  54. tooluniverse/data/packages/categorized_tools.txt +206 -0
  55. tooluniverse/data/packages/cheminformatics_tools.json +347 -0
  56. tooluniverse/data/packages/earth_sciences_tools.json +74 -0
  57. tooluniverse/data/packages/genomics_tools.json +776 -0
  58. tooluniverse/data/packages/image_processing_tools.json +38 -0
  59. tooluniverse/data/packages/machine_learning_tools.json +789 -0
  60. tooluniverse/data/packages/neuroscience_tools.json +62 -0
  61. tooluniverse/data/packages/original_tools.txt +0 -0
  62. tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
  63. tooluniverse/data/packages/scientific_computing_tools.json +560 -0
  64. tooluniverse/data/packages/single_cell_tools.json +453 -0
  65. tooluniverse/data/packages/software_tools.json +4954 -0
  66. tooluniverse/data/packages/structural_biology_tools.json +396 -0
  67. tooluniverse/data/packages/visualization_tools.json +399 -0
  68. tooluniverse/data/pubchem_tools.json +215 -0
  69. tooluniverse/data/pubtator_tools.json +68 -0
  70. tooluniverse/data/rcsb_pdb_tools.json +1332 -0
  71. tooluniverse/data/reactome_tools.json +19 -0
  72. tooluniverse/data/semantic_scholar_tools.json +26 -0
  73. tooluniverse/data/special_tools.json +2 -25
  74. tooluniverse/data/tool_composition_tools.json +88 -0
  75. tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
  76. tooluniverse/data/txagent_client_tools.json +9 -0
  77. tooluniverse/data/uniprot_tools.json +211 -0
  78. tooluniverse/data/url_fetch_tools.json +94 -0
  79. tooluniverse/data/uspto_downloader_tools.json +9 -0
  80. tooluniverse/data/uspto_tools.json +811 -0
  81. tooluniverse/data/xml_tools.json +3275 -0
  82. tooluniverse/dataset_tool.py +296 -0
  83. tooluniverse/default_config.py +165 -0
  84. tooluniverse/efo_tool.py +42 -0
  85. tooluniverse/embedding_database.py +630 -0
  86. tooluniverse/embedding_sync.py +396 -0
  87. tooluniverse/enrichr_tool.py +266 -0
  88. tooluniverse/europe_pmc_tool.py +52 -0
  89. tooluniverse/execute_function.py +1775 -95
  90. tooluniverse/extended_hooks.py +444 -0
  91. tooluniverse/gene_ontology_tool.py +194 -0
  92. tooluniverse/graphql_tool.py +158 -36
  93. tooluniverse/gwas_tool.py +358 -0
  94. tooluniverse/hpa_tool.py +1645 -0
  95. tooluniverse/humanbase_tool.py +389 -0
  96. tooluniverse/logging_config.py +254 -0
  97. tooluniverse/mcp_client_tool.py +764 -0
  98. tooluniverse/mcp_integration.py +413 -0
  99. tooluniverse/mcp_tool_registry.py +925 -0
  100. tooluniverse/medlineplus_tool.py +337 -0
  101. tooluniverse/openalex_tool.py +228 -0
  102. tooluniverse/openfda_adv_tool.py +283 -0
  103. tooluniverse/openfda_tool.py +393 -160
  104. tooluniverse/output_hook.py +1122 -0
  105. tooluniverse/package_tool.py +195 -0
  106. tooluniverse/pubchem_tool.py +158 -0
  107. tooluniverse/pubtator_tool.py +168 -0
  108. tooluniverse/rcsb_pdb_tool.py +38 -0
  109. tooluniverse/reactome_tool.py +108 -0
  110. tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
  111. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
  112. tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
  113. tooluniverse/remote/expert_feedback/simple_test.py +23 -0
  114. tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
  115. tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
  116. tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
  117. tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
  118. tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
  119. tooluniverse/remote/immune_compass/compass_tool.py +327 -0
  120. tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
  121. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
  122. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
  123. tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
  124. tooluniverse/remote_tool.py +99 -0
  125. tooluniverse/restful_tool.py +53 -30
  126. tooluniverse/scripts/generate_tool_graph.py +408 -0
  127. tooluniverse/scripts/visualize_tool_graph.py +829 -0
  128. tooluniverse/semantic_scholar_tool.py +62 -0
  129. tooluniverse/smcp.py +2452 -0
  130. tooluniverse/smcp_server.py +975 -0
  131. tooluniverse/test/mcp_server_test.py +0 -0
  132. tooluniverse/test/test_admetai_tool.py +370 -0
  133. tooluniverse/test/test_agentic_tool.py +129 -0
  134. tooluniverse/test/test_alphafold_tool.py +71 -0
  135. tooluniverse/test/test_chem_tool.py +37 -0
  136. tooluniverse/test/test_compose_lieraturereview.py +63 -0
  137. tooluniverse/test/test_compose_tool.py +448 -0
  138. tooluniverse/test/test_dailymed.py +69 -0
  139. tooluniverse/test/test_dataset_tool.py +200 -0
  140. tooluniverse/test/test_disease_target_score.py +56 -0
  141. tooluniverse/test/test_drugbank_filter_examples.py +179 -0
  142. tooluniverse/test/test_efo.py +31 -0
  143. tooluniverse/test/test_enrichr_tool.py +21 -0
  144. tooluniverse/test/test_europe_pmc_tool.py +20 -0
  145. tooluniverse/test/test_fda_adv.py +95 -0
  146. tooluniverse/test/test_fda_drug_labeling.py +91 -0
  147. tooluniverse/test/test_gene_ontology_tools.py +66 -0
  148. tooluniverse/test/test_gwas_tool.py +139 -0
  149. tooluniverse/test/test_hpa.py +625 -0
  150. tooluniverse/test/test_humanbase_tool.py +20 -0
  151. tooluniverse/test/test_idmap_tools.py +61 -0
  152. tooluniverse/test/test_mcp_server.py +211 -0
  153. tooluniverse/test/test_mcp_tool.py +247 -0
  154. tooluniverse/test/test_medlineplus.py +220 -0
  155. tooluniverse/test/test_openalex_tool.py +32 -0
  156. tooluniverse/test/test_opentargets.py +28 -0
  157. tooluniverse/test/test_pubchem_tool.py +116 -0
  158. tooluniverse/test/test_pubtator_tool.py +37 -0
  159. tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
  160. tooluniverse/test/test_reactome.py +54 -0
  161. tooluniverse/test/test_semantic_scholar_tool.py +24 -0
  162. tooluniverse/test/test_software_tools.py +147 -0
  163. tooluniverse/test/test_tool_description_optimizer.py +49 -0
  164. tooluniverse/test/test_tool_finder.py +26 -0
  165. tooluniverse/test/test_tool_finder_llm.py +252 -0
  166. tooluniverse/test/test_tools_find.py +195 -0
  167. tooluniverse/test/test_uniprot_tools.py +74 -0
  168. tooluniverse/test/test_uspto_tool.py +72 -0
  169. tooluniverse/test/test_xml_tool.py +113 -0
  170. tooluniverse/tool_finder_embedding.py +267 -0
  171. tooluniverse/tool_finder_keyword.py +693 -0
  172. tooluniverse/tool_finder_llm.py +699 -0
  173. tooluniverse/tool_graph_web_ui.py +955 -0
  174. tooluniverse/tool_registry.py +416 -0
  175. tooluniverse/uniprot_tool.py +155 -0
  176. tooluniverse/url_tool.py +253 -0
  177. tooluniverse/uspto_tool.py +240 -0
  178. tooluniverse/utils.py +369 -41
  179. tooluniverse/xml_tool.py +369 -0
  180. tooluniverse-1.0.0.dist-info/METADATA +377 -0
  181. tooluniverse-1.0.0.dist-info/RECORD +186 -0
  182. tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
  183. tooluniverse/generate_mcp_tools.py +0 -113
  184. tooluniverse/mcp_server.py +0 -3340
  185. tooluniverse-0.2.0.dist-info/METADATA +0 -139
  186. tooluniverse-0.2.0.dist-info/RECORD +0 -21
  187. tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
  188. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +0 -0
  189. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
  190. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,693 @@
1
+ """
2
+ Keyword-based Tool Finder - An advanced keyword search tool for finding relevant tools.
3
+
4
+ This tool provides sophisticated keyword matching functionality using natural language
5
+ processing techniques including tokenization, stop word removal, stemming, and TF-IDF
6
+ scoring for improved relevance ranking. It serves as a robust search method when
7
+ AI-powered search methods are unavailable.
8
+ """
9
+
10
+ import json
11
+ import re
12
+ import math
13
+ from collections import Counter, defaultdict
14
+ from typing import Dict, List
15
+ from .base_tool import BaseTool
16
+ from .tool_registry import register_tool
17
+
18
+
19
+ @register_tool("ToolFinderKeyword")
20
+ class ToolFinderKeyword(BaseTool):
21
+ """
22
+ Advanced keyword-based tool finder that uses sophisticated text processing and TF-IDF scoring.
23
+
24
+ This class implements natural language processing techniques for tool discovery including:
25
+ - Tokenization and normalization
26
+ - Stop word removal
27
+ - Basic stemming
28
+ - TF-IDF relevance scoring
29
+ - Semantic phrase matching
30
+
31
+ The search operates by parsing user queries to extract key terms, processing them through
32
+ NLP pipelines, and matching against pre-built indices of tool metadata for efficient
33
+ and relevant tool discovery.
34
+ """
35
+
36
# Common English stop words. Tokens found in this set are discarded by
# `_tokenize_and_normalize` before stemming and indexing.
STOP_WORDS = set(
    (
        "a an and are as at be by for from has he in is it its of on that to "
        "was will with the this but they have had what said each which their "
        "time up use your how all any can do get if may new now old see two "
        "way who boy did number no find long down day came made part"
    ).split()
)
103
+
104
# Simple (suffix, replacement) stemming rules for common suffixes.
#
# `_apply_stemming` walks this list in order and applies the FIRST rule whose
# suffix matches, so a longer suffix only takes effect if it appears before
# any shorter suffix that is a tail of it.
#
# NOTE(review): with the current order, "es", "ness", "less" and "ous" are
# shadowed by the earlier "s" rule, and "ify" is shadowed by "fy"; those
# entries never fire. The order is kept as-is so stemming output (and thus
# search ranking) does not change; reorder deliberately if the longer rules
# are meant to apply.
STEMMING_RULES = [
    ("ies", "y"),
    ("ied", "y"),
    ("ying", "y"),
    ("ing", ""),
    ("ly", ""),
    ("ed", ""),
    ("ier", "y"),
    ("iest", "y"),
    ("s", ""),
    ("es", ""),
    ("er", ""),
    ("est", ""),
    ("tion", "t"),
    ("sion", "s"),
    ("ness", ""),
    ("ment", ""),
    ("able", ""),
    ("ible", ""),
    ("ful", ""),
    ("less", ""),
    ("ous", ""),
    ("ive", ""),
    ("al", ""),
    ("ic", ""),
    ("ize", ""),
    ("ise", ""),
    ("ate", ""),
    ("fy", ""),
    ("ify", ""),
]
137
+
138
def __init__(self, tool_config, tooluniverse=None):
    """
    Set up the keyword-based tool finder from its configuration.

    Args:
        tool_config (dict): Configuration dictionary for the tool. The keys
            read here are ``name``, ``description``, ``exclude_tools``,
            ``configs`` (for a nested ``exclude_tools``),
            ``include_categories`` and ``exclude_categories``.
        tooluniverse: Reference to the ToolUniverse instance containing all tools
    """
    super().__init__(tool_config)
    self.tooluniverse = tooluniverse

    # Identity, with fallbacks for configs that omit these keys.
    self.name = tool_config.get("name", "ToolFinderKeyword")
    self.description = tool_config.get(
        "description", "Advanced keyword-based tool finder"
    )

    # Tools that must never be returned by a search. A top-level
    # "exclude_tools" wins, then "configs" -> "exclude_tools", then the
    # built-in list below.
    builtin_excluded = [
        "Tool_RAG",
        "Tool_Finder",
        "Finish",
        "CallAgent",
        "ToolFinderLLM",
        "ToolFinderKeyword",
    ]
    nested_excluded = tool_config.get("configs", {}).get(
        "exclude_tools", builtin_excluded
    )
    self.exclude_tools = tool_config.get("exclude_tools", nested_excluded)

    # Optional category allow/deny lists applied while scoring.
    self.include_categories = tool_config.get("include_categories")
    self.exclude_categories = tool_config.get("exclude_categories")

    # TF-IDF index state; populated on first search.
    self._tool_index = None
    self._document_frequencies = None
    self._total_documents = 0
177
+
178
def _tokenize_and_normalize(self, text: str) -> List[str]:
    """
    Tokenize text and apply normalization including stop word removal and stemming.

    Underscores are treated as word separators, so snake_case identifiers
    (typical for tool and parameter names) contribute their component words.

    Args:
        text (str): Input text to tokenize

    Returns:
        List[str]: Stemmed tokens longer than two characters, in input order
    """
    if not text:
        return []

    # "_" is a word character for ``\b``, so without this step an identifier
    # such as "get_gene_info" has no word boundary around its parts and the
    # pattern below would extract nothing from it.
    normalized = text.lower().replace("_", " ")

    # Words must start with a letter; digits may follow.
    tokens = re.findall(r"\b[a-zA-Z][a-zA-Z0-9]*\b", normalized)

    stop_words = self.STOP_WORDS
    stemmed_tokens = []
    for token in tokens:
        if token in stop_words:
            continue
        stemmed = self._apply_stemming(token)
        if len(stemmed) > 2:  # Keep only meaningful terms
            stemmed_tokens.append(stemmed)

    return stemmed_tokens
205
+
206
def _apply_stemming(self, word: str) -> str:
    """
    Reduce a word towards its root form using ``STEMMING_RULES``.

    Rules are tried in list order and only the first applicable rule is
    used. A rule applies when the word ends with its suffix and more than
    two characters would remain before the suffix.

    Args:
        word (str): Word to stem

    Returns:
        str: Stemmed word, or the word unchanged if no rule applies
    """
    # Words of three characters or fewer are never stemmed.
    if len(word) < 4:
        return word

    candidates = (
        word[: -len(suffix)] + replacement
        for suffix, replacement in self.STEMMING_RULES
        if word.endswith(suffix) and len(word) - len(suffix) > 2
    )
    return next(candidates, word)
224
+
225
def _extract_phrases(
    self, tokens: List[str], max_phrase_length: int = 3
) -> List[str]:
    """
    Expand tokens into the tokens themselves plus their contiguous n-grams.

    Args:
        tokens (List[str]): Tokenized words
        max_phrase_length (int): Maximum number of tokens in a phrase

    Returns:
        List[str]: All individual tokens first, followed by space-joined
        phrases ordered by length (2-grams, then 3-grams, ...) and then by
        starting position
    """
    token_count = len(tokens)
    longest = min(max_phrase_length, token_count)

    result = list(tokens)
    for size in range(2, longest + 1):
        result.extend(
            " ".join(tokens[start : start + size])
            for start in range(token_count - size + 1)
        )
    return result
250
+
251
def _build_tool_index(self, tools: List[Dict]) -> None:
    """
    Build the TF-IDF index used for relevance scoring.

    For every tool not listed in ``self.exclude_tools`` the name,
    description, type, category and parameter names/descriptions are
    combined, tokenized and expanded into phrases. The result is stored in
    ``self._tool_index`` (per-tool term counts), ``self._document_frequencies``
    (number of tools containing each term) and ``self._total_documents``.

    Missing, ``None`` or non-string metadata fields are tolerated rather than
    raising, and the instance state is only replaced once the whole index has
    been built.

    Args:
        tools (List[Dict]): List of tool configurations
    """
    index = {}
    term_doc_count = defaultdict(int)
    indexed_documents = 0

    for tool in tools:
        tool_name = tool.get("name", "")
        if tool_name in self.exclude_tools:
            continue

        # Combine tool metadata for indexing. Falsy values (None, "") are
        # dropped and everything else is coerced to text so one malformed
        # config cannot break the join.
        fields = [
            tool.get(key) for key in ("name", "description", "type", "category")
        ]
        # Include parameter names and descriptions
        fields.extend(self._extract_parameter_text(tool.get("parameter", {})))
        searchable_text = " ".join(str(value) for value in fields if value)

        # Tokenize and extract phrases
        tokens = self._tokenize_and_normalize(searchable_text)
        phrases = self._extract_phrases(tokens)

        # Term frequency map for this tool
        index[tool_name] = {
            "tool": tool,
            "terms": Counter(phrases),
            "total_terms": len(phrases),
        }

        # Each tool counts once per distinct term it contains
        for term in set(phrases):
            term_doc_count[term] += 1

        indexed_documents += 1

    # Publish the finished index in one step.
    self._tool_index = index
    self._total_documents = indexed_documents
    self._document_frequencies = dict(term_doc_count)
300
+
301
def _extract_parameter_text(self, parameter_schema: Dict) -> List[str]:
    """
    Collect searchable strings from a tool's parameter schema.

    Args:
        parameter_schema (Dict): Tool parameter schema; only its
            ``properties`` mapping is read

    Returns:
        List[str]: For each property, its name followed by its non-empty
        ``description`` (when the property definition is a dict). An empty
        list is returned when the schema is not a dict.
    """
    if not isinstance(parameter_schema, dict):
        return []

    collected = []
    for name, definition in parameter_schema.get("properties", {}).items():
        collected.append(name)
        description = (
            definition.get("description", "")
            if isinstance(definition, dict)
            else ""
        )
        if description:
            collected.append(description)
    return collected
323
+
324
def _calculate_tfidf_score(self, query_terms: List[str], tool_name: str) -> float:
    """
    Score one indexed tool against the query using TF-IDF.

    Each distinct query term found in the tool contributes
    ``tf * idf * log(1 + query_count)`` where ``tf`` is the term's count in
    the tool divided by the tool's total term count and ``idf`` is
    ``log(total documents / documents containing the term)``.

    Args:
        query_terms (List[str]): Processed query terms and phrases
        tool_name (str): Name of the tool to score

    Returns:
        float: TF-IDF relevance score; 0.0 when the tool is not indexed
    """
    if tool_name not in self._tool_index:
        return 0.0

    entry = self._tool_index[tool_name]
    indexed_terms = entry["terms"]
    term_total = entry["total_terms"]

    relevance = 0.0
    for term, occurrences in Counter(query_terms).items():
        if term not in indexed_terms:
            continue

        # Term frequency within this tool.
        tf = indexed_terms[term] / term_total

        # Inverse document frequency; unseen terms default to one document.
        containing_docs = self._document_frequencies.get(term, 1)
        idf = math.log(self._total_documents / containing_docs)

        # Repeated query terms are weighted logarithmically.
        relevance += tf * idf * math.log(1 + occurrences)

    return relevance
358
+
359
def _calculate_exact_match_bonus(self, query: str, tool: Dict) -> float:
    """
    Calculate bonus score for exact matches in tool name or key phrases.

    Bonuses are additive:
    - 2.0 when the query contains the tool name or vice versa
    - 1.5 when a multi-word query appears verbatim in the description
    - 1.0 when the query appears in the tool type or category

    An empty query or an empty tool name never earns a bonus (the empty
    string is a substring of everything, which would otherwise reward tools
    that simply lack a name). Missing or ``None`` metadata is treated as
    empty text.

    Args:
        query (str): Original query string
        tool (Dict): Tool configuration

    Returns:
        float: Exact match bonus score
    """
    query_lower = (query or "").lower()
    if not query_lower.strip():
        return 0.0

    tool_name = str(tool.get("name") or "").lower()
    tool_desc = str(tool.get("description") or "").lower()
    tool_type = str(tool.get("type") or "").lower()
    tool_category = str(tool.get("category") or "").lower()

    bonus = 0.0

    # Exact tool name match (guarded so "" does not match every query)
    if tool_name and (query_lower in tool_name or tool_name in query_lower):
        bonus += 2.0

    # Exact phrase match in description, with query whitespace collapsed
    query_words = query_lower.split()
    if len(query_words) > 1 and " ".join(query_words) in tool_desc:
        bonus += 1.5

    # Category or type exact matches
    if query_lower in tool_type or query_lower in tool_category:
        bonus += 1.0

    return bonus
395
+
396
def find_tools(
    self,
    message=None,
    picked_tool_names=None,
    rag_num=5,
    return_call_result=False,
    categories=None,
):
    """
    Find relevant tools based on a message or pre-selected tool names.

    This method matches the interface of other tool finders to ensure
    seamless replacement. It uses keyword-based search instead of embedding similarity.

    Args:
        message (str, optional): Query message to find tools for. Required if picked_tool_names is None.
        picked_tool_names (list, optional): Pre-selected tool names to process. Required if message is None.
        rag_num (int, optional): Number of tools to return after filtering. Defaults to 5.
        return_call_result (bool, optional): If True, returns both prompts and tool names. Defaults to False.
        categories (list, optional): List of tool categories to filter by.

    Returns:
        str or tuple:
            - If return_call_result is False: Tool prompts as a formatted string
            - If return_call_result is True: Tuple of (tool_prompts, tool_names)

    Raises:
        AssertionError: If both message and picked_tool_names are None
    """
    if picked_tool_names is None:
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs when Python is started with -O.
        if message is None:
            raise AssertionError(
                "Either message or picked_tool_names must be provided"
            )

        # Use keyword-based tool search (directly call JSON search to avoid recursion)
        search_result = self._run_json_search(
            {"description": message, "categories": categories, "limit": rag_num}
        )

        # Parse JSON result to extract tool names; any search error or
        # unparsable payload yields no tools.
        try:
            result_data = json.loads(search_result)
        except json.JSONDecodeError:
            picked_tool_names = []
        else:
            if result_data.get("error"):
                picked_tool_names = []
            else:
                picked_tool_names = [
                    tool["name"] for tool in result_data.get("tools", [])
                ]

    # Filter out special tools (matching original behavior), then cap the count
    picked_tool_names = [
        name for name in picked_tool_names if name not in self.exclude_tools
    ][:rag_num]

    # Get tool objects and prepare prompts (matching original behavior)
    picked_tools = self.tooluniverse.get_tool_by_name(picked_tool_names)
    picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(picked_tools)

    if return_call_result:
        return picked_tools_prompt, picked_tool_names
    return picked_tools_prompt
460
+
461
def run(self, arguments):
    """
    Entry point: search for tools matching a textual description.

    Provides the same call shape as the other tool finders by delegating to
    ``find_tools``.

    Args:
        arguments (dict): Dictionary containing:
            - description (str): Search query string (``query`` is accepted as a fallback key)
            - categories (list, optional): List of categories to filter by
            - limit (int, optional): Maximum number of results to return (default: 10)
            - picked_tool_names (list, optional): Pre-selected tool names to process
            - return_call_result (bool, optional): Whether to return both prompts and names. Defaults to False.

    Returns:
        str or tuple:
            - If return_call_result is False: Tool prompts as a formatted string
            - If return_call_result is True: Tuple of (tool_prompts, tool_names)
            - If return_call_result is explicitly None: the JSON string
              produced by ``_run_json_search``
    """
    return_call_result = arguments.get("return_call_result", False)

    # NOTE(review): because the default above is False, this JSON branch is
    # only reached when the caller passes return_call_result=None explicitly.
    # Confirm whether that is the intended trigger for the legacy interface.
    if return_call_result is None:
        return self._run_json_search(arguments)

    return self.find_tools(
        message=arguments.get("description", arguments.get("query", "")),
        picked_tool_names=arguments.get("picked_tool_names"),
        rag_num=arguments.get("limit", 10),
        return_call_result=return_call_result,
        categories=arguments.get("categories"),
    )
499
+
500
def _run_json_search(self, arguments):
    """
    Original JSON-based search implementation for backward compatibility.

    Scores every candidate tool as TF-IDF score plus exact-match bonus,
    keeps those with a positive total and returns the top ``limit`` entries.
    The TF-IDF index is rebuilt whenever the set of candidate tool names
    differs from the set currently indexed, so switching category filters
    cannot silently reuse an index built for other tools.

    Args:
        arguments (dict): Search arguments:
            - description (str): Search query (``query`` accepted as fallback)
            - categories (list, optional): Categories passed to
              ``tooluniverse.select_tools``; non-list values are ignored
            - limit (int, optional): Maximum number of results (default: 10)

    Returns:
        str: JSON string containing search results with relevance scores, or
        a JSON object with an ``error`` message and an empty ``tools`` list
    """
    query = ""
    try:
        # Extract arguments with unified parameter names
        query = arguments.get(
            "description", arguments.get("query", "")
        )  # Support both names for compatibility
        categories = arguments.get("categories", None)
        limit = arguments.get("limit", 10)

        if not query:
            return json.dumps(
                {
                    "error": "Description parameter is required",
                    "query": query,
                    "tools": [],
                },
                indent=2,
            )

        # Ensure categories is None or a list (handle validation issue)
        if categories is not None and not isinstance(categories, list):
            categories = None

        # Get all tools from tooluniverse
        if not self.tooluniverse:
            return json.dumps(
                {
                    "error": "ToolUniverse not available",
                    "query": query,
                    "tools": [],
                },
                indent=2,
            )

        all_tools = self.tooluniverse.return_all_loaded_tools()

        # Filter by categories if specified
        if categories:
            filtered_tools = self.tooluniverse.select_tools(
                include_categories=categories
            )
        else:
            filtered_tools = all_tools

        # Rebuild the index when it is missing or covers a different set of
        # tools. Comparing names (not just counts) catches the case where a
        # different tool set happens to have the same size.
        eligible_names = {
            t.get("name", "")
            for t in filtered_tools
            if t.get("name", "") not in self.exclude_tools
        }
        if self._tool_index is None or set(self._tool_index) != eligible_names:
            self._build_tool_index(filtered_tools)

        # Process query using NLP techniques
        query_tokens = self._tokenize_and_normalize(query)
        query_phrases = self._extract_phrases(query_tokens)

        if not query_tokens and not query_phrases:
            return json.dumps(
                {
                    "error": "No meaningful search terms found in query",
                    "query": query,
                    "tools": [],
                },
                indent=2,
            )

        # Calculate relevance scores for all tools
        tool_scores = []

        for tool in filtered_tools:
            tool_name = tool.get("name", "")

            # Skip excluded tools
            if tool_name in self.exclude_tools:
                continue

            # Apply the instance-level category filters if configured
            tool_category = tool.get("category", "unknown")
            if (
                self.include_categories
                and tool_category not in self.include_categories
            ):
                continue
            if self.exclude_categories and tool_category in self.exclude_categories:
                continue

            tfidf_score = self._calculate_tfidf_score(query_phrases, tool_name)
            exact_bonus = self._calculate_exact_match_bonus(query, tool)
            total_score = tfidf_score + exact_bonus

            # Only include tools with positive relevance. The component
            # scores are internal and intentionally not part of the output.
            if total_score > 0:
                tool_scores.append(
                    {
                        "name": tool_name,
                        "description": tool.get("description", ""),
                        "type": tool.get("type", ""),
                        "category": tool_category,
                        "parameters": tool.get("parameter", {}),
                        "required": tool.get("required", []),
                        "relevance_score": round(total_score, 4),
                    }
                )

        # Sort by relevance score (highest first) and limit results
        tool_scores.sort(key=lambda x: x["relevance_score"], reverse=True)
        matching_tools = tool_scores[:limit]

        return json.dumps(
            {
                "query": query,
                "search_method": "Advanced keyword matching (TF-IDF + NLP)",
                "total_matches": len(matching_tools),
                "categories_filtered": categories,
                "processing_info": {
                    "query_tokens": len(query_tokens),
                    "query_phrases": len(query_phrases),
                    "indexed_tools": self._total_documents,
                },
                "tools": matching_tools,
            },
            indent=2,
        )

    except Exception as e:
        # Boundary handler: callers expect a JSON error payload, never an
        # exception. Report the query that was actually used, whichever key
        # ("description" or "query") it arrived under.
        return json.dumps(
            {
                "error": f"Advanced keyword search error: {str(e)}",
                "query": query if isinstance(query, str) else str(query),
                "tools": [],
            },
            indent=2,
        )
655
+
656
+
657
+ # # Tool configuration for ToolUniverse registration
658
+ # TOOL_CONFIG = {
659
+ # "name": "ToolFinderKeyword",
660
+ # "description": "Advanced keyword-based tool finder using NLP techniques, TF-IDF scoring, and semantic phrase matching for precise tool discovery",
661
+ # "type": "tool_finder_keyword",
662
+ # "category": "tool_finder",
663
+ # "parameter": {
664
+ # "type": "object",
665
+ # "properties": {
666
+ # "query": {
667
+ # "type": "string",
668
+ # "description": "Search query describing the desired functionality. Uses advanced NLP processing including tokenization, stop word removal, and stemming."
669
+ # },
670
+ # "categories": {
671
+ # "type": "array",
672
+ # "items": {"type": "string"},
673
+ # "description": "Optional list of tool categories to filter by"
674
+ # },
675
+ # "limit": {
676
+ # "type": "integer",
677
+ # "description": "Maximum number of tools to return, ranked by TF-IDF relevance score (default: 10)",
678
+ # "default": 10
679
+ # }
680
+ # },
681
+ # "required": ["query"]
682
+ # },
683
+ # "configs": {
684
+ # "exclude_tools": [
685
+ # "Tool_RAG", "Tool_Finder", "Finish", "CallAgent",
686
+ # "ToolFinderLLM", "ToolFinderKeyword"
687
+ # ],
688
+ # "features": [
689
+ # "tokenization", "stop_word_removal", "stemming",
690
+ # "phrase_extraction", "tfidf_scoring", "exact_match_bonus"
691
+ # ]
692
+ # }
693
+ # }