tooluniverse 0.2.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/adverse_event_tools.json +108 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +1 -1
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +1 -1
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.1.dist-info/METADATA +387 -0
- tooluniverse-1.0.1.dist-info/RECORD +182 -0
- tooluniverse-1.0.1.dist-info/entry_points.txt +9 -0
- tooluniverse/generate_mcp_tools.py +0 -113
- tooluniverse/mcp_server.py +0 -3340
- tooluniverse-0.2.0.dist-info/METADATA +0 -139
- tooluniverse-0.2.0.dist-info/RECORD +0 -21
- tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/WHEEL +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,693 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Keyword-based Tool Finder - An advanced keyword search tool for finding relevant tools.
|
|
3
|
+
|
|
4
|
+
This tool provides sophisticated keyword matching functionality using natural language
|
|
5
|
+
processing techniques including tokenization, stop word removal, stemming, and TF-IDF
|
|
6
|
+
scoring for improved relevance ranking. It serves as a robust search method when
|
|
7
|
+
AI-powered search methods are unavailable.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
import math
|
|
13
|
+
from collections import Counter, defaultdict
|
|
14
|
+
from typing import Dict, List
|
|
15
|
+
from .base_tool import BaseTool
|
|
16
|
+
from .tool_registry import register_tool
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@register_tool("ToolFinderKeyword")
class ToolFinderKeyword(BaseTool):
    """
    Advanced keyword-based tool finder that uses sophisticated text processing and TF-IDF scoring.

    This class implements natural language processing techniques for tool discovery including:
    - Tokenization and normalization
    - Stop word removal
    - Basic stemming
    - TF-IDF relevance scoring
    - Semantic phrase matching

    The search operates by parsing user queries to extract key terms, processing them through
    NLP pipelines, and matching against pre-built indices of tool metadata for efficient
    and relevant tool discovery.
    """

    # Common English stop words filtered out before stemming/indexing.
    STOP_WORDS = {
        "a",
        "an",
        "and",
        "are",
        "as",
        "at",
        "be",
        "by",
        "for",
        "from",
        "has",
        "he",
        "in",
        "is",
        "it",
        "its",
        "of",
        "on",
        "that",
        "to",
        "was",
        "will",
        "with",
        "the",
        "this",
        "but",
        "they",
        "have",
        "had",
        "what",
        "said",
        "each",
        "which",
        "their",
        "time",
        "up",
        "use",
        "your",
        "how",
        "all",
        "any",
        "can",
        "do",
        "get",
        "if",
        "may",
        "new",
        "now",
        "old",
        "see",
        "two",
        "way",
        "who",
        "boy",
        "did",
        "number",
        "no",
        "find",
        "long",
        "down",
        "day",
        "came",
        "made",
        "part",
    }

    # Simple suffix-stripping rules applied in order; the FIRST matching
    # suffix wins (see _apply_stemming), so rule order matters.
    # Fix: removed a duplicate ("ies", "y") entry that could never fire.
    STEMMING_RULES = [
        ("ies", "y"),
        ("ied", "y"),
        ("ying", "y"),
        ("ing", ""),
        ("ly", ""),
        ("ed", ""),
        ("ier", "y"),
        ("iest", "y"),
        ("s", ""),
        # NOTE(review): the entries below ending in "s" ("es", "ness",
        # "less", "ous") are shadowed by the ("s", "") rule above and can
        # never match; similarly ("ify", "") is shadowed by ("fy", "").
        # They are kept (not reordered) to preserve existing behavior —
        # reordering would change stemming output. Confirm intent upstream.
        ("es", ""),
        ("er", ""),
        ("est", ""),
        ("tion", "t"),
        ("sion", "s"),
        ("ness", ""),
        ("ment", ""),
        ("able", ""),
        ("ible", ""),
        ("ful", ""),
        ("less", ""),
        ("ous", ""),
        ("ive", ""),
        ("al", ""),
        ("ic", ""),
        ("ize", ""),
        ("ise", ""),
        ("ate", ""),
        ("fy", ""),
        ("ify", ""),
    ]
|
|
137
|
+
|
|
138
|
+
def __init__(self, tool_config, tooluniverse=None):
|
|
139
|
+
"""
|
|
140
|
+
Initialize the Advanced Keyword-based Tool Finder.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
tool_config (dict): Configuration dictionary for the tool
|
|
144
|
+
tooluniverse: Reference to the ToolUniverse instance containing all tools
|
|
145
|
+
"""
|
|
146
|
+
super().__init__(tool_config)
|
|
147
|
+
self.tooluniverse = tooluniverse
|
|
148
|
+
|
|
149
|
+
# Extract configuration
|
|
150
|
+
self.name = tool_config.get("name", "ToolFinderKeyword")
|
|
151
|
+
self.description = tool_config.get(
|
|
152
|
+
"description", "Advanced keyword-based tool finder"
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# Tool filtering settings
|
|
156
|
+
self.exclude_tools = tool_config.get(
|
|
157
|
+
"exclude_tools",
|
|
158
|
+
tool_config.get("configs", {}).get(
|
|
159
|
+
"exclude_tools",
|
|
160
|
+
[
|
|
161
|
+
"Tool_RAG",
|
|
162
|
+
"Tool_Finder",
|
|
163
|
+
"Finish",
|
|
164
|
+
"CallAgent",
|
|
165
|
+
"ToolFinderLLM",
|
|
166
|
+
"ToolFinderKeyword",
|
|
167
|
+
],
|
|
168
|
+
),
|
|
169
|
+
)
|
|
170
|
+
self.include_categories = tool_config.get("include_categories", None)
|
|
171
|
+
self.exclude_categories = tool_config.get("exclude_categories", None)
|
|
172
|
+
|
|
173
|
+
# Initialize tool index for TF-IDF scoring
|
|
174
|
+
self._tool_index = None
|
|
175
|
+
self._document_frequencies = None
|
|
176
|
+
self._total_documents = 0
|
|
177
|
+
|
|
178
|
+
def _tokenize_and_normalize(self, text: str) -> List[str]:
|
|
179
|
+
"""
|
|
180
|
+
Tokenize text and apply normalization including stop word removal and stemming.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
text (str): Input text to tokenize
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
List[str]: List of processed tokens
|
|
187
|
+
"""
|
|
188
|
+
if not text:
|
|
189
|
+
return []
|
|
190
|
+
|
|
191
|
+
# Convert to lowercase and extract words (alphanumeric sequences)
|
|
192
|
+
tokens = re.findall(r"\b[a-zA-Z][a-zA-Z0-9]*\b", text.lower())
|
|
193
|
+
|
|
194
|
+
# Remove stop words
|
|
195
|
+
tokens = [token for token in tokens if token not in self.STOP_WORDS]
|
|
196
|
+
|
|
197
|
+
# Apply basic stemming
|
|
198
|
+
stemmed_tokens = []
|
|
199
|
+
for token in tokens:
|
|
200
|
+
stemmed = self._apply_stemming(token)
|
|
201
|
+
if len(stemmed) > 2: # Keep only meaningful terms
|
|
202
|
+
stemmed_tokens.append(stemmed)
|
|
203
|
+
|
|
204
|
+
return stemmed_tokens
|
|
205
|
+
|
|
206
|
+
def _apply_stemming(self, word: str) -> str:
|
|
207
|
+
"""
|
|
208
|
+
Apply basic stemming rules to reduce words to their root form.
|
|
209
|
+
|
|
210
|
+
Args:
|
|
211
|
+
word (str): Word to stem
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
str: Stemmed word
|
|
215
|
+
"""
|
|
216
|
+
if len(word) <= 3:
|
|
217
|
+
return word
|
|
218
|
+
|
|
219
|
+
for suffix, replacement in self.STEMMING_RULES:
|
|
220
|
+
if word.endswith(suffix) and len(word) > len(suffix) + 2:
|
|
221
|
+
return word[: -len(suffix)] + replacement
|
|
222
|
+
|
|
223
|
+
return word
|
|
224
|
+
|
|
225
|
+
def _extract_phrases(
|
|
226
|
+
self, tokens: List[str], max_phrase_length: int = 3
|
|
227
|
+
) -> List[str]:
|
|
228
|
+
"""
|
|
229
|
+
Extract meaningful phrases from tokens for better semantic matching.
|
|
230
|
+
|
|
231
|
+
Args:
|
|
232
|
+
tokens (List[str]): Tokenized words
|
|
233
|
+
max_phrase_length (int): Maximum length of phrases to extract
|
|
234
|
+
|
|
235
|
+
Returns:
|
|
236
|
+
List[str]: List of phrases and individual tokens
|
|
237
|
+
"""
|
|
238
|
+
phrases = []
|
|
239
|
+
|
|
240
|
+
# Add individual tokens
|
|
241
|
+
phrases.extend(tokens)
|
|
242
|
+
|
|
243
|
+
# Add bigrams and trigrams
|
|
244
|
+
for length in range(2, min(max_phrase_length + 1, len(tokens) + 1)):
|
|
245
|
+
for i in range(len(tokens) - length + 1):
|
|
246
|
+
phrase = " ".join(tokens[i : i + length])
|
|
247
|
+
phrases.append(phrase)
|
|
248
|
+
|
|
249
|
+
return phrases
|
|
250
|
+
|
|
251
|
+
def _build_tool_index(self, tools: List[Dict]) -> None:
|
|
252
|
+
"""
|
|
253
|
+
Build TF-IDF index for all tools to enable efficient relevance scoring.
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
tools (List[Dict]): List of tool configurations
|
|
257
|
+
"""
|
|
258
|
+
self._tool_index = {}
|
|
259
|
+
term_doc_count = defaultdict(int)
|
|
260
|
+
self._total_documents = 0
|
|
261
|
+
|
|
262
|
+
for tool in tools:
|
|
263
|
+
tool_name = tool.get("name", "")
|
|
264
|
+
if tool_name in self.exclude_tools:
|
|
265
|
+
continue
|
|
266
|
+
|
|
267
|
+
# Combine tool metadata for indexing
|
|
268
|
+
searchable_text = " ".join(
|
|
269
|
+
[
|
|
270
|
+
tool.get("name", ""),
|
|
271
|
+
tool.get("description", ""),
|
|
272
|
+
tool.get("type", ""),
|
|
273
|
+
tool.get("category", ""),
|
|
274
|
+
# Include parameter names and descriptions
|
|
275
|
+
" ".join(self._extract_parameter_text(tool.get("parameter", {}))),
|
|
276
|
+
]
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
# Tokenize and extract phrases
|
|
280
|
+
tokens = self._tokenize_and_normalize(searchable_text)
|
|
281
|
+
phrases = self._extract_phrases(tokens)
|
|
282
|
+
|
|
283
|
+
# Build term frequency map for this tool
|
|
284
|
+
term_freq = Counter(phrases)
|
|
285
|
+
self._tool_index[tool_name] = {
|
|
286
|
+
"tool": tool,
|
|
287
|
+
"terms": term_freq,
|
|
288
|
+
"total_terms": len(phrases),
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
# Count document frequency for each term
|
|
292
|
+
unique_terms = set(phrases)
|
|
293
|
+
for term in unique_terms:
|
|
294
|
+
term_doc_count[term] += 1
|
|
295
|
+
|
|
296
|
+
self._total_documents += 1
|
|
297
|
+
|
|
298
|
+
# Calculate document frequencies
|
|
299
|
+
self._document_frequencies = dict(term_doc_count)
|
|
300
|
+
|
|
301
|
+
def _extract_parameter_text(self, parameter_schema: Dict) -> List[str]:
|
|
302
|
+
"""
|
|
303
|
+
Extract searchable text from parameter schema.
|
|
304
|
+
|
|
305
|
+
Args:
|
|
306
|
+
parameter_schema (Dict): Tool parameter schema
|
|
307
|
+
|
|
308
|
+
Returns:
|
|
309
|
+
List[str]: List of text elements from parameters
|
|
310
|
+
"""
|
|
311
|
+
text_elements = []
|
|
312
|
+
|
|
313
|
+
if isinstance(parameter_schema, dict):
|
|
314
|
+
properties = parameter_schema.get("properties", {})
|
|
315
|
+
for prop_name, prop_info in properties.items():
|
|
316
|
+
text_elements.append(prop_name)
|
|
317
|
+
if isinstance(prop_info, dict):
|
|
318
|
+
desc = prop_info.get("description", "")
|
|
319
|
+
if desc:
|
|
320
|
+
text_elements.append(desc)
|
|
321
|
+
|
|
322
|
+
return text_elements
|
|
323
|
+
|
|
324
|
+
def _calculate_tfidf_score(self, query_terms: List[str], tool_name: str) -> float:
|
|
325
|
+
"""
|
|
326
|
+
Calculate TF-IDF relevance score for a tool given query terms.
|
|
327
|
+
|
|
328
|
+
Args:
|
|
329
|
+
query_terms (List[str]): Processed query terms and phrases
|
|
330
|
+
tool_name (str): Name of the tool to score
|
|
331
|
+
|
|
332
|
+
Returns:
|
|
333
|
+
float: TF-IDF relevance score
|
|
334
|
+
"""
|
|
335
|
+
if tool_name not in self._tool_index:
|
|
336
|
+
return 0.0
|
|
337
|
+
|
|
338
|
+
tool_data = self._tool_index[tool_name]
|
|
339
|
+
tool_terms = tool_data["terms"]
|
|
340
|
+
total_terms = tool_data["total_terms"]
|
|
341
|
+
|
|
342
|
+
score = 0.0
|
|
343
|
+
query_term_freq = Counter(query_terms)
|
|
344
|
+
|
|
345
|
+
for term, query_freq in query_term_freq.items():
|
|
346
|
+
if term in tool_terms:
|
|
347
|
+
# Term Frequency (TF): frequency of term in tool / total terms in tool
|
|
348
|
+
tf = tool_terms[term] / total_terms
|
|
349
|
+
|
|
350
|
+
# Inverse Document Frequency (IDF): log(total docs / docs containing term)
|
|
351
|
+
doc_freq = self._document_frequencies.get(term, 1)
|
|
352
|
+
idf = math.log(self._total_documents / doc_freq)
|
|
353
|
+
|
|
354
|
+
# TF-IDF score with query term frequency weighting
|
|
355
|
+
score += tf * idf * math.log(1 + query_freq)
|
|
356
|
+
|
|
357
|
+
return score
|
|
358
|
+
|
|
359
|
+
def _calculate_exact_match_bonus(self, query: str, tool: Dict) -> float:
|
|
360
|
+
"""
|
|
361
|
+
Calculate bonus score for exact matches in tool name or key phrases.
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
query (str): Original query string
|
|
365
|
+
tool (Dict): Tool configuration
|
|
366
|
+
|
|
367
|
+
Returns:
|
|
368
|
+
float: Exact match bonus score
|
|
369
|
+
"""
|
|
370
|
+
query_lower = query.lower()
|
|
371
|
+
tool_name = tool.get("name", "").lower()
|
|
372
|
+
tool_desc = tool.get("description", "").lower()
|
|
373
|
+
|
|
374
|
+
bonus = 0.0
|
|
375
|
+
|
|
376
|
+
# Exact tool name match
|
|
377
|
+
if query_lower in tool_name or tool_name in query_lower:
|
|
378
|
+
bonus += 2.0
|
|
379
|
+
|
|
380
|
+
# Exact phrase matches in description
|
|
381
|
+
query_words = query_lower.split()
|
|
382
|
+
if len(query_words) > 1:
|
|
383
|
+
query_phrase = " ".join(query_words)
|
|
384
|
+
if query_phrase in tool_desc:
|
|
385
|
+
bonus += 1.5
|
|
386
|
+
|
|
387
|
+
# Category or type exact matches
|
|
388
|
+
tool_type = tool.get("type", "").lower()
|
|
389
|
+
tool_category = tool.get("category", "").lower()
|
|
390
|
+
|
|
391
|
+
if query_lower in tool_type or query_lower in tool_category:
|
|
392
|
+
bonus += 1.0
|
|
393
|
+
|
|
394
|
+
return bonus
|
|
395
|
+
|
|
396
|
+
def find_tools(
|
|
397
|
+
self,
|
|
398
|
+
message=None,
|
|
399
|
+
picked_tool_names=None,
|
|
400
|
+
rag_num=5,
|
|
401
|
+
return_call_result=False,
|
|
402
|
+
categories=None,
|
|
403
|
+
):
|
|
404
|
+
"""
|
|
405
|
+
Find relevant tools based on a message or pre-selected tool names.
|
|
406
|
+
|
|
407
|
+
This method matches the interface of other tool finders to ensure
|
|
408
|
+
seamless replacement. It uses keyword-based search instead of embedding similarity.
|
|
409
|
+
|
|
410
|
+
Args:
|
|
411
|
+
message (str, optional): Query message to find tools for. Required if picked_tool_names is None.
|
|
412
|
+
picked_tool_names (list, optional): Pre-selected tool names to process. Required if message is None.
|
|
413
|
+
rag_num (int, optional): Number of tools to return after filtering. Defaults to 5.
|
|
414
|
+
return_call_result (bool, optional): If True, returns both prompts and tool names. Defaults to False.
|
|
415
|
+
categories (list, optional): List of tool categories to filter by.
|
|
416
|
+
|
|
417
|
+
Returns:
|
|
418
|
+
str or tuple:
|
|
419
|
+
- If return_call_result is False: Tool prompts as a formatted string
|
|
420
|
+
- If return_call_result is True: Tuple of (tool_prompts, tool_names)
|
|
421
|
+
|
|
422
|
+
Raises:
|
|
423
|
+
AssertionError: If both message and picked_tool_names are None
|
|
424
|
+
"""
|
|
425
|
+
if picked_tool_names is None:
|
|
426
|
+
assert picked_tool_names is not None or message is not None
|
|
427
|
+
|
|
428
|
+
# Use keyword-based tool search (directly call JSON search to avoid recursion)
|
|
429
|
+
search_result = self._run_json_search(
|
|
430
|
+
{"description": message, "categories": categories, "limit": rag_num}
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
# Parse JSON result to extract tool names
|
|
434
|
+
try:
|
|
435
|
+
result_data = json.loads(search_result)
|
|
436
|
+
if result_data.get("error"):
|
|
437
|
+
picked_tool_names = []
|
|
438
|
+
else:
|
|
439
|
+
picked_tool_names = [
|
|
440
|
+
tool["name"] for tool in result_data.get("tools", [])
|
|
441
|
+
]
|
|
442
|
+
except json.JSONDecodeError:
|
|
443
|
+
picked_tool_names = []
|
|
444
|
+
|
|
445
|
+
# Filter out special tools (matching original behavior)
|
|
446
|
+
picked_tool_names_no_special = []
|
|
447
|
+
for tool in picked_tool_names:
|
|
448
|
+
if tool not in self.exclude_tools:
|
|
449
|
+
picked_tool_names_no_special.append(tool)
|
|
450
|
+
picked_tool_names_no_special = picked_tool_names_no_special[:rag_num]
|
|
451
|
+
picked_tool_names = picked_tool_names_no_special[:rag_num]
|
|
452
|
+
|
|
453
|
+
# Get tool objects and prepare prompts (matching original behavior)
|
|
454
|
+
picked_tools = self.tooluniverse.get_tool_by_name(picked_tool_names)
|
|
455
|
+
picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(picked_tools)
|
|
456
|
+
|
|
457
|
+
if return_call_result:
|
|
458
|
+
return picked_tools_prompt, picked_tool_names
|
|
459
|
+
return picked_tools_prompt
|
|
460
|
+
|
|
461
|
+
def run(self, arguments):
|
|
462
|
+
"""
|
|
463
|
+
Find tools using advanced keyword-based search with NLP processing and TF-IDF scoring.
|
|
464
|
+
|
|
465
|
+
This method provides a unified interface compatible with other tool finders.
|
|
466
|
+
|
|
467
|
+
Args:
|
|
468
|
+
arguments (dict): Dictionary containing:
|
|
469
|
+
- description (str): Search query string (unified parameter name)
|
|
470
|
+
- categories (list, optional): List of categories to filter by
|
|
471
|
+
- limit (int, optional): Maximum number of results to return (default: 10)
|
|
472
|
+
- picked_tool_names (list, optional): Pre-selected tool names to process
|
|
473
|
+
- return_call_result (bool, optional): Whether to return both prompts and names. Defaults to False.
|
|
474
|
+
|
|
475
|
+
Returns:
|
|
476
|
+
str or tuple:
|
|
477
|
+
- If return_call_result is False: Tool prompts as a formatted string
|
|
478
|
+
- If return_call_result is True: Tuple of (tool_prompts, tool_names)
|
|
479
|
+
"""
|
|
480
|
+
# Extract parameters for compatibility
|
|
481
|
+
description = arguments.get("description", arguments.get("query", ""))
|
|
482
|
+
limit = arguments.get("limit", 10)
|
|
483
|
+
return_call_result = arguments.get("return_call_result", False)
|
|
484
|
+
categories = arguments.get("categories", None)
|
|
485
|
+
picked_tool_names = arguments.get("picked_tool_names", None)
|
|
486
|
+
|
|
487
|
+
# If we have a unified interface call, delegate to find_tools method
|
|
488
|
+
if return_call_result is not None:
|
|
489
|
+
return self.find_tools(
|
|
490
|
+
message=description,
|
|
491
|
+
picked_tool_names=picked_tool_names,
|
|
492
|
+
rag_num=limit,
|
|
493
|
+
return_call_result=return_call_result,
|
|
494
|
+
categories=categories,
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
# Otherwise use original JSON-based interface for backward compatibility
|
|
498
|
+
return self._run_json_search(arguments)
|
|
499
|
+
|
|
500
|
+
def _run_json_search(self, arguments):
|
|
501
|
+
"""
|
|
502
|
+
Original JSON-based search implementation for backward compatibility.
|
|
503
|
+
|
|
504
|
+
Args:
|
|
505
|
+
arguments (dict): Search arguments
|
|
506
|
+
|
|
507
|
+
Returns:
|
|
508
|
+
str: JSON string containing search results with relevance scores
|
|
509
|
+
"""
|
|
510
|
+
try:
|
|
511
|
+
# Extract arguments with unified parameter names
|
|
512
|
+
query = arguments.get(
|
|
513
|
+
"description", arguments.get("query", "")
|
|
514
|
+
) # Support both names for compatibility
|
|
515
|
+
categories = arguments.get("categories", None)
|
|
516
|
+
limit = arguments.get("limit", 10)
|
|
517
|
+
|
|
518
|
+
if not query:
|
|
519
|
+
return json.dumps(
|
|
520
|
+
{
|
|
521
|
+
"error": "Description parameter is required",
|
|
522
|
+
"query": query,
|
|
523
|
+
"tools": [],
|
|
524
|
+
},
|
|
525
|
+
indent=2,
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
# Ensure categories is None or a list (handle validation issue)
|
|
529
|
+
if categories is not None and not isinstance(categories, list):
|
|
530
|
+
categories = None
|
|
531
|
+
|
|
532
|
+
# Get all tools from tooluniverse
|
|
533
|
+
if not self.tooluniverse:
|
|
534
|
+
return json.dumps(
|
|
535
|
+
{
|
|
536
|
+
"error": "ToolUniverse not available",
|
|
537
|
+
"query": query,
|
|
538
|
+
"tools": [],
|
|
539
|
+
},
|
|
540
|
+
indent=2,
|
|
541
|
+
)
|
|
542
|
+
|
|
543
|
+
all_tools = self.tooluniverse.return_all_loaded_tools()
|
|
544
|
+
|
|
545
|
+
# Filter by categories if specified
|
|
546
|
+
if categories:
|
|
547
|
+
filtered_tools = self.tooluniverse.select_tools(
|
|
548
|
+
include_categories=categories
|
|
549
|
+
)
|
|
550
|
+
else:
|
|
551
|
+
filtered_tools = all_tools
|
|
552
|
+
|
|
553
|
+
# Build search index if not already built or if tools changed
|
|
554
|
+
if self._tool_index is None or self._total_documents != len(
|
|
555
|
+
[
|
|
556
|
+
t
|
|
557
|
+
for t in filtered_tools
|
|
558
|
+
if t.get("name", "") not in self.exclude_tools
|
|
559
|
+
]
|
|
560
|
+
):
|
|
561
|
+
self._build_tool_index(filtered_tools)
|
|
562
|
+
|
|
563
|
+
# Process query using NLP techniques
|
|
564
|
+
query_tokens = self._tokenize_and_normalize(query)
|
|
565
|
+
query_phrases = self._extract_phrases(query_tokens)
|
|
566
|
+
|
|
567
|
+
if not query_tokens and not query_phrases:
|
|
568
|
+
return json.dumps(
|
|
569
|
+
{
|
|
570
|
+
"error": "No meaningful search terms found in query",
|
|
571
|
+
"query": query,
|
|
572
|
+
"tools": [],
|
|
573
|
+
},
|
|
574
|
+
indent=2,
|
|
575
|
+
)
|
|
576
|
+
|
|
577
|
+
# Calculate relevance scores for all tools
|
|
578
|
+
tool_scores = []
|
|
579
|
+
|
|
580
|
+
for tool in filtered_tools:
|
|
581
|
+
tool_name = tool.get("name", "")
|
|
582
|
+
|
|
583
|
+
# Skip excluded tools
|
|
584
|
+
if tool_name in self.exclude_tools:
|
|
585
|
+
continue
|
|
586
|
+
|
|
587
|
+
# Apply category filters if specified
|
|
588
|
+
tool_category = tool.get("category", "unknown")
|
|
589
|
+
if (
|
|
590
|
+
self.include_categories
|
|
591
|
+
and tool_category not in self.include_categories
|
|
592
|
+
):
|
|
593
|
+
continue
|
|
594
|
+
if self.exclude_categories and tool_category in self.exclude_categories:
|
|
595
|
+
continue
|
|
596
|
+
|
|
597
|
+
# Calculate TF-IDF score
|
|
598
|
+
tfidf_score = self._calculate_tfidf_score(query_phrases, tool_name)
|
|
599
|
+
|
|
600
|
+
# Calculate exact match bonus
|
|
601
|
+
exact_bonus = self._calculate_exact_match_bonus(query, tool)
|
|
602
|
+
|
|
603
|
+
# Combined relevance score
|
|
604
|
+
total_score = tfidf_score + exact_bonus
|
|
605
|
+
|
|
606
|
+
# Only include tools with positive relevance
|
|
607
|
+
if total_score > 0:
|
|
608
|
+
tool_info = {
|
|
609
|
+
"name": tool_name,
|
|
610
|
+
"description": tool.get("description", ""),
|
|
611
|
+
"type": tool.get("type", ""),
|
|
612
|
+
"category": tool_category,
|
|
613
|
+
"parameters": tool.get("parameter", {}),
|
|
614
|
+
"required": tool.get("required", []),
|
|
615
|
+
"relevance_score": round(total_score, 4),
|
|
616
|
+
"tfidf_score": round(tfidf_score, 4),
|
|
617
|
+
"exact_match_bonus": round(exact_bonus, 4),
|
|
618
|
+
}
|
|
619
|
+
tool_scores.append(tool_info)
|
|
620
|
+
|
|
621
|
+
# Sort by relevance score (highest first) and limit results
|
|
622
|
+
tool_scores.sort(key=lambda x: x["relevance_score"], reverse=True)
|
|
623
|
+
matching_tools = tool_scores[:limit]
|
|
624
|
+
|
|
625
|
+
# Remove internal scoring details from final output
|
|
626
|
+
for tool in matching_tools:
|
|
627
|
+
tool.pop("tfidf_score", None)
|
|
628
|
+
tool.pop("exact_match_bonus", None)
|
|
629
|
+
|
|
630
|
+
return json.dumps(
|
|
631
|
+
{
|
|
632
|
+
"query": query,
|
|
633
|
+
"search_method": "Advanced keyword matching (TF-IDF + NLP)",
|
|
634
|
+
"total_matches": len(matching_tools),
|
|
635
|
+
"categories_filtered": categories,
|
|
636
|
+
"processing_info": {
|
|
637
|
+
"query_tokens": len(query_tokens),
|
|
638
|
+
"query_phrases": len(query_phrases),
|
|
639
|
+
"indexed_tools": self._total_documents,
|
|
640
|
+
},
|
|
641
|
+
"tools": matching_tools,
|
|
642
|
+
},
|
|
643
|
+
indent=2,
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
except Exception as e:
|
|
647
|
+
return json.dumps(
|
|
648
|
+
{
|
|
649
|
+
"error": f"Advanced keyword search error: {str(e)}",
|
|
650
|
+
"query": arguments.get("query", ""),
|
|
651
|
+
"tools": [],
|
|
652
|
+
},
|
|
653
|
+
indent=2,
|
|
654
|
+
)
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
# # Tool configuration for ToolUniverse registration
|
|
658
|
+
# TOOL_CONFIG = {
|
|
659
|
+
# "name": "ToolFinderKeyword",
|
|
660
|
+
# "description": "Advanced keyword-based tool finder using NLP techniques, TF-IDF scoring, and semantic phrase matching for precise tool discovery",
|
|
661
|
+
# "type": "tool_finder_keyword",
|
|
662
|
+
# "category": "tool_finder",
|
|
663
|
+
# "parameter": {
|
|
664
|
+
# "type": "object",
|
|
665
|
+
# "properties": {
|
|
666
|
+
# "query": {
|
|
667
|
+
# "type": "string",
|
|
668
|
+
# "description": "Search query describing the desired functionality. Uses advanced NLP processing including tokenization, stop word removal, and stemming."
|
|
669
|
+
# },
|
|
670
|
+
# "categories": {
|
|
671
|
+
# "type": "array",
|
|
672
|
+
# "items": {"type": "string"},
|
|
673
|
+
# "description": "Optional list of tool categories to filter by"
|
|
674
|
+
# },
|
|
675
|
+
# "limit": {
|
|
676
|
+
# "type": "integer",
|
|
677
|
+
# "description": "Maximum number of tools to return, ranked by TF-IDF relevance score (default: 10)",
|
|
678
|
+
# "default": 10
|
|
679
|
+
# }
|
|
680
|
+
# },
|
|
681
|
+
# "required": ["query"]
|
|
682
|
+
# },
|
|
683
|
+
# "configs": {
|
|
684
|
+
# "exclude_tools": [
|
|
685
|
+
# "Tool_RAG", "Tool_Finder", "Finish", "CallAgent",
|
|
686
|
+
# "ToolFinderLLM", "ToolFinderKeyword"
|
|
687
|
+
# ],
|
|
688
|
+
# "features": [
|
|
689
|
+
# "tokenization", "stop_word_removal", "stemming",
|
|
690
|
+
# "phrase_extraction", "tfidf_scoring", "exact_match_bonus"
|
|
691
|
+
# ]
|
|
692
|
+
# }
|
|
693
|
+
# }
|