tooluniverse 0.1.4__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse has been flagged as potentially problematic; see the registry's advisory page for details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clait_tools.json +108 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +544 -168
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +82 -58
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/software_tools.json +4954 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
- tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
- tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.0.dist-info/METADATA +377 -0
- tooluniverse-1.0.0.dist-info/RECORD +186 -0
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +1 -1
- tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
- tooluniverse-0.1.4.dist-info/METADATA +0 -141
- tooluniverse-0.1.4.dist-info/RECORD +0 -18
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import os
|
|
3
|
+
from copy import deepcopy
|
|
4
|
+
from .base_tool import BaseTool
|
|
5
|
+
from .utils import download_from_hf
|
|
6
|
+
from .tool_registry import register_tool
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_tool("DatasetTool")
class DatasetTool(BaseTool):
    """
    Tool to search and filter the DrugBank vocabulary dataset.

    Provides text-based searching across fields (``query`` argument) and
    criteria-based filtering on a single field (``field``/``condition``/
    ``value`` arguments).  On any dataset-load failure the tool degrades to
    an empty DataFrame and reports a clean error from :meth:`run`.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        # Populated by _load_dataset(); empty DataFrame on any load failure.
        self.dataset = None
        self.query_schema = tool_config[
            "query_schema"
        ]  # TODO: Move query_schema to BaseTool
        self.parameters = tool_config["parameter"][
            "properties"
        ]  # TODO: Move parameters to BaseTool
        self._load_dataset()

    def _resolve_dataset_path(self):
        """Return the dataset file path from the tool config, or None.

        Prefers a Hugging Face download (``hf_dataset_path``) over a local
        path (``local_dataset_path``); relative local paths are resolved
        against the project root (two directory levels above this file).
        Prints a diagnostic and returns None when no usable path exists.
        """
        if "hf_dataset_path" in self.tool_config:
            # Download dataset from Hugging Face Hub
            result = download_from_hf(self.tool_config)
            if not result.get("success", False):
                print(f"Failed to download dataset: {result.get('error')}")
                return None
            return result["local_path"]

        if "local_dataset_path" in self.tool_config:
            dataset_path = self.tool_config["local_dataset_path"]
            # If relative path, make it relative to the project root
            if not os.path.isabs(dataset_path):
                # Go up from src/tooluniverse to project root
                project_root = os.path.dirname(
                    os.path.dirname(os.path.dirname(__file__))
                )
                dataset_path = os.path.join(project_root, dataset_path)
            return dataset_path

        print("No dataset path provided in tool configuration")
        return None

    def _load_dataset(self):
        """Load the dataset file into ``self.dataset`` as a DataFrame.

        Supported formats: .csv, .tsv, .txt (tab-separated), .xlsx, .pkl,
        .parquet.  Never raises: any failure (missing path, unsupported
        format, read error) leaves ``self.dataset`` as an empty DataFrame.
        """
        # Map file suffix -> pandas reader; keeps format dispatch in one place.
        readers = {
            ".csv": pd.read_csv,
            ".tsv": lambda p: pd.read_csv(p, sep="\t"),
            ".txt": lambda p: pd.read_table(p, sep="\t"),
            ".xlsx": pd.read_excel,
            ".pkl": pd.read_pickle,
            ".parquet": pd.read_parquet,
        }
        try:
            dataset_path = self._resolve_dataset_path()
            if dataset_path is None:
                self.dataset = pd.DataFrame()
                return

            suffix = os.path.splitext(dataset_path)[1].lower()
            reader = readers.get(suffix)
            if reader is None:
                # Bug fix: an unrecognized extension previously left
                # self.dataset as None and crashed below with a confusing
                # AttributeError; fail explicitly instead.
                print(f"Unsupported dataset file format: '{suffix}'")
                self.dataset = pd.DataFrame()
                return

            self.dataset = reader(dataset_path)

            # Clean column names
            self.dataset.columns = self.dataset.columns.str.strip()

            # Fill NaN values with empty strings for better searching
            self.dataset = self.dataset.fillna("")

            print(f"Loaded dataset with {len(self.dataset)} records")

        except Exception as e:
            print(f"Error loading dataset: {e}")
            self.dataset = pd.DataFrame()

    def run(self, arguments):
        """Main entry point: dispatch to filter ('field') or search ('query').

        Args:
            arguments (dict): Caller arguments; merged over the tool's
                query_schema defaults for the parameters it declares.

        Returns:
            dict: Search/filter results, or an ``{"error": ...}`` dict.
        """
        if self.dataset is None or self.dataset.empty:
            return {"error": "Dataset not loaded or is empty"}

        query_params = deepcopy(self.query_schema)
        expected_param_names = self.parameters.keys()

        # Prepare API parameters from arguments (None values keep defaults)
        for k in expected_param_names:
            if k in arguments and arguments[k] is not None:
                query_params[k] = arguments[k]

        # Determine operation based on arguments - completely separate functions
        if "field" in query_params:
            # Use dedicated filter function
            return self._drugbank_filter(query_params)
        elif "query" in query_params:
            # Use dedicated search function
            return self._drugbank_search(query_params)
        else:
            return {
                "error": "Invalid arguments: must provide either 'query' for search or 'field' for filtering"
            }

    # ==================== SEARCH FUNCTIONALITY ====================

    def _drugbank_search(self, arguments):
        """
        Search drugs by name, ID, synonyms, or other fields using text-based queries.

        This function is dedicated to text-based searching across specified fields.
        It performs substring or exact matching based on user preferences.

        Args:
            arguments (dict): Search parameters including:
                - query (str): Text to search for
                - search_fields (list): Fields to search in; defaults to all
                  dataset columns when omitted
                - case_sensitive (bool): Whether search is case sensitive
                - exact_match (bool): Whether to perform exact matching
                - limit (int): Maximum number of results

        Returns:
            dict: Search results with matched records and metadata
        """
        query = arguments.get("query", "")
        # Bug fix: a missing 'search_fields' used to be None and raised
        # TypeError when iterated below; default to every column instead.
        search_fields = arguments.get("search_fields") or list(self.dataset.columns)
        case_sensitive = arguments.get("case_sensitive", False)
        exact_match = arguments.get("exact_match", False)
        limit = arguments.get("limit", 50)

        if not query:
            return {"error": "Query parameter is required for search"}

        # Prepare search query
        if not case_sensitive:
            query = query.lower()

        results = []

        for _, row in self.dataset.iterrows():
            match_found = False
            matched_fields = []

            for field in search_fields:
                if field not in self.dataset.columns:
                    continue

                field_value = str(row[field])
                if not case_sensitive:
                    field_value = field_value.lower()

                if exact_match:
                    # For synonyms, check each synonym separately
                    if (
                        field.lower() == "synonyms" and "|" in field_value
                    ):  # TODO: rename corresponding columns in each dataset to `synonyms` and use `|` to separate keywords
                        synonyms = [s.strip() for s in field_value.split("|")]
                        if query in synonyms:
                            match_found = True
                            matched_fields.append(field)
                    elif query == field_value:
                        match_found = True
                        matched_fields.append(field)
                else:
                    if query in field_value:
                        match_found = True
                        matched_fields.append(field)

            if match_found:
                result_row = row.to_dict()
                result_row["matched_fields"] = matched_fields
                results.append(result_row)

                if len(results) >= limit:
                    break

        return {
            "query": arguments.get("query"),
            "total_results": len(results),
            "results": results,
            "search_parameters": {
                "search_fields": search_fields,
                "case_sensitive": case_sensitive,
                "exact_match": exact_match,
                "limit": limit,
            },
        }

    # ==================== FILTER FUNCTIONALITY ====================

    def _drugbank_filter(self, arguments):
        """
        Filter drugs based on specific criteria and field-based conditions.

        This function is dedicated to criteria-based filtering using simple field-condition-value parameters.
        It supports filter types like contains, starts_with, ends_with, exact, not_empty.

        Args:
            arguments (dict): Filter parameters including:
                - field (str): Field name to filter on
                - condition (str): Type of condition (contains, starts_with, ends_with, exact, not_empty)
                - value (str): Value to filter by (optional for not_empty condition)
                - limit (int): Maximum number of results

        Returns:
            dict: Filtered results with matched records and applied filters
        """
        field = arguments.get("field")
        condition = arguments.get("condition")
        value = arguments.get("value", "")
        limit = arguments.get("limit", 100)

        if not field or not condition:
            return {
                "error": "Both 'field' and 'condition' parameters are required for filtering"
            }

        if field not in self.dataset.columns:
            return {
                "error": f"Field '{field}' not found in dataset. Available fields: {list(self.dataset.columns)}"
            }

        # Check if value is required for this condition
        if condition != "not_empty" and not value:
            return {
                "error": f"'value' parameter is required for condition '{condition}'"
            }

        filtered_data = self.dataset.copy()
        applied_filter = ""

        try:
            if condition == "contains":
                mask = filtered_data[field].str.contains(value, case=False, na=False)
                applied_filter = f"{field} contains '{value}'"

            elif condition == "starts_with":
                mask = filtered_data[field].str.startswith(value, na=False)
                applied_filter = f"{field} starts with '{value}'"

            elif condition == "ends_with":
                mask = filtered_data[field].str.endswith(value, na=False)
                applied_filter = f"{field} ends with '{value}'"

            elif condition == "exact":
                mask = filtered_data[field] == value
                applied_filter = f"{field} equals '{value}'"

            elif condition == "not_empty":
                mask = (filtered_data[field] != "") & (filtered_data[field].notna())
                applied_filter = f"{field} is not empty"

            else:
                return {
                    "error": f"Unknown condition '{condition}'. Supported: contains, starts_with, ends_with, exact, not_empty"
                }

            filtered_data = filtered_data[mask]

        except Exception as e:
            # Non-string dtypes can make the .str accessor raise; surface it.
            return {"error": f"Error applying filter: {str(e)}"}

        # Apply limit
        results = filtered_data.head(limit).to_dict("records")

        return {
            "total_matches": len(filtered_data),
            "returned_results": len(results),
            "results": results,
            "applied_filter": applied_filter,
            "filter_parameters": {
                "field": field,
                "condition": condition,
                "value": value if condition != "not_empty" else None,
                "limit": limit,
            },
        }

    # ==================== UTILITY FUNCTIONS ====================

    def get_dataset_info(self):
        """Get information about the loaded dataset (record count, columns, sample)."""
        if self.dataset is None or self.dataset.empty:
            return {"error": "Dataset not loaded or is empty"}

        return {
            "total_records": len(self.dataset),
            "columns": list(self.dataset.columns),
            "sample_record": (
                self.dataset.iloc[0].to_dict() if len(self.dataset) > 0 else None
            ),
        }
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Default tool configuration files mapping.
|
|
3
|
+
|
|
4
|
+
This module contains the default mapping of tool categories to their JSON configuration files.
|
|
5
|
+
It's separated from __init__.py to avoid circular imports.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
# Get the current directory where this file is located
|
|
11
|
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
12
|
+
|
|
13
|
+
default_tool_files = {
|
|
14
|
+
"special_tools": os.path.join(current_dir, "data", "special_tools.json"),
|
|
15
|
+
"tool_finder": os.path.join(current_dir, "data", "finder_tools.json"),
|
|
16
|
+
# 'tool_finder_llm': os.path.join(current_dir, 'data', 'tool_finder_llm_config.json'),
|
|
17
|
+
"opentarget": os.path.join(current_dir, "data", "opentarget_tools.json"),
|
|
18
|
+
"fda_drug_label": os.path.join(current_dir, "data", "fda_drug_labeling_tools.json"),
|
|
19
|
+
"monarch": os.path.join(current_dir, "data", "monarch_tools.json"),
|
|
20
|
+
"clinical_trials": os.path.join(
|
|
21
|
+
current_dir, "data", "clinicaltrials_gov_tools.json"
|
|
22
|
+
),
|
|
23
|
+
"fda_drug_adverse_event": os.path.join(
|
|
24
|
+
current_dir, "data", "fda_drug_adverse_event_tools.json"
|
|
25
|
+
),
|
|
26
|
+
"ChEMBL": os.path.join(current_dir, "data", "chembl_tools.json"),
|
|
27
|
+
"EuropePMC": os.path.join(current_dir, "data", "europe_pmc_tools.json"),
|
|
28
|
+
"semantic_scholar": os.path.join(
|
|
29
|
+
current_dir, "data", "semantic_scholar_tools.json"
|
|
30
|
+
),
|
|
31
|
+
"pubtator": os.path.join(current_dir, "data", "pubtator_tools.json"),
|
|
32
|
+
"EFO": os.path.join(current_dir, "data", "efo_tools.json"),
|
|
33
|
+
"Enrichr": os.path.join(current_dir, "data", "enrichr_tools.json"),
|
|
34
|
+
"HumanBase": os.path.join(current_dir, "data", "humanbase_tools.json"),
|
|
35
|
+
"OpenAlex": os.path.join(current_dir, "data", "openalex_tools.json"),
|
|
36
|
+
"agents": os.path.join(current_dir, "data", "agentic_tools.json"),
|
|
37
|
+
"dataset": os.path.join(current_dir, "data", "dataset_tools.json"),
|
|
38
|
+
# 'mcp_clients': os.path.join(current_dir, 'data', 'mcp_client_tools_example.json'),
|
|
39
|
+
"mcp_auto_loader_txagent": os.path.join(
|
|
40
|
+
current_dir, "data", "txagent_client_tools.json"
|
|
41
|
+
),
|
|
42
|
+
"mcp_auto_loader_expert_feedback": os.path.join(
|
|
43
|
+
current_dir, "data", "expert_feedback_tools.json"
|
|
44
|
+
),
|
|
45
|
+
"adverse_event": os.path.join(current_dir, "data", "adverse_event_tools.json"),
|
|
46
|
+
"dailymed": os.path.join(current_dir, "data", "dailymed_tools.json"),
|
|
47
|
+
"hpa": os.path.join(current_dir, "data", "hpa_tools.json"),
|
|
48
|
+
"reactome": os.path.join(current_dir, "data", "reactome_tools.json"),
|
|
49
|
+
"pubchem": os.path.join(current_dir, "data", "pubchem_tools.json"),
|
|
50
|
+
"medlineplus": os.path.join(current_dir, "data", "medlineplus_tools.json"),
|
|
51
|
+
"uniprot": os.path.join(current_dir, "data", "uniprot_tools.json"),
|
|
52
|
+
# 'software': os.path.join(current_dir, 'data', 'software_tools.json'),
|
|
53
|
+
# Package tools - categorized software tools
|
|
54
|
+
"software_bioinformatics": os.path.join(
|
|
55
|
+
current_dir, "data", "packages", "bioinformatics_core_tools.json"
|
|
56
|
+
),
|
|
57
|
+
"software_genomics": os.path.join(
|
|
58
|
+
current_dir, "data", "packages", "genomics_tools.json"
|
|
59
|
+
),
|
|
60
|
+
"software_single_cell": os.path.join(
|
|
61
|
+
current_dir, "data", "packages", "single_cell_tools.json"
|
|
62
|
+
),
|
|
63
|
+
"software_structural_biology": os.path.join(
|
|
64
|
+
current_dir, "data", "packages", "structural_biology_tools.json"
|
|
65
|
+
),
|
|
66
|
+
"software_cheminformatics": os.path.join(
|
|
67
|
+
current_dir, "data", "packages", "cheminformatics_tools.json"
|
|
68
|
+
),
|
|
69
|
+
"software_machine_learning": os.path.join(
|
|
70
|
+
current_dir, "data", "packages", "machine_learning_tools.json"
|
|
71
|
+
),
|
|
72
|
+
"software_visualization": os.path.join(
|
|
73
|
+
current_dir, "data", "packages", "visualization_tools.json"
|
|
74
|
+
),
|
|
75
|
+
"software_scientific_computing": os.path.join(
|
|
76
|
+
current_dir, "data", "packages", "scientific_computing_tools.json"
|
|
77
|
+
),
|
|
78
|
+
"software_physics_astronomy": os.path.join(
|
|
79
|
+
current_dir, "data", "packages", "physics_astronomy_tools.json"
|
|
80
|
+
),
|
|
81
|
+
"software_earth_sciences": os.path.join(
|
|
82
|
+
current_dir, "data", "packages", "earth_sciences_tools.json"
|
|
83
|
+
),
|
|
84
|
+
"software_image_processing": os.path.join(
|
|
85
|
+
current_dir, "data", "packages", "image_processing_tools.json"
|
|
86
|
+
),
|
|
87
|
+
"software_neuroscience": os.path.join(
|
|
88
|
+
current_dir, "data", "packages", "neuroscience_tools.json"
|
|
89
|
+
),
|
|
90
|
+
"go": os.path.join(current_dir, "data", "gene_ontology_tools.json"),
|
|
91
|
+
"compose": os.path.join(current_dir, "data", "compose_tools.json"),
|
|
92
|
+
"idmap": os.path.join(current_dir, "data", "idmap_tools.json"),
|
|
93
|
+
"disease_target_score": os.path.join(
|
|
94
|
+
current_dir, "data", "disease_target_score_tools.json"
|
|
95
|
+
),
|
|
96
|
+
"mcp_auto_loader_uspto_downloader": os.path.join(
|
|
97
|
+
current_dir, "data", "uspto_downloader_tools.json"
|
|
98
|
+
),
|
|
99
|
+
"uspto": os.path.join(current_dir, "data", "uspto_tools.json"),
|
|
100
|
+
"xml": os.path.join(current_dir, "data", "xml_tools.json"),
|
|
101
|
+
"mcp_auto_loader_boltz": os.path.join(current_dir, "data", "boltz_tools.json"),
|
|
102
|
+
"url": os.path.join(current_dir, "data", "url_fetch_tools.json"),
|
|
103
|
+
# 'langchain': os.path.join(current_dir, 'data', 'langchain_tools.json'),
|
|
104
|
+
"rcsb_pdb": os.path.join(current_dir, "data", "rcsb_pdb_tools.json"),
|
|
105
|
+
"tool_composition": os.path.join(
|
|
106
|
+
current_dir, "data", "tool_composition_tools.json"
|
|
107
|
+
),
|
|
108
|
+
"embedding": os.path.join(current_dir, "data", "embedding_tools.json"),
|
|
109
|
+
"gwas": os.path.join(current_dir, "data", "gwas_tools.json"),
|
|
110
|
+
"admetai": os.path.join(current_dir, "data", "admetai_tools.json"),
|
|
111
|
+
# duplicate key removed
|
|
112
|
+
"alphafold": os.path.join(current_dir, "data", "alphafold_tools.json"),
|
|
113
|
+
"output_summarization": os.path.join(
|
|
114
|
+
current_dir, "data", "output_summarization_tools.json"
|
|
115
|
+
),
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def get_default_hook_config():
|
|
120
|
+
"""
|
|
121
|
+
Get default hook configuration.
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
dict: Default hook configuration with basic settings
|
|
125
|
+
"""
|
|
126
|
+
return {
|
|
127
|
+
"global_settings": {
|
|
128
|
+
"default_timeout": 30,
|
|
129
|
+
"max_hook_depth": 3,
|
|
130
|
+
"enable_hook_caching": True,
|
|
131
|
+
"hook_execution_order": "priority_desc",
|
|
132
|
+
},
|
|
133
|
+
"hook_type_defaults": {
|
|
134
|
+
"SummarizationHook": {
|
|
135
|
+
"default_output_length_threshold": 5000,
|
|
136
|
+
"default_chunk_size": 32000,
|
|
137
|
+
"default_focus_areas": "key_findings_and_results",
|
|
138
|
+
"default_max_summary_length": 3000,
|
|
139
|
+
},
|
|
140
|
+
"FileSaveHook": {
|
|
141
|
+
"default_temp_dir": None,
|
|
142
|
+
"default_file_prefix": "tool_output",
|
|
143
|
+
"default_include_metadata": True,
|
|
144
|
+
"default_auto_cleanup": False,
|
|
145
|
+
"default_cleanup_age_hours": 24,
|
|
146
|
+
},
|
|
147
|
+
},
|
|
148
|
+
"hooks": [
|
|
149
|
+
{
|
|
150
|
+
"name": "default_summarization_hook",
|
|
151
|
+
"type": "SummarizationHook",
|
|
152
|
+
"enabled": True,
|
|
153
|
+
"priority": 1,
|
|
154
|
+
"conditions": {"output_length": {"operator": ">", "threshold": 5000}},
|
|
155
|
+
"hook_config": {
|
|
156
|
+
"composer_tool": "OutputSummarizationComposer",
|
|
157
|
+
"chunk_size": 32000,
|
|
158
|
+
"focus_areas": "key_findings_and_results",
|
|
159
|
+
"max_summary_length": 3000,
|
|
160
|
+
},
|
|
161
|
+
}
|
|
162
|
+
],
|
|
163
|
+
"tool_specific_hooks": {},
|
|
164
|
+
"category_hooks": {},
|
|
165
|
+
}
|
tooluniverse/efo_tool.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from .base_tool import BaseTool
|
|
3
|
+
from .tool_registry import register_tool
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@register_tool("EFOTool")
class EFOTool(BaseTool):
    """
    Tool to lookup Experimental Factor Ontology (EFO) IDs for diseases via the
    EMBL-EBI OLS API.
    """

    def __init__(self, tool_config, base_url="https://www.ebi.ac.uk/ols4/api/search"):
        super().__init__(tool_config)
        self.base_url = base_url

    def run(self, arguments):
        """Look up EFO terms matching ``arguments['disease']``.

        ``rows`` (default 1) controls how many hits are requested; with one
        row a single result dict is returned, otherwise a list of them.
        """
        disease_name = arguments.get("disease")
        n_rows = arguments.get("rows", 1)
        if not disease_name:
            return {"error": "`disease` parameter is required."}
        return self._search(disease_name, n_rows)

    def _search(self, disease, rows):
        """Query the OLS search endpoint and condense the hits.

        Returns ``{"efo_id", "name"}`` for rows == 1, a list of such dicts
        otherwise, ``None`` when there are no hits, or an error dict when the
        HTTP request fails.
        """
        try:
            response = requests.get(
                self.base_url,
                params={"ontology": "efo", "q": disease, "rows": rows},
                timeout=20,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            return {"error": "OLS API request failed.", "details": str(e)}

        docs = response.json().get("response", {}).get("docs", [])
        if not docs:
            return None

        hits = [
            {"efo_id": doc.get("short_form"), "name": doc.get("label")}
            for doc in docs
        ]
        return hits[0] if rows == 1 else hits