tooluniverse 0.2.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/adverse_event_tools.json +108 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +1 -1
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +1 -1
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.1.dist-info/METADATA +387 -0
- tooluniverse-1.0.1.dist-info/RECORD +182 -0
- tooluniverse-1.0.1.dist-info/entry_points.txt +9 -0
- tooluniverse/generate_mcp_tools.py +0 -113
- tooluniverse/mcp_server.py +0 -3340
- tooluniverse-0.2.0.dist-info/METADATA +0 -139
- tooluniverse-0.2.0.dist-info/RECORD +0 -21
- tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/WHEEL +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/top_level.txt +0 -0
tooluniverse/xml_tool.py
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
# import xml.etree.ElementTree as ET
|
|
2
|
+
from lxml import etree as ET
|
|
3
|
+
from typing import List, Dict, Any, Optional, Set
|
|
4
|
+
from .base_tool import BaseTool
|
|
5
|
+
from .utils import download_from_hf
|
|
6
|
+
from .tool_registry import register_tool
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_tool("XMLTool")
class XMLDatasetTool(BaseTool):
    """
    Tool to search and filter XML datasets that are organized as a collection
    of searchable records (e.g., dataset of medical subjects or drug
    descriptions).

    Supports user-friendly queries without requiring XPath knowledge: callers
    pass either a free-text ``query`` (search) or a ``condition`` (filter)
    argument to :meth:`run`.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        super().__init__(tool_config)
        self.xml_root: Optional[ET.Element] = None
        self.records: List[ET.Element] = []

        # Read the "settings" section once; tolerate a missing/None section
        # instead of raising AttributeError on every .get() chain.
        settings: Dict[str, Any] = tool_config.get("settings") or {}

        # XPath selecting one element per record (default: every descendant).
        self.record_xpath: str = settings.get("record_xpath", ".//*")
        # Prefix -> URI map forwarded to every findall() call.
        self.namespaces: Dict[str, str] = settings.get("namespaces", {})
        # Dict of fields we're interested in extracting from each record
        # (field name -> XPath string, or nested {"parent_path", "subfields"}).
        self.field_mappings: Dict[str, str] = settings.get("field_mappings", {})
        # Field to filter on via _filter(), if specified in the config.
        self.filter_field: Optional[str] = settings.get("filter_field")
        # Fields scanned by _search(); defaults to the record text plus every
        # mapped field.
        self.search_fields: List[str] = settings.get(
            "search_fields", ["_text"] + list(self.field_mappings.keys())
        )
        self._record_cache: List[Dict[str, Any]] = []  # Cache extracted data
        # Flattened search-only keys; stripped from records before returning.
        self.temporary_record_fields: Set[str] = set()
        self._load_dataset()

    def _load_dataset(self) -> None:
        """Load and parse the XML dataset, populating ``self.records``."""
        try:
            xml_path = self._get_dataset_path()
            if not xml_path:
                return

            tree = ET.parse(xml_path)
            self.xml_root = tree.getroot()
            self.records = self.xml_root.findall(
                self.record_xpath, namespaces=self.namespaces
            )

            print(
                f"Loaded XML dataset: {len(self.records)} records from root '{self.xml_root.tag}'"
            )

        except Exception as e:
            # Best-effort load: the tool stays constructible (run() reports an
            # empty-dataset error) rather than crashing at construction time.
            print(f"Error loading XML dataset: {e}")
            self.records = []

    def _get_dataset_path(self) -> Optional[str]:
        """Resolve the XML file path: HF download first, then a local path.

        Returns ``None`` (after printing a diagnostic) when no usable source
        is configured or the download fails.
        """
        settings = self.tool_config.get("settings") or {}

        if "hf_dataset_path" in settings:
            result = download_from_hf(settings)
            if result.get("success"):
                return result["local_path"]
            print(f"Failed to download dataset: {result.get('error')}")
            return None

        if "local_dataset_path" in settings:
            return settings["local_dataset_path"]

        print("No dataset path provided in tool configuration")
        return None

    def _extract_record_data(self, record_element: ET.Element) -> Dict[str, Any]:
        """Extract the configured fields from one record element.

        Always includes the pseudo-fields ``_tag``, ``_text`` and
        ``_attributes``. Nested mappings ({"parent_path", "subfields"}) yield
        a list of dicts plus flattened ``<field>_<subfield>`` strings that are
        registered in ``self.temporary_record_fields`` for search only.
        """
        data = {
            "_tag": record_element.tag,
            "_text": (record_element.text or "").strip(),
            "_attributes": dict(record_element.attrib),
        }

        for field_name, xpath_expr in self.field_mappings.items():
            if isinstance(xpath_expr, dict) and "parent_path" in xpath_expr:
                # Nested structure: one entry per matching parent element.
                parent_xpath = xpath_expr["parent_path"]
                subfields = xpath_expr.get("subfields", {})
                elements = record_element.findall(
                    parent_xpath, namespaces=self.namespaces
                )
                structured_list = []
                for el in elements:
                    entry = {}
                    for sf_name, sf_path in subfields.items():
                        entry[sf_name] = self._extract_field_value(el, sf_path)
                    if any(entry.values()):  # Only add entries with non-empty values
                        structured_list.append(entry)

                data[field_name] = structured_list

                # Flatten each subfield into a single pipe-joined string so
                # the generic substring search can see nested values.
                for sf_name, _ in subfields.items():
                    flat_key = f"{field_name}_{sf_name}"
                    data[flat_key] = " | ".join(
                        entry.get(sf_name, "") for entry in structured_list
                    )
                    self.temporary_record_fields.add(flat_key)
            else:
                # Regular flat field extraction
                data[field_name] = self._extract_field_value(record_element, xpath_expr)

        return data

    def _extract_field_value(self, element: ET.Element, xpath_expr: str) -> str:
        """Extract a field value using an XPath expression.

        Supports ``path/@attr`` (attribute of matched elements), ``@attr``
        (attribute of *element* itself) and plain element paths (joined text).
        Multiple matches are joined with " | ". Returns "" on any error
        (best-effort extraction).
        """
        try:
            # Handle attribute extraction with /@
            if "/@" in xpath_expr:
                elem_path, attr_name = xpath_expr.rsplit("/@", 1)
                found_elements = element.findall(elem_path, namespaces=self.namespaces)
                if not found_elements:
                    return ""

                # Generator expression keeps memory flat for large matches.
                values = (
                    el.get(attr_name, "").strip()
                    for el in found_elements
                    if el.get(attr_name)
                )
                return " | ".join(values)

            # Handle direct attribute on current element
            if xpath_expr.startswith("@"):
                return element.get(xpath_expr[1:], "").strip()

            # Handle text content extraction
            found_elements = element.findall(xpath_expr, namespaces=self.namespaces)
            if not found_elements:
                return ""

            values = ((elem.text or "").strip() for elem in found_elements)
            non_empty_values = (v for v in values if v)
            return " | ".join(non_empty_values)

        except Exception:
            return ""

    def _get_all_records_data(self) -> List[Dict[str, Any]]:
        """Get all records' extracted data, building the cache on first use."""
        if not self._record_cache:
            self._record_cache = [
                self._extract_record_data(record) for record in self.records
            ]
        return self._record_cache

    def _strip_temporary_fields(self, record_data: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *record_data* without the search-only flat keys."""
        result = record_data.copy()
        for temp in self.temporary_record_fields:
            result.pop(temp, None)
        return result

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Main entry point: dispatch to search ('query') or filter ('condition')."""
        if not self.records:
            return {"error": "XML dataset not loaded or contains no records"}

        if "query" in arguments:
            return self._search(arguments)
        elif "condition" in arguments:
            return self._filter(arguments)
        else:
            return {
                "error": "Provide either 'query' for search or 'condition' for filtering"
            }

    def _search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search records by text content across the configured search fields.

        Recognized arguments: ``query`` (required), ``case_sensitive``,
        ``exact_match``, ``limit`` (capped at 1000).
        """
        query = arguments.get("query", "").strip()
        if not query:
            return {"error": "Query parameter is required"}

        case_sensitive = arguments.get("case_sensitive", False)
        exact_match = arguments.get("exact_match", False)
        limit = min(arguments.get("limit", 50), 1000)  # Cap at 1000

        search_query = query if case_sensitive else query.lower()
        results = []

        all_records = self._get_all_records_data()
        total_matches = 0
        for record_data in all_records:
            matched_fields = self._find_matches(
                record_data,
                search_query,
                self.search_fields,
                case_sensitive,
                exact_match,
            )

            if matched_fields:
                total_matches += 1
                # Keep counting past the limit so total_matches is exact.
                if len(results) < limit:
                    result_record = self._strip_temporary_fields(record_data)
                    result_record["matched_fields"] = matched_fields
                    results.append(result_record)

        return {
            "query": query,
            "total_matches": total_matches,
            "total_returned_results": len(results),
            "results": results,
            "search_parameters": {
                "case_sensitive": case_sensitive,
                "exact_match": exact_match,
                "limit": limit,
            },
        }

    def _find_matches(
        self,
        record_data: Dict[str, Any],
        search_query: str,
        search_fields: List[str],
        case_sensitive: bool,
        exact_match: bool,
    ) -> List[str]:
        """Return the names of the fields in *record_data* matching the query."""
        matched_fields = []

        for field in search_fields:
            if field not in record_data:
                continue

            field_value = self._get_searchable_value(record_data, field, case_sensitive)

            if self._is_match(field_value, search_query, exact_match):
                matched_fields.append(field)

        return matched_fields

    def _get_searchable_value(
        self, record_data: Dict[str, Any], field: str, case_sensitive: bool
    ) -> str:
        """Get a searchable string for a field (attribute values are joined)."""
        if field == "_attributes":
            value = " ".join(record_data["_attributes"].values())
        else:
            value = str(record_data.get(field, ""))

        return value if case_sensitive else value.lower()

    def _is_match(self, field_value: str, search_query: str, exact_match: bool) -> bool:
        """Check whether *field_value* matches *search_query*."""
        if exact_match:
            if "|" in field_value:  # Pipe-joined multi-value field
                return search_query in [v.strip() for v in field_value.split("|")]
            return search_query == field_value.strip()

        return search_query in field_value

    def _filter(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Filter records on the configured ``filter_field``.

        Recognized arguments: ``condition`` (required), ``value`` (required
        for all conditions except not_empty/has_attribute), ``limit``
        (capped at 1000).
        """
        field = self.filter_field
        condition = arguments.get("condition")
        value = arguments.get("value", "")
        limit = min(arguments.get("limit", 100), 1000)  # Cap at 1000

        # 'field' comes from the tool config, not the caller — say so.
        if not field or not condition:
            return {
                "error": "A configured 'filter_field' and a 'condition' argument are required"
            }

        # Validate condition requirements
        if condition not in ["not_empty", "has_attribute"] and not value:
            return {"error": f"'value' parameter required for condition '{condition}'"}

        all_records = self._get_all_records_data()

        # Check field existence against the first record's extracted keys.
        if all_records and field not in all_records[0]:
            available_fields = sorted(all_records[0].keys())
            return {
                "error": f"Field '{field}' not found. Available: {available_fields}"
            }

        filtered_records = []
        filter_func = self._get_filter_function(condition, value)

        if not filter_func:
            return {
                "error": f"Unknown condition '{condition}'. Supported: contains, starts_with, ends_with, exact, not_empty, has_attribute"
            }

        total_matches = 0
        for record_data in all_records:
            if field in record_data and filter_func(record_data, field):
                total_matches += 1
                # Keep counting past the limit so total_matches is exact.
                if len(filtered_records) < limit:
                    filtered_records.append(
                        self._strip_temporary_fields(record_data)
                    )

        return {
            "total_matches": total_matches,
            "total_returned_results": len(filtered_records),
            "results": filtered_records,
            "applied_filter": self._get_filter_description(field, condition, value),
            "filter_parameters": {
                "field": field,
                "condition": condition,
                "value": (
                    value if condition not in ["not_empty", "has_attribute"] else None
                ),
                "limit": limit,
            },
        }

    def _get_filter_function(self, condition: str, value: str):
        """Get the predicate for *condition*, or None if unknown."""
        filter_functions = {
            "contains": lambda data, field: value.lower() in str(data[field]).lower(),
            "starts_with": lambda data, field: str(data[field])
            .lower()
            .startswith(value.lower()),
            "ends_with": lambda data, field: str(data[field])
            .lower()
            .endswith(value.lower()),
            "exact": lambda data, field: str(data[field]).lower() == value.lower(),
            "not_empty": lambda data, field: str(data[field]).strip() != "",
            "has_attribute": lambda data, field: field == "_attributes"
            and value in data["_attributes"],
        }
        return filter_functions.get(condition)

    def _get_filter_description(self, field: str, condition: str, value: str) -> str:
        """Get a human-readable description of the applied filter."""
        descriptions = {
            "contains": f"{field} contains '{value}'",
            "starts_with": f"{field} starts with '{value}'",
            "ends_with": f"{field} ends with '{value}'",
            "exact": f"{field} equals '{value}'",
            "not_empty": f"{field} is not empty",
            "has_attribute": f"has attribute '{value}'",
        }
        return descriptions.get(condition, f"{field} {condition} {value}")

    def get_dataset_info(self) -> Dict[str, Any]:
        """Get comprehensive information about the loaded XML dataset."""
        if not self.records:
            return {"error": "XML dataset not loaded or contains no records"}

        # Derive the available field names from a small sample of records.
        sample_data = self._get_all_records_data()[:5]
        all_fields = set()
        for record_data in sample_data:
            all_fields.update(record_data.keys())

        info = {
            "total_records": len(self.records),
            "root_element": self.xml_root.tag if self.xml_root else None,
            "record_xpath": self.record_xpath,
            "field_mappings": self.field_mappings,
            "available_fields": sorted(all_fields),
        }

        if sample_data:
            info["sample_record"] = sample_data[0]

        return info