tooluniverse 0.1.4__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic. Click here for more details.

Files changed (187) hide show
  1. tooluniverse/__init__.py +340 -4
  2. tooluniverse/admetai_tool.py +84 -0
  3. tooluniverse/agentic_tool.py +563 -0
  4. tooluniverse/alphafold_tool.py +96 -0
  5. tooluniverse/base_tool.py +129 -6
  6. tooluniverse/boltz_tool.py +207 -0
  7. tooluniverse/chem_tool.py +192 -0
  8. tooluniverse/compose_scripts/__init__.py +1 -0
  9. tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
  10. tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
  11. tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
  12. tooluniverse/compose_scripts/literature_tool.py +34 -0
  13. tooluniverse/compose_scripts/output_summarizer.py +279 -0
  14. tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
  15. tooluniverse/compose_scripts/tool_discover.py +705 -0
  16. tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
  17. tooluniverse/compose_tool.py +371 -0
  18. tooluniverse/ctg_tool.py +1002 -0
  19. tooluniverse/custom_tool.py +81 -0
  20. tooluniverse/dailymed_tool.py +108 -0
  21. tooluniverse/data/admetai_tools.json +155 -0
  22. tooluniverse/data/agentic_tools.json +1156 -0
  23. tooluniverse/data/alphafold_tools.json +87 -0
  24. tooluniverse/data/boltz_tools.json +9 -0
  25. tooluniverse/data/chembl_tools.json +16 -0
  26. tooluniverse/data/clait_tools.json +108 -0
  27. tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
  28. tooluniverse/data/compose_tools.json +202 -0
  29. tooluniverse/data/dailymed_tools.json +70 -0
  30. tooluniverse/data/dataset_tools.json +646 -0
  31. tooluniverse/data/disease_target_score_tools.json +712 -0
  32. tooluniverse/data/efo_tools.json +17 -0
  33. tooluniverse/data/embedding_tools.json +319 -0
  34. tooluniverse/data/enrichr_tools.json +31 -0
  35. tooluniverse/data/europe_pmc_tools.json +22 -0
  36. tooluniverse/data/expert_feedback_tools.json +10 -0
  37. tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
  38. tooluniverse/data/fda_drug_labeling_tools.json +544 -168
  39. tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
  40. tooluniverse/data/finder_tools.json +209 -0
  41. tooluniverse/data/gene_ontology_tools.json +113 -0
  42. tooluniverse/data/gwas_tools.json +1082 -0
  43. tooluniverse/data/hpa_tools.json +333 -0
  44. tooluniverse/data/humanbase_tools.json +47 -0
  45. tooluniverse/data/idmap_tools.json +74 -0
  46. tooluniverse/data/mcp_client_tools_example.json +113 -0
  47. tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
  48. tooluniverse/data/medlineplus_tools.json +141 -0
  49. tooluniverse/data/monarch_tools.json +1 -1
  50. tooluniverse/data/openalex_tools.json +36 -0
  51. tooluniverse/data/opentarget_tools.json +82 -58
  52. tooluniverse/data/output_summarization_tools.json +101 -0
  53. tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
  54. tooluniverse/data/packages/categorized_tools.txt +206 -0
  55. tooluniverse/data/packages/cheminformatics_tools.json +347 -0
  56. tooluniverse/data/packages/earth_sciences_tools.json +74 -0
  57. tooluniverse/data/packages/genomics_tools.json +776 -0
  58. tooluniverse/data/packages/image_processing_tools.json +38 -0
  59. tooluniverse/data/packages/machine_learning_tools.json +789 -0
  60. tooluniverse/data/packages/neuroscience_tools.json +62 -0
  61. tooluniverse/data/packages/original_tools.txt +0 -0
  62. tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
  63. tooluniverse/data/packages/scientific_computing_tools.json +560 -0
  64. tooluniverse/data/packages/single_cell_tools.json +453 -0
  65. tooluniverse/data/packages/software_tools.json +4954 -0
  66. tooluniverse/data/packages/structural_biology_tools.json +396 -0
  67. tooluniverse/data/packages/visualization_tools.json +399 -0
  68. tooluniverse/data/pubchem_tools.json +215 -0
  69. tooluniverse/data/pubtator_tools.json +68 -0
  70. tooluniverse/data/rcsb_pdb_tools.json +1332 -0
  71. tooluniverse/data/reactome_tools.json +19 -0
  72. tooluniverse/data/semantic_scholar_tools.json +26 -0
  73. tooluniverse/data/special_tools.json +2 -25
  74. tooluniverse/data/tool_composition_tools.json +88 -0
  75. tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
  76. tooluniverse/data/txagent_client_tools.json +9 -0
  77. tooluniverse/data/uniprot_tools.json +211 -0
  78. tooluniverse/data/url_fetch_tools.json +94 -0
  79. tooluniverse/data/uspto_downloader_tools.json +9 -0
  80. tooluniverse/data/uspto_tools.json +811 -0
  81. tooluniverse/data/xml_tools.json +3275 -0
  82. tooluniverse/dataset_tool.py +296 -0
  83. tooluniverse/default_config.py +165 -0
  84. tooluniverse/efo_tool.py +42 -0
  85. tooluniverse/embedding_database.py +630 -0
  86. tooluniverse/embedding_sync.py +396 -0
  87. tooluniverse/enrichr_tool.py +266 -0
  88. tooluniverse/europe_pmc_tool.py +52 -0
  89. tooluniverse/execute_function.py +1775 -95
  90. tooluniverse/extended_hooks.py +444 -0
  91. tooluniverse/gene_ontology_tool.py +194 -0
  92. tooluniverse/graphql_tool.py +158 -36
  93. tooluniverse/gwas_tool.py +358 -0
  94. tooluniverse/hpa_tool.py +1645 -0
  95. tooluniverse/humanbase_tool.py +389 -0
  96. tooluniverse/logging_config.py +254 -0
  97. tooluniverse/mcp_client_tool.py +764 -0
  98. tooluniverse/mcp_integration.py +413 -0
  99. tooluniverse/mcp_tool_registry.py +925 -0
  100. tooluniverse/medlineplus_tool.py +337 -0
  101. tooluniverse/openalex_tool.py +228 -0
  102. tooluniverse/openfda_adv_tool.py +283 -0
  103. tooluniverse/openfda_tool.py +393 -160
  104. tooluniverse/output_hook.py +1122 -0
  105. tooluniverse/package_tool.py +195 -0
  106. tooluniverse/pubchem_tool.py +158 -0
  107. tooluniverse/pubtator_tool.py +168 -0
  108. tooluniverse/rcsb_pdb_tool.py +38 -0
  109. tooluniverse/reactome_tool.py +108 -0
  110. tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
  111. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
  112. tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
  113. tooluniverse/remote/expert_feedback/simple_test.py +23 -0
  114. tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
  115. tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
  116. tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
  117. tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
  118. tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
  119. tooluniverse/remote/immune_compass/compass_tool.py +327 -0
  120. tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
  121. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
  122. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
  123. tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
  124. tooluniverse/remote_tool.py +99 -0
  125. tooluniverse/restful_tool.py +53 -30
  126. tooluniverse/scripts/generate_tool_graph.py +408 -0
  127. tooluniverse/scripts/visualize_tool_graph.py +829 -0
  128. tooluniverse/semantic_scholar_tool.py +62 -0
  129. tooluniverse/smcp.py +2452 -0
  130. tooluniverse/smcp_server.py +975 -0
  131. tooluniverse/test/mcp_server_test.py +0 -0
  132. tooluniverse/test/test_admetai_tool.py +370 -0
  133. tooluniverse/test/test_agentic_tool.py +129 -0
  134. tooluniverse/test/test_alphafold_tool.py +71 -0
  135. tooluniverse/test/test_chem_tool.py +37 -0
  136. tooluniverse/test/test_compose_lieraturereview.py +63 -0
  137. tooluniverse/test/test_compose_tool.py +448 -0
  138. tooluniverse/test/test_dailymed.py +69 -0
  139. tooluniverse/test/test_dataset_tool.py +200 -0
  140. tooluniverse/test/test_disease_target_score.py +56 -0
  141. tooluniverse/test/test_drugbank_filter_examples.py +179 -0
  142. tooluniverse/test/test_efo.py +31 -0
  143. tooluniverse/test/test_enrichr_tool.py +21 -0
  144. tooluniverse/test/test_europe_pmc_tool.py +20 -0
  145. tooluniverse/test/test_fda_adv.py +95 -0
  146. tooluniverse/test/test_fda_drug_labeling.py +91 -0
  147. tooluniverse/test/test_gene_ontology_tools.py +66 -0
  148. tooluniverse/test/test_gwas_tool.py +139 -0
  149. tooluniverse/test/test_hpa.py +625 -0
  150. tooluniverse/test/test_humanbase_tool.py +20 -0
  151. tooluniverse/test/test_idmap_tools.py +61 -0
  152. tooluniverse/test/test_mcp_server.py +211 -0
  153. tooluniverse/test/test_mcp_tool.py +247 -0
  154. tooluniverse/test/test_medlineplus.py +220 -0
  155. tooluniverse/test/test_openalex_tool.py +32 -0
  156. tooluniverse/test/test_opentargets.py +28 -0
  157. tooluniverse/test/test_pubchem_tool.py +116 -0
  158. tooluniverse/test/test_pubtator_tool.py +37 -0
  159. tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
  160. tooluniverse/test/test_reactome.py +54 -0
  161. tooluniverse/test/test_semantic_scholar_tool.py +24 -0
  162. tooluniverse/test/test_software_tools.py +147 -0
  163. tooluniverse/test/test_tool_description_optimizer.py +49 -0
  164. tooluniverse/test/test_tool_finder.py +26 -0
  165. tooluniverse/test/test_tool_finder_llm.py +252 -0
  166. tooluniverse/test/test_tools_find.py +195 -0
  167. tooluniverse/test/test_uniprot_tools.py +74 -0
  168. tooluniverse/test/test_uspto_tool.py +72 -0
  169. tooluniverse/test/test_xml_tool.py +113 -0
  170. tooluniverse/tool_finder_embedding.py +267 -0
  171. tooluniverse/tool_finder_keyword.py +693 -0
  172. tooluniverse/tool_finder_llm.py +699 -0
  173. tooluniverse/tool_graph_web_ui.py +955 -0
  174. tooluniverse/tool_registry.py +416 -0
  175. tooluniverse/uniprot_tool.py +155 -0
  176. tooluniverse/url_tool.py +253 -0
  177. tooluniverse/uspto_tool.py +240 -0
  178. tooluniverse/utils.py +369 -41
  179. tooluniverse/xml_tool.py +369 -0
  180. tooluniverse-1.0.0.dist-info/METADATA +377 -0
  181. tooluniverse-1.0.0.dist-info/RECORD +186 -0
  182. {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +1 -1
  183. tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
  184. tooluniverse-0.1.4.dist-info/METADATA +0 -141
  185. tooluniverse-0.1.4.dist-info/RECORD +0 -18
  186. {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
  187. {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,369 @@
1
+ # import xml.etree.ElementTree as ET
2
+ from lxml import etree as ET
3
+ from typing import List, Dict, Any, Optional, Set
4
+ from .base_tool import BaseTool
5
+ from .utils import download_from_hf
6
+ from .tool_registry import register_tool
7
+
8
+
9
@register_tool("XMLTool")
class XMLDatasetTool(BaseTool):
    """
    Tool to search and filter XML datasets that are organized as a collection
    of searchable records (e.g., dataset of medical subjects or drug
    descriptions). Supports user-friendly queries without requiring XPath
    knowledge.

    The following keys are read from ``tool_config["settings"]``:

    * ``record_xpath``: path selecting the record elements under the root
      (default ``".//*"``).
    * ``namespaces``: prefix-to-URI mapping handed to every ``findall`` call.
    * ``field_mappings``: maps an output field name either to a path
      expression (string) or to a dict with ``parent_path`` and ``subfields``
      describing a nested, repeated structure.
    * ``filter_field``: the single field ``_filter`` operates on.
    * ``search_fields``: fields scanned by ``_search`` (default: ``_text``
      plus every mapped field).
    * ``hf_dataset_path`` / ``local_dataset_path``: where the XML file is
      obtained from.
    """

    # Hard upper bound applied to any caller-supplied ``limit``.
    MAX_RESULT_LIMIT = 1000

    # Conditions that do not need a ``value`` argument. ``has_attribute`` is
    # deliberately NOT listed: its predicate uses ``value`` as the attribute
    # name to look for.
    _VALUELESS_CONDITIONS = frozenset({"not_empty"})

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Read the tool settings and eagerly load the XML dataset.

        Args:
            tool_config: Tool configuration; the optional ``"settings"``
                entry holds the keys described in the class docstring.
        """
        super().__init__(tool_config)
        # A missing or null "settings" entry falls back to an empty dict so
        # construction does not fail with AttributeError; loading then reports
        # that no dataset path was provided.
        settings: Dict[str, Any] = tool_config.get("settings") or {}

        self.xml_root: Optional[ET.Element] = None
        self.records: List[ET.Element] = []
        self.record_xpath: str = settings.get("record_xpath", ".//*")
        self.namespaces: Dict[str, str] = settings.get("namespaces", {})
        # Dict of fields we're interested in extracting from each record
        self.field_mappings: Dict[str, str] = settings.get("field_mappings", {})
        # Field to filter on, if specified
        self.filter_field: Optional[str] = settings.get("filter_field")
        self.search_fields: List[str] = settings.get(
            "search_fields", ["_text"] + list(self.field_mappings.keys())
        )
        self._record_cache: List[Dict[str, Any]] = []  # Cache extracted data
        # Names of the flattened helper keys created for nested mappings;
        # they exist only to make searching easy and are removed from results.
        self.temporary_record_fields: Set[str] = set()
        self._load_dataset()

    def _load_dataset(self) -> None:
        """
        Load and parse the XML dataset.

        On success ``self.xml_root`` and ``self.records`` are populated. Any
        failure is reported on stdout and leaves ``self.records`` empty, which
        ``run`` turns into an error response.
        """
        try:
            xml_path = self._get_dataset_path()
            if not xml_path:
                return

            tree = ET.parse(xml_path)
            self.xml_root = tree.getroot()
            self.records = self.xml_root.findall(
                self.record_xpath, namespaces=self.namespaces
            )

            print(
                f"Loaded XML dataset: {len(self.records)} records from root '{self.xml_root.tag}'"
            )

        except Exception as e:
            # Best-effort loading: the tool stays constructible even when the
            # dataset is missing or malformed.
            print(f"Error loading XML dataset: {e}")
            self.records = []

    def _get_dataset_path(self) -> Optional[str]:
        """
        Get the path to the XML dataset.

        ``hf_dataset_path`` takes precedence over ``local_dataset_path``.

        Returns:
            The path of the XML file, or ``None`` when no path is configured
            or the download did not report success.
        """
        settings = self.tool_config.get("settings") or {}

        if "hf_dataset_path" in settings:
            result = download_from_hf(settings)
            if result.get("success"):
                return result["local_path"]
            print(f"Failed to download dataset: {result.get('error')}")
            return None

        if "local_dataset_path" in settings:
            return settings["local_dataset_path"]

        print("No dataset path provided in tool configuration")
        return None

    def _extract_record_data(self, record_element: ET.Element) -> Dict[str, Any]:
        """
        Extract the configured fields from one record element.

        The result always contains ``_tag``, ``_text`` and ``_attributes``.
        Flat mappings yield a string. Nested mappings (a dict with
        ``parent_path``) yield a list of ``{subfield: value}`` dicts plus one
        flattened ``"<field>_<subfield>"`` string per subfield; the names of
        those flattened keys are recorded in ``self.temporary_record_fields``.
        This method itself does not cache; see ``_get_all_records_data``.

        Args:
            record_element: The element representing a single record.

        Returns:
            Dict of extracted values for the record.
        """
        data: Dict[str, Any] = {
            "_tag": record_element.tag,
            "_text": (record_element.text or "").strip(),
            "_attributes": dict(record_element.attrib),
        }

        for field_name, xpath_expr in self.field_mappings.items():
            is_nested = isinstance(xpath_expr, dict) and "parent_path" in xpath_expr
            if not is_nested:
                # Regular flat field extraction
                data[field_name] = self._extract_field_value(
                    record_element, xpath_expr
                )
                continue

            # Handle nested structure
            subfields = xpath_expr.get("subfields", {})
            parents = record_element.findall(
                xpath_expr["parent_path"], namespaces=self.namespaces
            )

            structured_list = []
            for parent in parents:
                entry = {
                    sf_name: self._extract_field_value(parent, sf_path)
                    for sf_name, sf_path in subfields.items()
                }
                if any(entry.values()):  # Only add entries with non-empty values
                    structured_list.append(entry)
            data[field_name] = structured_list

            # For efficient search, flatten structured data into a single
            # string per subfield.
            for sf_name in subfields:
                flat_key = f"{field_name}_{sf_name}"
                data[flat_key] = " | ".join(
                    entry.get(sf_name, "") for entry in structured_list
                )
                self.temporary_record_fields.add(flat_key)

        return data

    def _extract_field_value(self, element: ET.Element, xpath_expr: str) -> str:
        """
        Extract a field value relative to ``element``.

        Three expression forms are recognised:

        * ``"path/@attr"``: attribute ``attr`` of every element matching
          ``path``.
        * ``"@attr"``: attribute ``attr`` of ``element`` itself.
        * anything else: stripped text of every matching element.

        Multiple values are joined with ``" | "``.

        Returns:
            The extracted string, or ``""`` when nothing matches or the
            expression cannot be evaluated.
        """
        try:
            # Handle attribute extraction with /@
            if "/@" in xpath_expr:
                elem_path, attr_name = xpath_expr.rsplit("/@", 1)
                matches = element.findall(elem_path, namespaces=self.namespaces)
                return " | ".join(
                    el.get(attr_name, "").strip()
                    for el in matches
                    if el.get(attr_name)
                )

            # Handle direct attribute on current element
            if xpath_expr.startswith("@"):
                return element.get(xpath_expr[1:], "").strip()

            # Handle text content extraction, skipping empty text
            matches = element.findall(xpath_expr, namespaces=self.namespaces)
            texts = ((el.text or "").strip() for el in matches)
            return " | ".join(text for text in texts if text)

        except Exception:
            # Deliberately best-effort: one bad mapping expression must not
            # abort extraction of the whole record.
            return ""

    def _get_all_records_data(self) -> List[Dict[str, Any]]:
        """Return the extracted data of every record, computing it on first use."""
        if not self._record_cache:
            self._record_cache = [
                self._extract_record_data(record) for record in self.records
            ]
        return self._record_cache

    def _resolve_limit(self, arguments: Dict[str, Any], default: int) -> int:
        """
        Read ``limit`` from ``arguments`` and clamp it to a usable range.

        A missing, null or non-numeric limit falls back to ``default``; the
        result is clamped to ``[0, MAX_RESULT_LIMIT]``.
        """
        raw_limit = arguments.get("limit")
        if raw_limit is None:
            return default
        try:
            requested = int(raw_limit)
        except (TypeError, ValueError):
            return default
        return max(0, min(requested, self.MAX_RESULT_LIMIT))

    def _public_record(self, record_data: Dict[str, Any]) -> Dict[str, Any]:
        """Return a shallow copy of ``record_data`` without the flattened helper keys."""
        return {
            key: val
            for key, val in record_data.items()
            if key not in self.temporary_record_fields
        }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Main entry point for the tool.

        Dispatches to a text search when ``"query"`` is present, otherwise to
        a field filter when ``"condition"`` is present.

        Returns:
            The search/filter result dict, or a dict with an ``"error"`` key.
        """
        if not self.records:
            return {"error": "XML dataset not loaded or contains no records"}

        # Route to appropriate function based on arguments
        if "query" in arguments:
            return self._search(arguments)
        if "condition" in arguments:
            return self._filter(arguments)
        return {
            "error": "Provide either 'query' for search or 'condition' for filtering"
        }

    def _search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search records by text content across ``self.search_fields``.

        Recognised arguments: ``query`` (required), ``case_sensitive``
        (default ``False``), ``exact_match`` (default ``False``) and ``limit``
        (default 50, capped at ``MAX_RESULT_LIMIT``).

        Returns:
            Dict with the query, the total number of matching records, the
            returned records (each with a ``matched_fields`` list) and the
            effective search parameters; or an ``"error"`` dict.
        """
        raw_query = arguments.get("query")
        # Coerce so an explicit null or a numeric query does not raise.
        query = "" if raw_query is None else str(raw_query).strip()
        if not query:
            return {"error": "Query parameter is required"}

        # Parse search parameters with sensible defaults
        case_sensitive = arguments.get("case_sensitive", False)
        exact_match = arguments.get("exact_match", False)
        limit = self._resolve_limit(arguments, default=50)

        search_query = query if case_sensitive else query.lower()
        results: List[Dict[str, Any]] = []
        total_matches = 0

        for record_data in self._get_all_records_data():
            matched_fields = self._find_matches(
                record_data,
                search_query,
                self.search_fields,
                case_sensitive,
                exact_match,
            )
            if not matched_fields:
                continue

            # Keep counting past the limit so the caller learns the true total.
            total_matches += 1
            if len(results) < limit:
                result_record = self._public_record(record_data)
                result_record["matched_fields"] = matched_fields
                results.append(result_record)

        return {
            "query": query,
            "total_matches": total_matches,
            "total_returned_results": len(results),
            "results": results,
            "search_parameters": {
                "case_sensitive": case_sensitive,
                "exact_match": exact_match,
                "limit": limit,
            },
        }

    def _find_matches(
        self,
        record_data: Dict[str, Any],
        search_query: str,
        search_fields: List[str],
        case_sensitive: bool,
        exact_match: bool,
    ) -> List[str]:
        """Return the names of the ``search_fields`` in which the query matches."""
        return [
            field
            for field in search_fields
            if field in record_data
            and self._is_match(
                self._get_searchable_value(record_data, field, case_sensitive),
                search_query,
                exact_match,
            )
        ]

    def _get_searchable_value(
        self, record_data: Dict[str, Any], field: str, case_sensitive: bool
    ) -> str:
        """
        Get the searchable string value for a field.

        ``_attributes`` is rendered as its values joined by spaces; every
        other field is converted with ``str``. The result is lower-cased
        unless ``case_sensitive`` is true.
        """
        if field == "_attributes":
            value = " ".join(record_data["_attributes"].values())
        else:
            value = str(record_data.get(field, ""))

        return value if case_sensitive else value.lower()

    def _is_match(self, field_value: str, search_query: str, exact_match: bool) -> bool:
        """
        Check if a field value matches the search query.

        With ``exact_match`` the query must equal the whole value, or one of
        its ``|``-separated parts; otherwise a substring test is used.
        """
        if not exact_match:
            return search_query in field_value

        if "|" in field_value:  # Handle multiple values
            return search_query in [part.strip() for part in field_value.split("|")]
        return search_query == field_value.strip()

    def _filter(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Filter records on the configured ``filter_field``.

        Recognised arguments: ``condition`` (required; one of ``contains``,
        ``starts_with``, ``ends_with``, ``exact``, ``not_empty``,
        ``has_attribute``), ``value`` (required for every condition except
        ``not_empty``) and ``limit`` (default 100, capped at
        ``MAX_RESULT_LIMIT``).

        Returns:
            Dict with the total number of matching records, the returned
            records, a readable description of the filter and the effective
            parameters; or an ``"error"`` dict.
        """
        field = self.filter_field
        condition = arguments.get("condition")
        raw_value = arguments.get("value")
        # Coerce so numeric or null values do not break the string predicates.
        value = "" if raw_value is None else str(raw_value)
        limit = self._resolve_limit(arguments, default=100)

        # The field comes from the tool configuration, not from the caller,
        # so the two missing-input cases are reported separately.
        if not field:
            return {"error": "No 'filter_field' is configured for this tool"}
        if not condition:
            return {"error": "'condition' parameter is required"}

        # Reject unknown conditions first so a typo is not reported as a
        # missing value.
        filter_func = self._get_filter_function(condition, value)
        if not filter_func:
            return {
                "error": f"Unknown condition '{condition}'. Supported: contains, starts_with, ends_with, exact, not_empty, has_attribute"
            }

        # Validate condition requirements
        needs_value = condition not in self._VALUELESS_CONDITIONS
        if needs_value and not value:
            return {"error": f"'value' parameter required for condition '{condition}'"}

        all_records = self._get_all_records_data()

        # Check if field exists
        if all_records and field not in all_records[0]:
            available_fields = sorted(all_records[0].keys())
            return {
                "error": f"Field '{field}' not found. Available: {available_fields}"
            }

        filtered_records: List[Dict[str, Any]] = []
        total_matches = 0
        for record_data in all_records:
            if field not in record_data or not filter_func(record_data, field):
                continue
            # Keep counting past the limit so the caller learns the true total.
            total_matches += 1
            if len(filtered_records) < limit:
                filtered_records.append(self._public_record(record_data))

        return {
            "total_matches": total_matches,
            "total_returned_results": len(filtered_records),
            "results": filtered_records,
            "applied_filter": self._get_filter_description(field, condition, value),
            "filter_parameters": {
                "field": field,
                "condition": condition,
                "value": value if needs_value else None,
                "limit": limit,
            },
        }

    def _get_filter_function(self, condition: str, value: str):
        """
        Get the predicate implementing ``condition``.

        Each predicate takes ``(record_data, field)`` and returns a bool.
        String comparisons are case-insensitive. ``has_attribute`` only
        matches when ``field`` is ``"_attributes"`` and ``value`` is one of
        the record's attribute names.

        Returns:
            The predicate, or ``None`` for an unknown condition.
        """
        needle = str(value).lower()  # lowered once instead of once per record
        filter_functions = {
            "contains": lambda data, field: needle in str(data[field]).lower(),
            "starts_with": lambda data, field: str(data[field])
            .lower()
            .startswith(needle),
            "ends_with": lambda data, field: str(data[field]).lower().endswith(needle),
            "exact": lambda data, field: str(data[field]).lower() == needle,
            "not_empty": lambda data, field: str(data[field]).strip() != "",
            "has_attribute": lambda data, field: field == "_attributes"
            and value in data["_attributes"],
        }
        return filter_functions.get(condition)

    def _get_filter_description(self, field: str, condition: str, value: str) -> str:
        """Get a human-readable description of the applied filter."""
        descriptions = {
            "contains": f"{field} contains '{value}'",
            "starts_with": f"{field} starts with '{value}'",
            "ends_with": f"{field} ends with '{value}'",
            "exact": f"{field} equals '{value}'",
            "not_empty": f"{field} is not empty",
            "has_attribute": f"has attribute '{value}'",
        }
        return descriptions.get(condition, f"{field} {condition} {value}")

    def get_dataset_info(self) -> Dict[str, Any]:
        """
        Get summary information about the loaded XML dataset.

        Returns:
            Dict with the record count, root tag, record path, field mappings,
            the field names seen in the first five records and the first
            record as a sample; or an ``"error"`` dict when nothing is loaded.
        """
        if not self.records:
            return {"error": "XML dataset not loaded or contains no records"}

        # Get field information from sample records
        sample_data = self._get_all_records_data()[:5]
        all_fields: Set[str] = set()
        for record_data in sample_data:
            all_fields.update(record_data.keys())

        info: Dict[str, Any] = {
            "total_records": len(self.records),
            # Compare against None explicitly: the truth value of an element
            # reflects whether it has children, not whether it exists.
            "root_element": self.xml_root.tag if self.xml_root is not None else None,
            "record_xpath": self.record_xpath,
            "field_mappings": self.field_mappings,
            "available_fields": sorted(all_fields),
        }

        if sample_data:
            info["sample_record"] = sample_data[0]

        return info