tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,309 @@
1
+ """OpenSearch field mapping extraction utilities.
2
+
3
+ This module provides utilities to extract field mappings from OpenSearch indices
4
+ and convert them to the format expected by TQL for intelligent field selection.
5
+ """
6
+
7
+ import logging
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def extract_field_mappings_from_opensearch(
    opensearch_client: Any, index_pattern: str, tql_query: str, tql_instance: Optional[Any] = None
) -> Dict[str, Dict[str, Any]]:
    """Extract field mappings from OpenSearch for fields used in a TQL query.

    The field names referenced by ``tql_query`` are extracted, the mappings for
    ``index_pattern`` are fetched through ``opensearch_client.indices.get_mapping``
    and the result is converted to the structure TQL uses for field mappings.

    Args:
        opensearch_client: OpenSearch client instance
        index_pattern: Index pattern to query (e.g., 'logs-*', 'my-index')
        tql_query: TQL query string to extract field names from
        tql_instance: Optional TQL instance for field extraction (will create one if not provided)

    Returns:
        Dictionary of field mappings in TQL format, keyed by field name. An empty
        dictionary is returned when the query references no fields. For example:
        {
            "message": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                    "lowercase": {"type": "text", "analyzer": "lowercase"}
                }
            },
            "level": {
                "type": "keyword"
            }
        }

    Raises:
        RuntimeError: If the mappings cannot be retrieved from OpenSearch. The
            original client error is attached as ``__cause__``.
        Exception: Any error raised while extracting field names or converting
            the mappings is logged and re-raised unchanged.
    """
    # Import TQL here to avoid circular imports
    if tql_instance is None:
        from .core import TQL

        tql_instance = TQL()

    try:
        # Extract field names from the TQL query
        field_names = tql_instance.extract_fields(tql_query)
        logger.info(f"Extracted {len(field_names)} fields from TQL query: {field_names}")

        if not field_names:
            logger.warning("No fields found in TQL query")
            return {}

        # Get mappings from OpenSearch
        try:
            mapping_response = opensearch_client.indices.get_mapping(index=index_pattern)
        except Exception as e:
            logger.error(f"Failed to get mappings from OpenSearch: {e}")
            # Chain explicitly so the client's original exception stays reachable
            # via __cause__ and tracebacks show it as the direct cause.
            raise RuntimeError(f"Failed to retrieve mappings from OpenSearch: {e}") from e

        # Extract and convert mappings to TQL format
        tql_mappings = _convert_opensearch_mappings_to_tql_format(mapping_response, field_names)

        logger.info(f"Successfully converted mappings for {len(tql_mappings)} fields")
        return tql_mappings

    except Exception as e:
        logger.error(f"Error extracting field mappings: {e}")
        raise
77
+
78
+
79
def _convert_opensearch_mappings_to_tql_format(
    opensearch_mappings: Dict[str, Any], field_names: List[str]
) -> Dict[str, Dict[str, Any]]:
    """Build TQL field mappings for the requested fields from a get-mapping response.

    Every index entry in the response that carries ``mappings.properties`` is
    flattened into one dictionary keyed by dotted field path; when several
    indices define the same path, the entry processed last wins. Each requested
    field is then converted individually.

    Args:
        opensearch_mappings: Raw OpenSearch mapping response, keyed by index name
        field_names: List of field names to extract mappings for

    Returns:
        Dictionary of TQL-format field mappings with one entry per requested
        field. Fields missing from the response get ``{"type": "keyword"}``.
    """
    # Flatten the properties of every index into a single lookup table.
    flattened: Dict[str, Any] = {}
    for index_info in opensearch_mappings.values():
        if "mappings" not in index_info:
            continue
        index_mappings = index_info["mappings"]
        if "properties" not in index_mappings:
            continue
        _extract_field_mappings_recursive(index_mappings["properties"], flattened)

    # Resolve each requested field, falling back to a keyword mapping.
    resolved: Dict[str, Dict[str, Any]] = {}
    for field_name in field_names:
        # Only dict configs are ever stored in the table, so None means "absent".
        raw_mapping = flattened.get(field_name)
        if raw_mapping is None:
            logger.warning(f"Field '{field_name}' not found in OpenSearch mappings, using default keyword type")
            resolved[field_name] = {"type": "keyword"}
        else:
            resolved[field_name] = _convert_opensearch_field_to_tql_format(raw_mapping)

    return resolved
117
+
118
+
119
def _convert_opensearch_mappings_to_tql(
    opensearch_mappings: Dict[str, Any], field_names: List[str]
) -> Dict[str, Dict[str, Any]]:
    """Convert an OpenSearch mapping response to TQL format (legacy entry point).

    DEPRECATED: Use _convert_opensearch_mappings_to_tql_format instead. This
    function forwards both arguments to it unchanged and returns its result.

    Args:
        opensearch_mappings: Raw OpenSearch mapping response
        field_names: List of field names to extract mappings for

    Returns:
        Dictionary of TQL-format field mappings
    """
    converted = _convert_opensearch_mappings_to_tql_format(opensearch_mappings, field_names)
    return converted
134
+
135
+
136
def _extract_field_mappings_recursive(
    properties: Dict[str, Any], all_mappings: Dict[str, Any], prefix: str = ""
) -> None:
    """Flatten an OpenSearch ``properties`` tree into dotted field paths.

    Every dict-valued entry is stored in ``all_mappings`` under its full dotted
    path (``prefix`` joined to the field name with a dot). Entries that contain
    their own ``properties`` are descended into, parent first. Values that are
    not dictionaries are ignored.

    Args:
        properties: OpenSearch properties dictionary
        all_mappings: Dictionary to store extracted mappings (mutated in place)
        prefix: Field name prefix for nested fields
    """
    # Each frame pairs a partially consumed iterator over one properties level
    # with the dotted path of the object owning that level. Resuming the parent
    # iterator after a child is exhausted keeps the parent-first visiting order.
    frames = [(iter(properties.items()), prefix)]

    while frames:
        entries, parent_path = frames[-1]

        for name, config in entries:
            if not isinstance(config, dict):
                continue

            path = f"{parent_path}.{name}" if parent_path else name
            all_mappings[path] = config

            if "properties" in config:
                # Descend now; the remaining siblings are picked up afterwards.
                frames.append((iter(config["properties"].items()), path))
                break
        else:
            # This level is fully consumed.
            frames.pop()
155
+
156
+
157
def _convert_opensearch_field_to_tql_format(opensearch_mapping: Dict[str, Any]) -> Dict[str, Any]:  # noqa: C901
    """Reduce a single OpenSearch field mapping to the properties TQL keeps.

    The result holds the field ``type`` (``"keyword"`` when the mapping has
    none), the ``analyzer`` if present, a cleaned ``fields`` dictionary when the
    mapping has dict-valued ``fields``, and any of ``normalizer``,
    ``search_analyzer``, ``index``, ``store`` and ``format`` that are set.
    Sub-fields are cleaned by the same rules; sub-field entries that are not
    dictionaries are dropped. All other properties are discarded.

    Args:
        opensearch_mapping: OpenSearch mapping for the field

    Returns:
        TQL-format field mapping
    """
    passthrough_props = ("normalizer", "search_analyzer", "index", "store", "format")

    def _type_and_analyzer(config: Dict[str, Any]) -> Dict[str, Any]:
        """Return the type (keyword when unspecified) plus the analyzer if set."""
        core: Dict[str, Any] = {"type": config["type"] if "type" in config else "keyword"}
        if "analyzer" in config:
            core["analyzer"] = config["analyzer"]
        return core

    def _passthrough(config: Dict[str, Any]) -> Dict[str, Any]:
        """Return the subset of pass-through properties present in config."""
        return {prop: config[prop] for prop in passthrough_props if prop in config}

    cleaned = _type_and_analyzer(opensearch_mapping)

    # Sub-fields (multi-fields) are cleaned with the same rules as the parent.
    if "fields" in opensearch_mapping and isinstance(opensearch_mapping["fields"], dict):
        cleaned["fields"] = {
            sub_name: {**_type_and_analyzer(sub_config), **_passthrough(sub_config)}
            for sub_name, sub_config in opensearch_mapping["fields"].items()
            if isinstance(sub_config, dict)
        }

    # Pass-through properties go last so the key order stays type, analyzer, fields, rest.
    cleaned.update(_passthrough(opensearch_mapping))
    return cleaned
215
+
216
+
217
def _convert_field_mapping_to_tql(field_name: str, opensearch_mapping: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one OpenSearch field mapping to TQL format (legacy entry point).

    DEPRECATED: Use _convert_opensearch_field_to_tql_format instead. Only the
    mapping is forwarded to it.

    Args:
        field_name: Name of the field (accepted but not used by the conversion)
        opensearch_mapping: OpenSearch mapping for the field

    Returns:
        TQL-format field mapping
    """
    converted = _convert_opensearch_field_to_tql_format(opensearch_mapping)
    return converted
230
+
231
+
232
def discover_field_mappings_for_query(
    opensearch_client: Any, index_pattern: str, tql_query: str, include_common_analyzers: bool = True
) -> Dict[str, Dict[str, Any]]:
    """Discover field mappings for a TQL query and optionally add analyzer variants.

    The base mappings come from ``extract_field_mappings_from_opensearch``. When
    ``include_common_analyzers`` is true, every mapping whose type is ``text``
    additionally gets a ``keyword`` sub-field and ``lowercase``, ``standard``,
    ``english`` and ``whitespace`` text sub-fields, unless a sub-field of that
    name already exists. Mappings of other types are copied through unchanged.

    Args:
        opensearch_client: OpenSearch client instance
        index_pattern: Index pattern to query
        tql_query: TQL query string
        include_common_analyzers: Whether to add common analyzer suggestions

    Returns:
        The base mappings as-is when ``include_common_analyzers`` is false,
        otherwise the enhanced field mappings with common analyzer variants
    """
    discovered = extract_field_mappings_from_opensearch(opensearch_client, index_pattern, tql_query)

    if include_common_analyzers:
        suggested_analyzers = ("lowercase", "standard", "english", "whitespace")
        enriched = {}

        for name, base in discovered.items():
            # copy() is shallow: an existing "fields" dict is shared with the
            # base mapping and is extended in place below.
            candidate = base.copy()

            if base.get("type") == "text":
                subfields = candidate.setdefault("fields", {})

                # Existing sub-fields always win over the suggested defaults.
                subfields.setdefault("keyword", {"type": "keyword"})
                for analyzer in suggested_analyzers:
                    subfields.setdefault(analyzer, {"type": "text", "analyzer": analyzer})

            enriched[name] = candidate

        return enriched

    return discovered
285
+
286
+
287
def get_sample_data_from_index(opensearch_client: Any, index_pattern: str, size: int = 10) -> List[Dict[str, Any]]:
    """Fetch a handful of documents from an OpenSearch index for testing TQL queries.

    A ``match_all`` search limited to ``size`` hits is run against
    ``index_pattern`` and the ``_source`` of every hit is collected; a hit
    without ``_source`` contributes an empty dictionary. This is best-effort:
    any exception is logged and an empty list is returned instead.

    Args:
        opensearch_client: OpenSearch client instance
        index_pattern: Index pattern to query
        size: Number of sample documents to retrieve

    Returns:
        List of sample documents (empty if the search fails or has no hits)
    """
    search_body = {"size": size, "query": {"match_all": {}}}

    try:
        result = opensearch_client.search(index=index_pattern, body=search_body)
        hits = result.get("hits", {}).get("hits", [])
        return [hit.get("_source", {}) for hit in hits]
    except Exception as e:
        logger.error(f"Failed to get sample data: {e}")
        return []