tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""OpenSearch field mapping extraction utilities.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to extract field mappings from OpenSearch indices
|
|
4
|
+
and convert them to the format expected by TQL for intelligent field selection.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
# Module-level logger, named after this module via ``__name__``; used by the functions below.
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def extract_field_mappings_from_opensearch(
    opensearch_client: Any, index_pattern: str, tql_query: str, tql_instance: Optional[Any] = None
) -> Dict[str, Dict[str, Any]]:
    """Extract field mappings from OpenSearch for fields used in a TQL query.

    The field names referenced by ``tql_query`` are obtained from
    ``tql_instance.extract_fields``; the index mappings are then fetched with
    ``opensearch_client.indices.get_mapping`` and converted to the format
    expected by TQL's field mapping system.

    Args:
        opensearch_client: OpenSearch client instance
        index_pattern: Index pattern to query (e.g., 'logs-*', 'my-index')
        tql_query: TQL query string to extract field names from
        tql_instance: Optional TQL instance for field extraction (will create one if not provided)

    Returns:
        Dictionary of field mappings in TQL format. An empty dictionary is
        returned when the query references no fields. For example:
        {
            "message": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                    "lowercase": {"type": "text", "analyzer": "lowercase"}
                }
            },
            "level": {
                "type": "keyword"
            }
        }

    Raises:
        RuntimeError: If the mapping request to OpenSearch fails. The original
            client exception is attached as ``__cause__``.
        Exception: Any other error raised during field extraction or mapping
            conversion is logged and re-raised unchanged.
    """
    if tql_instance is None:
        # Imported lazily to avoid circular imports.
        from .core import TQL

        tql_instance = TQL()

    try:
        # Extract field names from the TQL query
        field_names = tql_instance.extract_fields(tql_query)
        logger.info(f"Extracted {len(field_names)} fields from TQL query: {field_names}")

        if not field_names:
            logger.warning("No fields found in TQL query")
            return {}

        # Get mappings from OpenSearch
        try:
            mapping_response = opensearch_client.indices.get_mapping(index=index_pattern)
        except Exception as e:
            logger.error(f"Failed to get mappings from OpenSearch: {e}")
            # Chain explicitly so the client error is recorded as the direct cause.
            raise RuntimeError(f"Failed to retrieve mappings from OpenSearch: {e}") from e

        # Extract and convert mappings to TQL format
        tql_mappings = _convert_opensearch_mappings_to_tql_format(mapping_response, field_names)

        logger.info(f"Successfully converted mappings for {len(tql_mappings)} fields")
        return tql_mappings

    except Exception as e:
        # Log at the boundary, then let the caller see the original exception.
        logger.error(f"Error extracting field mappings: {e}")
        raise
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _convert_opensearch_mappings_to_tql_format(
    opensearch_mappings: Dict[str, Any], field_names: List[str]
) -> Dict[str, Dict[str, Any]]:
    """Build TQL-format mappings for the requested fields from a mapping response.

    The ``properties`` of every index in the response are flattened into one
    lookup keyed by dotted field path. Each requested field is then converted
    from that lookup; fields missing from it fall back to a keyword mapping.

    Args:
        opensearch_mappings: Raw OpenSearch mapping response, keyed by index name
        field_names: List of field names to extract mappings for

    Returns:
        Dictionary of TQL-format field mappings, one entry per requested field
    """
    # Flatten the properties of all indices into a single lookup.
    discovered: Dict[str, Any] = {}
    for index_info in opensearch_mappings.values():
        if "mappings" not in index_info or "properties" not in index_info["mappings"]:
            continue
        _extract_field_mappings_recursive(index_info["mappings"]["properties"], discovered)

    converted: Dict[str, Dict[str, Any]] = {}
    for field_name in field_names:
        if field_name not in discovered:
            # Unknown field: fall back to a plain keyword mapping.
            logger.warning(f"Field '{field_name}' not found in OpenSearch mappings, using default keyword type")
            converted[field_name] = {"type": "keyword"}
            continue
        converted[field_name] = _convert_opensearch_field_to_tql_format(discovered[field_name])

    return converted
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _convert_opensearch_mappings_to_tql(
    opensearch_mappings: Dict[str, Any], field_names: List[str]
) -> Dict[str, Dict[str, Any]]:
    """Legacy entry point that delegates to the ``*_to_tql_format`` converter.

    DEPRECATED: Use _convert_opensearch_mappings_to_tql_format instead.

    Args:
        opensearch_mappings: Raw OpenSearch mapping response
        field_names: List of field names to extract mappings for

    Returns:
        Dictionary of TQL-format field mappings
    """
    # Pure delegation: both arguments are forwarded unchanged.
    converted = _convert_opensearch_mappings_to_tql_format(opensearch_mappings, field_names)
    return converted
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _extract_field_mappings_recursive(
    properties: Dict[str, Any], all_mappings: Dict[str, Any], prefix: str = ""
) -> None:
    """Flatten an OpenSearch ``properties`` tree into dotted-path entries.

    Every dict-valued field config is stored in ``all_mappings`` under its
    full dotted path (the same object, not a copy). Configs that contain their
    own ``properties`` are descended into, with the current path as prefix.
    Entries whose config is not a dict are skipped.

    Args:
        properties: OpenSearch properties dictionary
        all_mappings: Dictionary that receives the extracted mappings (mutated in place)
        prefix: Dotted path of the parent field; empty at the top level
    """
    for name, config in properties.items():
        if not isinstance(config, dict):
            continue

        path = ".".join((prefix, name)) if prefix else name
        all_mappings[path] = config

        # Descend into object/nested fields so their children get dotted paths.
        if "properties" in config:
            _extract_field_mappings_recursive(config["properties"], all_mappings, path)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _convert_opensearch_field_to_tql_format(opensearch_mapping: Dict[str, Any]) -> Dict[str, Any]:
    """Reduce one OpenSearch field mapping to the keys TQL uses.

    The result always has a ``type`` (``"keyword"`` when the source has none).
    ``analyzer``, ``normalizer``, ``search_analyzer``, ``index``, ``store`` and
    ``format`` are copied when present. If ``fields`` is a dict, each dict-valued
    sub-field is reduced the same way; non-dict sub-field entries are dropped.
    All other keys are omitted.

    Args:
        opensearch_mapping: OpenSearch mapping for the field

    Returns:
        TQL-format field mapping
    """
    passthrough = ("normalizer", "search_analyzer", "index", "store", "format")

    def _pick(config: Dict[str, Any], keys: tuple) -> Dict[str, Any]:
        # Copy only the listed keys that are actually present, in the listed order.
        return {key: config[key] for key in keys if key in config}

    # Type first, then analyzer, so key order matches type -> analyzer -> fields -> others.
    result: Dict[str, Any] = {"type": opensearch_mapping.get("type", "keyword")}
    result.update(_pick(opensearch_mapping, ("analyzer",)))

    subfields = opensearch_mapping.get("fields")
    if isinstance(subfields, dict):
        result["fields"] = {
            subfield_name: {
                "type": subfield_config.get("type", "keyword"),
                **_pick(subfield_config, ("analyzer",) + passthrough),
            }
            for subfield_name, subfield_config in subfields.items()
            if isinstance(subfield_config, dict)
        }

    result.update(_pick(opensearch_mapping, passthrough))
    return result
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _convert_field_mapping_to_tql(field_name: str, opensearch_mapping: Dict[str, Any]) -> Dict[str, Any]:
    """Legacy single-field converter kept for older call sites.

    DEPRECATED: Use _convert_opensearch_field_to_tql_format instead.

    Args:
        field_name: Name of the field (accepted but not used by the conversion)
        opensearch_mapping: OpenSearch mapping for the field

    Returns:
        TQL-format field mapping
    """
    # Only the mapping is forwarded; the field name plays no part in the result.
    converted = _convert_opensearch_field_to_tql_format(opensearch_mapping)
    return converted
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def discover_field_mappings_for_query(
    opensearch_client: Any,
    index_pattern: str,
    tql_query: str,
    include_common_analyzers: bool = True,
    tql_instance: Optional[Any] = None,
) -> Dict[str, Dict[str, Any]]:
    """Discover and enhance field mappings for a TQL query.

    Base mappings come from ``extract_field_mappings_from_opensearch``. When
    ``include_common_analyzers`` is true, every mapping of type ``text`` is
    additionally given a ``keyword`` sub-field and ``lowercase``, ``standard``,
    ``english`` and ``whitespace`` text sub-fields, unless a sub-field of that
    name already exists. These additions are suggestions only: this function
    does not check that they exist in the index.

    Args:
        opensearch_client: OpenSearch client instance
        index_pattern: Index pattern to query
        tql_query: TQL query string
        include_common_analyzers: Whether to add common analyzer suggestions
        tql_instance: Optional TQL instance forwarded to
            ``extract_field_mappings_from_opensearch`` for field extraction

    Returns:
        The base mappings unchanged when ``include_common_analyzers`` is false,
        otherwise enhanced field mappings with common analyzer variants

    Raises:
        Exception: Whatever ``extract_field_mappings_from_opensearch`` raises
            is propagated unchanged.
    """
    # Get base mappings
    base_mappings = extract_field_mappings_from_opensearch(
        opensearch_client, index_pattern, tql_query, tql_instance=tql_instance
    )

    if not include_common_analyzers:
        return base_mappings

    common_analyzer_names = ("lowercase", "standard", "english", "whitespace")
    enhanced_mappings: Dict[str, Dict[str, Any]] = {}

    for field_name, mapping in base_mappings.items():
        enhanced_mapping = mapping.copy()

        # For text fields, suggest common analyzer variants
        if mapping.get("type") == "text":
            # mapping.copy() is shallow; copy the sub-field dict as well so the
            # suggestions never leak back into the base mapping.
            subfields = dict(enhanced_mapping.get("fields", {}))

            # Existing sub-fields always win over suggestions.
            subfields.setdefault("keyword", {"type": "keyword"})
            for analyzer_name in common_analyzer_names:
                subfields.setdefault(analyzer_name, {"type": "text", "analyzer": analyzer_name})

            enhanced_mapping["fields"] = subfields

        enhanced_mappings[field_name] = enhanced_mapping

    return enhanced_mappings
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def get_sample_data_from_index(opensearch_client: Any, index_pattern: str, size: int = 10) -> List[Dict[str, Any]]:
    """Fetch up to ``size`` documents from an index with a ``match_all`` search.

    This is a best-effort helper: any exception raised while searching or
    reading the response is logged and an empty list is returned instead.

    Args:
        opensearch_client: OpenSearch client instance
        index_pattern: Index pattern to query
        size: Number of sample documents to retrieve

    Returns:
        List with the ``_source`` of each hit (an empty dict for hits without
        one), or an empty list on failure
    """
    search_body = {"size": size, "query": {"match_all": {}}}

    try:
        result = opensearch_client.search(index=index_pattern, body=search_body)
        hits = result.get("hits", {}).get("hits", [])
        return [hit.get("_source", {}) for hit in hits]
    except Exception as e:
        logger.error(f"Failed to get sample data: {e}")
        return []
|