tellaro_query_language-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
tql/opensearch_components/query_converter.py
@@ -0,0 +1,775 @@
"""Query conversion logic for OpenSearch backend.

This module handles the conversion of TQL AST nodes to OpenSearch Query DSL.
"""

from typing import Any, Dict, List, Optional

from ..exceptions import TQLUnsupportedOperationError, TQLValidationError
from .field_mapping import FieldMapping


class QueryConverter:
    """Handles conversion of TQL AST to OpenSearch Query DSL."""

    def __init__(self, field_mappings: Dict[str, FieldMapping], simple_mappings: Dict[str, str]):
        """Initialize query converter.

        Args:
            field_mappings: Intelligent field mappings
            simple_mappings: Simple field name mappings
        """
        self.intelligent_mappings = field_mappings
        self.simple_mappings = simple_mappings

    def convert_node(self, node: Any) -> Dict[str, Any]:
        """Convert a single AST node to OpenSearch query fragment."""
        if isinstance(node, dict):
            node_type = node.get("type")

            if node_type == "comparison":
                return self._convert_comparison(node)
            elif node_type == "logical_op":
                return self._convert_logical_op(node)
            elif node_type == "unary_op":
                return self._convert_unary_op(node)
            elif node_type == "collection_op":
                return self._convert_collection_op(node)
            elif node_type == "geo_expr":
                return self._convert_geo_expr(node)
            elif node_type == "nslookup_expr":
                return self._convert_nslookup_expr(node)

        raise TQLValidationError(f"Unknown node type: {node}")

    def _get_effective_field_type(self, field_name: str, mutators: List[Dict[str, Any]]) -> Optional[str]:
        """Determine the effective field type after applying mutators.

        Args:
            field_name: Original field name
            mutators: List of mutators applied to the field

        Returns:
            The effective field type after mutator transformations, or None if unchanged
        """
        if not mutators:
            return None

        # Define mutators that change field types
        type_changing_mutators = {
            "length": "integer",  # Returns integer count
            "avg": "float",  # Returns float average
            "average": "float",  # Alias for avg
            "sum": "float",  # Returns numeric sum
            "max": "float",  # Returns maximum value
            "min": "float",  # Returns minimum value
            "any": "boolean",  # Returns boolean
            "all": "boolean",  # Returns boolean
            "is_private": "boolean",  # Returns boolean
            "is_global": "boolean",  # Returns boolean
        }

        # Check mutators from left to right to find final type
        for mutator in mutators:
            mutator_name = mutator.get("name", "").lower()
            if mutator_name in type_changing_mutators:
                return type_changing_mutators[mutator_name]
            elif mutator_name == "split":
                # Split converts to array, but we need to know what comes after
                continue

        return None

    def _convert_comparison(self, node: Dict[str, Any]) -> Dict[str, Any]:  # noqa: C901
        """Convert a comparison operation to OpenSearch query."""
        field_name = node["field"]
        operator = node["operator"]
        value = node["value"]
        field_mutators = node.get("field_mutators", [])

        # Check if mutators change the field type
        effective_field_type = self._get_effective_field_type(field_name, field_mutators)

        # Check if field has mutators that will be post-processed
        has_post_process_mutators = bool(field_mutators)

        # Check if node has type-changing mutators (marked by mutator analyzer)
        has_type_changing_mutators = node.get("has_type_changing_mutators", False)

        # Check for intelligent mappings and validate type compatibility
        if (
            field_name in self.intelligent_mappings
            and effective_field_type is None
            and not has_post_process_mutators
            and not has_type_changing_mutators
        ):
            # Only validate original field type if no type-changing mutators and no post-processing
            mapping = self.intelligent_mappings[field_name]
            # This will raise TQLTypeError if incompatible
            mapping.validate_operator_for_field_type(operator)

        # Get the actual field name to use (could be enhanced to extract analyzer from query context)
        # For type-changing mutators, bypass field resolution since the field type doesn't matter
        if has_type_changing_mutators:
            # Just use the field name as-is since it will be post-processed
            opensearch_field = field_name
            use_wildcard = False
        else:
            opensearch_field, use_wildcard = self._resolve_field_name(field_name, operator)

        # Convert value types for OpenSearch
        value = self._convert_value(value)

        # Check if this comparison requires post-processing due to value mutators or type-changing mutators
        # Note: ALL and NOT_ALL operators are handled with script queries and don't need post-processing
        requires_post_processing = node.get("post_process_value", False) or has_type_changing_mutators

        if requires_post_processing:
            # For operations that require post-processing, we need to query more broadly
            # to ensure we get all potentially matching documents
            if operator in [
                "eq",
                "=",
                "ne",
                "!=",
                "contains",
                "not_contains",
                "startswith",
                "endswith",
                "not_startswith",
                "not_endswith",
                ">",
                ">=",
                "<",
                "<=",
                "gt",
                "gte",
                "lt",
                "lte",
                "between",
                "not_between",
            ]:
                # For these operators, use exists query to get all docs with the field
                # The actual filtering will happen in post-processing
                return {"exists": {"field": opensearch_field}}

        # Handle special wildcard conversion for keyword fields
        if use_wildcard and operator == "contains":
            return {"wildcard": {opensearch_field: f"*{value}*"}}

        # Convert operator to OpenSearch query
        if operator in ["eq", "="]:
            # Check if we're using a text field
            is_text_field = self._is_text_field(field_name, opensearch_field)

            # Use match query for text fields, term for others
            if is_text_field:
                return {"match": {opensearch_field: value}}
            else:
                return {"term": {opensearch_field: value}}
        elif operator in ["ne", "!="]:
            # Check if we're using a text field
            is_text_field = self._is_text_field(field_name, opensearch_field)

            # Use match query for text fields, term for others
            if is_text_field:
                return {"bool": {"must_not": {"match": {opensearch_field: value}}}}
            else:
                return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
        elif operator in ["gt", ">"]:
            return {"range": {opensearch_field: {"gt": value}}}
        elif operator in ["gte", ">="]:
            return {"range": {opensearch_field: {"gte": value}}}
        elif operator in ["lt", "<"]:
            return {"range": {opensearch_field: {"lt": value}}}
        elif operator in ["lte", "<="]:
            return {"range": {opensearch_field: {"lte": value}}}
        elif operator == "contains":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            if use_wildcard:
                # Keyword field needs wildcard conversion
                return {"wildcard": {opensearch_field: f"*{value}*"}}
            else:
                # For unmapped fields or when we have a text field, decide based on context
                # If we have intelligent mapping and selected a text field, use match
                # Otherwise default to wildcard for broader compatibility
                if field_name in self.intelligent_mappings:
                    # Use match query for text fields in intelligent mappings
                    return {"match": {opensearch_field: value}}
                else:
                    # Default to wildcard for unmapped fields
                    return {"wildcard": {opensearch_field: f"*{value}*"}}
        elif operator == "startswith":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            # For text fields, use wildcard query as prefix doesn't work well with analyzed text
            if field_name in self.intelligent_mappings:
                field_mapping = self.intelligent_mappings[field_name]
                if isinstance(field_mapping, FieldMapping):
                    # Check if we're using a text field
                    selected_field = field_mapping.get_field_for_operator(operator)
                    if selected_field in field_mapping.text_fields.values():
                        # Use wildcard for analyzed text fields with lowercase value
                        # Text analyzers typically lowercase the text
                        return {"wildcard": {opensearch_field: f"{value.lower()}*"}}
            return {"prefix": {opensearch_field: value}}
        elif operator == "endswith":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            # For text fields, lowercase the value as text analyzers typically lowercase
            if field_name in self.intelligent_mappings:
                field_mapping = self.intelligent_mappings[field_name]
                if isinstance(field_mapping, FieldMapping):
                    selected_field = field_mapping.get_field_for_operator(operator)
                    if selected_field in field_mapping.text_fields.values():
                        return {"wildcard": {opensearch_field: f"*{value.lower()}"}}
            return {"wildcard": {opensearch_field: f"*{value}"}}
        elif operator == "in":
            if isinstance(value, list):
                return {"terms": {opensearch_field: value}}
            else:
                return {"term": {opensearch_field: value}}
        elif operator == "regexp":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            return {"regexp": {opensearch_field: value}}
        elif operator == "exists":
            return {"exists": {"field": opensearch_field}}
        elif operator == "is":
            if value is None:
                return {"bool": {"must_not": {"exists": {"field": opensearch_field}}}}
            else:
                return {"term": {opensearch_field: value}}
        elif operator == "between":
            if isinstance(value, list) and len(value) == 2:
                # Convert values to appropriate types
                val1 = self._convert_value(value[0])
                val2 = self._convert_value(value[1])

                # Allow values in any order
                lower = (
                    min(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[0]
                )
                upper = (
                    max(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[1]
                )

                # For non-numeric values (like dates), we use the original order if we can't determine min/max
                if not isinstance(val1, (int, float)) or not isinstance(val2, (int, float)):
                    try:
                        # If values can be compared (like strings), try to determine order
                        if val1 > val2:
                            lower, upper = val2, val1
                        else:
                            lower, upper = val1, val2
                    except TypeError:
                        # If comparison fails, use the original order
                        lower, upper = value[0], value[1]

                return {"range": {opensearch_field: {"gte": lower, "lte": upper}}}
            else:
                raise TQLValidationError(f"Between operator requires a list with two values, got: {value}")
        elif operator == "cidr":
            # Unwrap single-element lists for CIDR
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            # OpenSearch uses special syntax for CIDR queries on IP fields
            # Format: field: "192.168.0.0/24"
            return {"term": {opensearch_field: value}}
        # Negated operators
        elif operator == "not_in":
            if isinstance(value, list):
                return {"bool": {"must_not": {"terms": {opensearch_field: value}}}}
            else:
                return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
        elif operator == "not_contains":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            if use_wildcard:
                return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}*"}}}}
            else:
                if field_name in self.intelligent_mappings:
                    return {"bool": {"must_not": {"match": {opensearch_field: value}}}}
                else:
                    return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}*"}}}}
        elif operator == "not_startswith":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            return {"bool": {"must_not": {"prefix": {opensearch_field: value}}}}
        elif operator == "not_endswith":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}"}}}}
        elif operator == "not_regexp":
            # Unwrap single-element lists for string operators
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            return {"bool": {"must_not": {"regexp": {opensearch_field: value}}}}
        elif operator == "not_exists":
            return {"bool": {"must_not": {"exists": {"field": opensearch_field}}}}
        elif operator == "not_between":
            if isinstance(value, list) and len(value) == 2:
                val1 = self._convert_value(value[0])
                val2 = self._convert_value(value[1])
                lower = (
                    min(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[0]
                )
                upper = (
                    max(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[1]
                )
                if not isinstance(val1, (int, float)) or not isinstance(val2, (int, float)):
                    try:
                        if val1 > val2:
                            lower, upper = val2, val1
                        else:
                            lower, upper = val1, val2
                    except TypeError:
                        lower, upper = value[0], value[1]
                return {"bool": {"must_not": {"range": {opensearch_field: {"gte": lower, "lte": upper}}}}}
            else:
                raise TQLValidationError(f"Not between operator requires a list with two values, got: {value}")
        elif operator == "not_cidr":
            # Unwrap single-element lists for CIDR
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            # Negated CIDR query
            return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
        elif operator == "is_not":
            if value is None:
                return {"exists": {"field": opensearch_field}}
            else:
                return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
        elif operator == "any":
            # ANY operator - matches if any element equals the value (default OpenSearch behavior)
            # Works for both single values and arrays
            # Handle case where value might be wrapped in a list from parsing
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            return {"term": {opensearch_field: value}}
        elif operator == "all":
            # ALL operator - for arrays, all elements must match
            # OpenSearch doesn't have a native "all elements must equal X" query
            # We can use a script query to check this
            return {
                "script": {
                    "script": {
                        "source": """
                            if (!doc.containsKey(params.field) || doc[params.field].size() == 0) {
                                return false;
                            }
                            for (value in doc[params.field]) {
                                if (value != params.value) {
                                    return false;
                                }
                            }
                            return true;
                        """,
                        "params": {"field": opensearch_field, "value": value},
                    }
                }
            }
        elif operator == "not_any":
            # NOT ANY - no element should match
            # Handle case where value might be wrapped in a list from parsing
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
        elif operator == "not_all":
            # NOT ALL - not all elements equal the value
            # This means: field doesn't exist OR array is empty OR at least one element is different
            # Handle case where value might be wrapped in a list from parsing
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            return {
                "script": {
                    "script": {
                        "source": """
                            // Check if field exists in the document mapping
                            if (!doc.containsKey(params.field)) {
                                // Field doesn't exist, so NOT ALL is true
                                return true;
                            }

                            // Get field values
                            def values = doc[params.field];

                            // Empty array means not all elements are the value (vacuously true)
                            if (values.size() == 0) {
                                return true;
                            }

                            // Check if all elements match
                            for (value in values) {
                                if (value != params.value) {
                                    // Found an element that doesn't match
                                    return true;
                                }
                            }

                            // All elements match, so NOT all is false
                            return false;
                        """,
                        "params": {"field": opensearch_field, "value": value},
                    }
                }
            }
        else:
            raise TQLUnsupportedOperationError(f"Operator '{operator}' not supported for OpenSearch")

    def _convert_logical_op(self, node: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a logical operation to OpenSearch query."""
        operator = node["operator"]
        left_query = self.convert_node(node["left"])
        right_query = self.convert_node(node["right"])

        if operator == "and":
            return {"bool": {"must": [left_query, right_query]}}
        elif operator == "or":
            return {"bool": {"should": [left_query, right_query], "minimum_should_match": 1}}
        else:
            raise TQLUnsupportedOperationError(f"Logical operator '{operator}' not supported for OpenSearch")

    def _convert_unary_op(self, node: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a unary operation to OpenSearch query."""
        operator = node["operator"]

        if operator == "not":
            operand = node["operand"]

            # Optimize double negation: NOT (NOT X) -> X
            if isinstance(operand, dict) and operand.get("type") == "unary_op" and operand.get("operator") == "not":
                return self.convert_node(operand["operand"])

            # Optimize negated operators: NOT (field not_in [values]) -> field in [values]
            if isinstance(operand, dict) and operand.get("type") == "comparison":
                op = operand.get("operator")
                if op == "not_in":
                    # Convert NOT (field not_in values) to (field in values)
                    field = operand["field"]
                    value = operand["value"]
                    opensearch_field, _ = self._resolve_field_name(field, "in")
                    if isinstance(value, list):
                        return {"terms": {opensearch_field: value}}
                    else:
                        return {"term": {opensearch_field: value}}
                elif op == "not_contains":
                    # Convert NOT (field not_contains value) to (field contains value)
                    field = operand["field"]
                    value = operand["value"]
                    # Unwrap single-element lists for string operators
                    if isinstance(value, list) and len(value) == 1:
                        value = value[0]
                    opensearch_field, use_wildcard = self._resolve_field_name(field, "contains")
                    if use_wildcard:
                        return {"wildcard": {opensearch_field: f"*{value}*"}}
                    else:
                        if field in self.intelligent_mappings:
                            return {"match": {opensearch_field: value}}
                        else:
                            return {"wildcard": {opensearch_field: f"*{value}*"}}
                # Add more optimizations for other negated operators as needed

            operand_query = self.convert_node(operand)
            return {"bool": {"must_not": operand_query}}
        else:
            raise TQLUnsupportedOperationError(f"Unary operator '{operator}' not supported for OpenSearch")

    def _convert_collection_op(self, node: Dict[str, Any]) -> Dict[str, Any]:  # noqa: C901
        """Convert a collection operation (ANY/ALL) to OpenSearch query."""
        operator = node["operator"]
        field_name = node["field"]
        comparison_operator = node["comparison_operator"]
        value = node["value"]

        # Get the mapped field name
        opensearch_field, _ = self._resolve_field_name(field_name, comparison_operator)

        # For OpenSearch, we're essentially doing a nested query or terms lookup
        # This would ideally use the nested query type, but we'll create a simplified version
        # that works for basic array fields

        if operator == "any":
            # ANY operator is like checking if any array element matches
            # For basic equality/comparison, we can use a term/terms query directly
            if comparison_operator in ["eq", "="]:
                return {"term": {opensearch_field: value}}
            elif comparison_operator in ["ne", "!="]:
                return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
            elif comparison_operator in ["in"]:
                if isinstance(value, list):
                    return {"terms": {opensearch_field: value}}
                else:
                    return {"term": {opensearch_field: value}}
            # For other comparisons, we create a range query
            elif comparison_operator in ["gt", ">"]:
                return {"range": {opensearch_field: {"gt": value}}}
            elif comparison_operator in ["gte", ">="]:
                return {"range": {opensearch_field: {"gte": value}}}
            elif comparison_operator in ["lt", "<"]:
                return {"range": {opensearch_field: {"lt": value}}}
            elif comparison_operator in ["lte", "<="]:
                return {"range": {opensearch_field: {"lte": value}}}
            # For string operations, we use the appropriate query type
            elif comparison_operator == "contains":
                return {"wildcard": {opensearch_field: f"*{value}*"}}
            elif comparison_operator == "startswith":
                return {"prefix": {opensearch_field: value}}
            elif comparison_operator == "endswith":
                return {"wildcard": {opensearch_field: f"*{value}"}}
            elif comparison_operator == "regexp":
                return {"regexp": {opensearch_field: value}}
            else:
                raise TQLUnsupportedOperationError(
                    f"Operator '{comparison_operator}' not supported for ANY collection operator in OpenSearch"
                )
        elif operator == "all":
            # ALL operator is more complex as we need to ensure all elements match
            # We'll use a must_not exists approach with a filter for elements that don't match

            # Create the negated condition
            if comparison_operator in ["eq", "="]:
                negated_condition = {"bool": {"must_not": {"term": {opensearch_field: value}}}}
            elif comparison_operator in ["ne", "!="]:
                negated_condition = {"term": {opensearch_field: value}}
            elif comparison_operator in ["in"]:
                if isinstance(value, list):
                    negated_condition = {"bool": {"must_not": {"terms": {opensearch_field: value}}}}
                else:
                    negated_condition = {"bool": {"must_not": {"term": {opensearch_field: value}}}}
            elif comparison_operator in ["gt", ">"]:
                negated_condition = {"range": {opensearch_field: {"lte": value}}}
            elif comparison_operator in ["gte", ">="]:
                negated_condition = {"range": {opensearch_field: {"lt": value}}}
            elif comparison_operator in ["lt", "<"]:
                negated_condition = {"range": {opensearch_field: {"gte": value}}}
            elif comparison_operator in ["lte", "<="]:
                negated_condition = {"range": {opensearch_field: {"gt": value}}}
            elif comparison_operator == "contains":
                negated_condition = {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}*"}}}}
            elif comparison_operator == "startswith":
                negated_condition = {"bool": {"must_not": {"prefix": {opensearch_field: value}}}}
            elif comparison_operator == "endswith":
                negated_condition = {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}"}}}}
            elif comparison_operator == "regexp":
                negated_condition = {"bool": {"must_not": {"regexp": {opensearch_field: value}}}}
            else:
                raise TQLUnsupportedOperationError(
                    f"Operator '{comparison_operator}' not supported for ALL collection operator in OpenSearch"
                )

            # For ALL to be true, there must not be any elements that don't match the condition
            return {"bool": {"must_not": negated_condition}}
        else:
            raise TQLUnsupportedOperationError(f"Collection operator '{operator}' not supported for OpenSearch")

    def _has_filtering_conditions(self, node: Any) -> bool:
        """Check if an AST node contains actual filtering conditions.

        Args:
            node: AST node to check

        Returns:
            True if the node contains filtering conditions, False otherwise
        """
        if not isinstance(node, dict):
            return False

        node_type = node.get("type")

        if node_type == "comparison":
            # All comparisons are filtering conditions
            return True
        elif node_type == "logical_op":
            # Check both sides of logical operation
            left_has = self._has_filtering_conditions(node.get("left"))
            right_has = self._has_filtering_conditions(node.get("right"))
            return left_has or right_has
        elif node_type == "unary_op":
            # Check the operand
            return self._has_filtering_conditions(node.get("operand"))
        elif node_type == "collection_op":
            # Collection operations are filtering conditions
            return True
        elif node_type == "geo_expr":
            # Check nested geo conditions
            return self._has_filtering_conditions(node.get("conditions"))
        elif node_type == "nslookup_expr":
            # Check nested nslookup conditions
            return self._has_filtering_conditions(node.get("conditions"))

        return False

    def _convert_geo_expr(self, node: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a geo expression to OpenSearch query.

        Note: Geo expressions require post-processing since the geoip_lookup
        must be applied to results after they return from OpenSearch.

        The OpenSearch query depends on whether there are geo conditions:
        - If there are geo conditions, we need an exists query on the IP field
          (since we can only apply geo filters to IPs that exist)
        - If there are no conditions (just enrichment), we return match_all

        Args:
            node: Geo expression AST node

        Returns:
            OpenSearch query
        """
        field_name = node["field"]
        conditions = node.get("conditions")

        # Check if there are actual filtering conditions
        if conditions and self._has_filtering_conditions(conditions):
            # We have geo conditions that will filter results, so we need exists query
            # Try to resolve the field name, but if it fails, use the original
            try:
                opensearch_field, _ = self._resolve_field_name(field_name, "exists")
            except TQLUnsupportedOperationError:
                # Field might not have mappings or exists might not be supported
                # Use the original field name
                opensearch_field = field_name

            return {"exists": {"field": opensearch_field}}
        else:
            # No filtering conditions, just enrichment - match all documents
            return {"match_all": {}}

    def _convert_nslookup_expr(self, node: Dict[str, Any]) -> Dict[str, Any]:
        """Convert an nslookup expression to OpenSearch query.

        Note: NSLookup expressions require post-processing since the DNS lookup
        must be applied to results after they return from OpenSearch.

        The OpenSearch query depends on whether there are DNS conditions:
        - If there are DNS conditions, we need an exists query on the field
          (since we can only apply DNS filters to fields that exist)
        - If there are no conditions (just enrichment), we return match_all

        Args:
            node: NSLookup expression AST node

        Returns:
            OpenSearch query
        """
        field_name = node["field"]
        conditions = node.get("conditions")

        # Check if there are actual filtering conditions
        if conditions and self._has_filtering_conditions(conditions):
            # We have DNS conditions that will filter results, so we need exists query
            # Try to resolve the field name, but if it fails, use the original
            try:
                opensearch_field, _ = self._resolve_field_name(field_name, "exists")
            except TQLUnsupportedOperationError:
                # Field might not have mappings or exists might not be supported
                # Use the original field name
                opensearch_field = field_name

            return {"exists": {"field": opensearch_field}}
        else:
            # No filtering conditions, just enrichment - match all documents
            return {"match_all": {}}

    def _resolve_field_name(
        self, field_name: str, operator: str, preferred_analyzer: Optional[str] = None
    ) -> tuple[str, bool]:
        """Resolve field name based on mappings and operator.

        Args:
            field_name: The TQL field name
            operator: The operator being used
            preferred_analyzer: Preferred analyzer for text operations

        Returns:
            Tuple of (resolved_field_name, use_wildcard_conversion)
        """
        # Check intelligent mappings first
        if field_name in self.intelligent_mappings:
            field_mapping = self.intelligent_mappings[field_name]
            resolved_field = field_mapping.get_field_for_operator(operator, preferred_analyzer)
            use_wildcard = field_mapping.needs_wildcard_conversion(operator, preferred_analyzer)
            # If resolved field is empty, use the original field name
            if not resolved_field:
                resolved_field = field_name
            return resolved_field, use_wildcard

        # Check simple mappings
        elif field_name in self.simple_mappings:
            return self.simple_mappings[field_name], False

        # No mapping, use field name as-is
        else:
            return field_name, False

    def _convert_value(self, value: Any) -> Any:
        """Convert value types for OpenSearch compatibility.

        Args:
            value: Value to convert

        Returns:
            Converted value (bool, None, or original)
        """
        if isinstance(value, str):
            if value.lower() == "true":
                return True
            elif value.lower() == "false":
                return False
            elif value.lower() == "null":
                return None
        return value

    def _is_text_field(self, field_name: str, opensearch_field: str) -> bool:
        """Check if the resolved field is a text field.

        Args:
            field_name: Original field name
            opensearch_field: Resolved OpenSearch field name

        Returns:
            True if it's a text field, False otherwise
        """
        # Method 1: Check if field is in intelligent mappings
        if field_name in self.intelligent_mappings:
            mapping = self.intelligent_mappings[field_name]
            # Check if the selected field is a text field
            field_type = mapping.field_types.get(opensearch_field, "keyword")
            if field_type == "text":
                return True

        # Method 2: Check if the opensearch_field is a variant of a mapped field
        # Extract base field name (e.g., "winlog.computer_name" from "winlog.computer_name.text")
        base_field = opensearch_field
        field_suffix = ""

        if "." in opensearch_field:
            parts = opensearch_field.rsplit(".", 1)
            possible_base = parts[0]
            possible_suffix = parts[1]

            # Check if this looks like a field variant
            if possible_suffix in ["text", "keyword", "lowercase", "english", "standard"]:
                base_field = possible_base
                field_suffix = possible_suffix

        # Check if base field is in mappings
        if base_field in self.intelligent_mappings:
            mapping = self.intelligent_mappings[base_field]
            # Check the field type of the specific variant
            variant_type = mapping.field_types.get(opensearch_field, None)
            if variant_type == "text":
                return True
            elif field_suffix == "text" and variant_type is None:
                # If suffix is "text" and we don't have explicit type info, assume it's a text field
                return True

        return False
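
For orientation, the sketch below shows how the converter in this file can be driven end to end. It is a minimal illustration, not part of the package: the "src_ip" -> "source.ip" simple mapping and the example AST node are assumptions chosen to exercise _resolve_field_name and _convert_comparison, and the node shape simply mirrors the keys those methods read ("type", "field", "operator", "value").

# Hypothetical usage sketch; the mapping and AST node below are illustrative, not shipped with the package.
from tql.opensearch_components.query_converter import QueryConverter

converter = QueryConverter(field_mappings={}, simple_mappings={"src_ip": "source.ip"})

node = {
    "type": "comparison",
    "field": "src_ip",
    "operator": "eq",
    "value": "10.0.0.5",
}

# "src_ip" has no intelligent mapping, so it resolves through simple_mappings and,
# because the resolved field is not a text field, an exact term query is emitted:
# {"term": {"source.ip": "10.0.0.5"}}
print(converter.convert_node(node))

Nesting follows the same path: a "logical_op" node wraps the converted left and right fragments in a bool must (for "and") or a bool should with minimum_should_match of 1 (for "or").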