tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/METADATA +24 -1
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/RECORD +27 -27
- tql/core.py +225 -54
- tql/core_components/opensearch_operations.py +415 -99
- tql/core_components/stats_operations.py +11 -1
- tql/evaluator.py +39 -2
- tql/evaluator_components/special_expressions.py +25 -6
- tql/evaluator_components/value_comparison.py +31 -3
- tql/mutator_analyzer.py +640 -242
- tql/mutators/__init__.py +5 -1
- tql/mutators/dns.py +76 -53
- tql/mutators/security.py +101 -100
- tql/mutators/string.py +74 -0
- tql/opensearch_components/field_mapping.py +9 -3
- tql/opensearch_components/lucene_converter.py +12 -0
- tql/opensearch_components/query_converter.py +134 -25
- tql/opensearch_mappings.py +2 -2
- tql/opensearch_stats.py +170 -39
- tql/parser.py +92 -37
- tql/parser_components/ast_builder.py +37 -1
- tql/parser_components/field_extractor.py +9 -1
- tql/parser_components/grammar.py +32 -8
- tql/post_processor.py +489 -31
- tql/stats_evaluator.py +170 -12
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/LICENSE +0 -0
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/WHEEL +0 -0
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/entry_points.txt +0 -0
@@ -175,6 +175,16 @@ class StatsOperations:
                 suggestions.append(f"Use aliases to distinguish: {key} as alias1, {key} as alias2")
             agg_fields.append(key)
 
+        # Normalize group_by to extract just field names for compatibility
+        normalized_group_by = []
+        for field in group_by:
+            if isinstance(field, str):
+                normalized_group_by.append(field)
+            elif isinstance(field, dict) and "field" in field:
+                normalized_group_by.append(field["field"])
+            else:
+                normalized_group_by.append(str(field))
+
         # Build analysis result
         result = {
             "valid": True,
@@ -182,7 +192,7 @@ class StatsOperations:
             "query": query,
             "ast": ast,
             "aggregations": aggregations,
-            "group_by": group_by,
+            "group_by": normalized_group_by,
             "warnings": warnings,
             "suggestions": suggestions,
         }
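
For reference, the normalization added above can be exercised on its own. This is a standalone sketch, not the package's API; the mixed string/dict input shape comes from the hunk, while the function name and sample values are made up:

    def normalize_group_by(group_by):
        """Reduce mixed group-by entries (strings or {"field": ...} dicts) to plain field names."""
        normalized = []
        for field in group_by:
            if isinstance(field, str):
                normalized.append(field)
            elif isinstance(field, dict) and "field" in field:
                normalized.append(field["field"])
            else:
                normalized.append(str(field))
        return normalized

    # Mixed input shapes collapse to plain field names
    print(normalize_group_by(["source.ip", {"field": "destination.port", "size": 10}]))
    # ['source.ip', 'destination.port']
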
tql/evaluator.py
CHANGED
@@ -96,6 +96,18 @@ class TQLEvaluator:
             return self.special_evaluator.evaluate_geo_expr(node, record, field_mappings)
         elif node_type == "nslookup_expr":
             return self.special_evaluator.evaluate_nslookup_expr(node, record, field_mappings)
+        elif node_type == "query_with_stats":
+            # For query_with_stats, only evaluate the filter part
+            # The stats part is handled separately
+            filter_node = node.get("filter")
+            if filter_node:
+                return self._evaluate_node(filter_node, record, field_mappings)
+            else:
+                return True  # No filter means match all
+        elif node_type == "stats_expr":
+            # Pure stats queries match all records
+            # The aggregations are handled separately
+            return True
 
         # Unknown node type
         return False
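
The dispatch added here routes query_with_stats nodes to their filter and lets stats_expr match everything. A minimal sketch of that behavior with a toy evaluator; the node shapes ("type", "filter") follow the hunk, the rest (comparison handling, sample record) is illustrative:

    def evaluate_node(node, record):
        node_type = node.get("type")
        if node_type == "comparison":
            return record.get(node["field"]) == node["value"]
        if node_type == "query_with_stats":
            # Only the filter part gates record matching; stats are computed separately.
            filter_node = node.get("filter")
            return evaluate_node(filter_node, record) if filter_node else True
        if node_type == "stats_expr":
            # Pure stats queries match every record.
            return True
        return False

    record = {"status": 200}
    print(evaluate_node({"type": "stats_expr"}, record))  # True
    print(evaluate_node({"type": "query_with_stats",
                         "filter": {"type": "comparison", "field": "status", "value": 404}},
                        record))                          # False
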
@@ -115,7 +127,8 @@
         """
         field_name = node["field"]
         operator = node["operator"]
-        expected_value = node.get("value")
+        # For exists/not_exists operators, value is None
+        expected_value = node.get("value") if operator not in ["exists", "not_exists"] else None
         field_mutators = node.get("field_mutators", [])
         value_mutators = node.get("value_mutators", [])
         type_hint = node.get("type_hint")
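
The new line only changes how expected_value is picked up: operators that carry no right-hand side (exists, not_exists) get None instead of whatever node.get("value") returns. A small illustration with hypothetical nodes in the shape this hunk reads (field, operator, optional value):

    eq_node     = {"type": "comparison", "field": "user.name", "operator": "eq", "value": "alice"}
    exists_node = {"type": "comparison", "field": "user.name", "operator": "exists"}  # no "value" key

    for node in (eq_node, exists_node):
        operator = node["operator"]
        expected_value = node.get("value") if operator not in ["exists", "not_exists"] else None
        print(operator, expected_value)
    # eq alice
    # exists None
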
@@ -128,7 +141,15 @@
 
         # Apply field mutators if any
         if field_mutators and field_value is not self._MISSING_FIELD:
-            field_value = apply_mutators(field_value, field_mutators, field_name, record)
+            try:
+                field_value = apply_mutators(field_value, field_mutators, field_name, record)
+            except (ValueError, TypeError):
+                # If mutators fail, treat as missing field for exists/not_exists checks
+                if operator in ["exists", "not_exists"]:
+                    field_value = self._MISSING_FIELD
+                else:
+                    # For other operators, the comparison will fail naturally
+                    return False
 
         # Apply value mutators if any
         if value_mutators:
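
The guard added around apply_mutators turns mutator failures into predictable comparison results instead of exceptions. A standalone sketch of that policy; _MISSING_FIELD, _NO_MATCH and the inline mutator loop are stand-ins, only the ValueError/TypeError handling mirrors the hunk:

    _MISSING_FIELD = object()  # stand-in sentinel for "field not present"
    _NO_MATCH = object()       # stand-in signal: the comparison should return False

    def mutate_field(value, mutators, operator):
        """Apply mutators, degrading gracefully when one of them raises."""
        try:
            for mutate in mutators:  # e.g. [int] to coerce "80" -> 80
                value = mutate(value)
            return value
        except (ValueError, TypeError):
            # Existence checks see a missing field; other operators short-circuit to "no match".
            return _MISSING_FIELD if operator in ["exists", "not_exists"] else _NO_MATCH

    print(mutate_field("80", [int], "eq"))                         # 80
    print(mutate_field("n/a", [int], "exists") is _MISSING_FIELD)  # True
    print(mutate_field("n/a", [int], "eq") is _NO_MATCH)           # True
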
@@ -212,9 +233,19 @@
             else:
                 # Field exists - evaluate normally
                 return not self._evaluate_node(operand, record, field_mappings)
+        elif self._is_logical_operation(operand):
+            # For logical operations (AND/OR), always evaluate normally
+            # They can handle missing fields correctly
+            return not self._evaluate_node(operand, record, field_mappings)
         elif self._operand_has_missing_fields(operand, record, field_mappings):
             # For operations on missing fields (except exists/null checks), NOT returns True
             # This matches OpenSearch behavior where must_not includes docs with missing fields
+            # However, for collection operators, we should evaluate normally since they handle missing fields
+            operand_type = operand.get("type")
+            operand_operator = operand.get("operator", "")
+            if operand_type == "comparison" and operand_operator in ["any", "all", "none"]:
+                # Collection operators handle missing fields in their own evaluation
+                return not self._evaluate_node(operand, record, field_mappings)
             return True
         else:
             # Standard NOT operation
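
Taken together, the NOT handling now distinguishes three cases: logical subtrees (AND/OR) and collection operators (any/all/none) are evaluated normally and negated, while plain comparisons on missing fields keep the blanket True to match OpenSearch must_not. A condensed sketch of that decision order; the helper callables stand in for the real private methods:

    def negate(operand, record, evaluate, has_missing_fields, is_logical):
        """Decision order for NOT over an operand subtree (exists/null handling elided)."""
        if is_logical(operand):
            # AND/OR subtrees already cope with missing fields: evaluate normally.
            return not evaluate(operand, record)
        if has_missing_fields(operand, record):
            if operand.get("type") == "comparison" and operand.get("operator", "") in ["any", "all", "none"]:
                # Collection operators define their own missing-field semantics.
                return not evaluate(operand, record)
            # Plain comparison on a missing field: NOT matches, like OpenSearch must_not.
            return True
        return not evaluate(operand, record)
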
@@ -352,6 +383,12 @@
             return value is None or (isinstance(value, str) and value.lower() == "null")
         return False
 
+    def _is_logical_operation(self, node: Any) -> bool:
+        """Check if a node is a logical operation (AND/OR)."""
+        if isinstance(node, dict) and node.get("type") == "logical_op":
+            return node.get("operator") in ["and", "or"]
+        return False
+
     def _apply_collection_mutators(
         self, field_value: Any, mutators: List[Dict[str, Any]], field_name: str, record: Dict[str, Any]
     ) -> Any:
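
Since _is_logical_operation only inspects the node dict, its behavior is easy to pin down. A short usage sketch; the node shapes follow the "logical_op" type checked in the code, and the inputs are made up:

    def _is_logical_operation(node):
        """Check if a node is a logical operation (AND/OR)."""
        if isinstance(node, dict) and node.get("type") == "logical_op":
            return node.get("operator") in ["and", "or"]
        return False

    print(_is_logical_operation({"type": "logical_op", "operator": "and"}))  # True
    print(_is_logical_operation({"type": "comparison", "operator": "eq"}))   # False
    print(_is_logical_operation("not a dict"))                               # False
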
tql/evaluator_components/special_expressions.py
CHANGED

@@ -162,8 +162,8 @@ class SpecialExpressionEvaluator:
             temp_record["as"] = geo_data["as"]
             return self._evaluate_node(conditions, temp_record, {})
         else:
-            # No conditions,
-            return
+            # No conditions, enrichment-only - always return True
+            return True
 
     def evaluate_nslookup_expr(  # noqa: C901
         self, node: Dict[str, Any], record: Dict[str, Any], field_mappings: Dict[str, str]
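
The point of this two-line change (and the matching one at the end of this file) is that enrichment-only expressions, where no conditions block is present, should match rather than drop the record. If the removed line was a bare return, as the captured text suggests, it produced None, which a boolean filter treats as no match; the Python detail:

    def enrichment_only():
        return  # a bare return yields None

    print(enrichment_only())        # None
    print(bool(enrichment_only()))  # False, i.e. treated as "no match"
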
@@ -283,8 +283,27 @@
         if field_mutators:
             field_value = apply_mutators(field_value, field_mutators, field_name, record)
 
-        # Apply nslookup
-
+        # Apply nslookup (this enriches the record)
+        apply_mutators(field_value, [nslookup_mutator], field_name, record)
+
+        # Now get the DNS data from where it was stored
+        if "." in field_name:
+            # Nested field like destination.ip
+            parent_path = field_name.rsplit(".", 1)[0]
+            parent = record
+            for part in parent_path.split("."):
+                if isinstance(parent, dict) and part in parent:
+                    parent = parent[part]
+                else:
+                    parent = None
+                    break
+            if parent and isinstance(parent, dict) and "domain" in parent:
+                dns_data = parent["domain"]
+            else:
+                dns_data = None
+        else:
+            # Top-level field
+            dns_data = record.get("enrichment", {}).get("domain")
 
         # Now evaluate the conditions against the DNS data
         if conditions:
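
The dotted-path walk added here can be exercised on its own. The sketch below mirrors the lookup logic (parent path resolution, then a sibling "domain" key, with enrichment.domain as the top-level fallback); the function name and sample record are invented:

    def lookup_dns_data(record, field_name):
        """Return the DNS data stored next to a (possibly nested) field, if any."""
        if "." in field_name:
            parent_path = field_name.rsplit(".", 1)[0]
            parent = record
            for part in parent_path.split("."):
                if isinstance(parent, dict) and part in parent:
                    parent = parent[part]
                else:
                    return None
            return parent.get("domain") if isinstance(parent, dict) else None
        # Top-level fields keep their DNS data under enrichment.domain
        return record.get("enrichment", {}).get("domain")

    record = {"destination": {"ip": "93.184.216.34", "domain": {"name": "example.com"}}}
    print(lookup_dns_data(record, "destination.ip"))  # {'name': 'example.com'}
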
@@ -292,5 +311,5 @@
             temp_record = dns_data if dns_data else {}
             return self._evaluate_node(conditions, temp_record, {})
         else:
-            # No conditions,
-            return
+            # No conditions, enrichment-only - always return True
+            return True
tql/evaluator_components/value_comparison.py
CHANGED

@@ -32,6 +32,10 @@ class ValueComparator:
             return False
         elif operator in ["not_exists"]:
            return True  # Field doesn't exist, so "not exists" is true
+        elif operator == "is_not":
+            # For "is not null", missing fields should return False (to match OpenSearch behavior)
+            # OpenSearch "is not null" translates to "exists", which only matches if field is present
+            return False  # Missing fields return False for all "is not" comparisons
         # For negated string operators, missing fields should return True
         # (e.g., if field doesn't exist, it doesn't contain/start with/end with the value)
         elif operator in ["not_contains", "not_startswith", "not_endswith", "not_regexp"]:
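
Together with the surrounding branches, the missing-field policy of this method now reads: exists is False (the field is absent), not_exists is True, is_not (e.g. "is not null") is False, and the negated string operators are True. A compact sketch of that decision table, limited to the branches visible in the diff:

    def missing_field_result(operator):
        """What a comparison against a missing field returns, per operator family."""
        if operator == "exists":
            return False
        if operator == "not_exists":
            return True
        if operator == "is_not":
            # "is not null" maps to an OpenSearch exists query, so a missing field is False.
            return False
        if operator in ["not_contains", "not_startswith", "not_endswith", "not_regexp"]:
            # A missing field trivially does not contain / start or end with anything.
            return True
        raise NotImplementedError("remaining operators are handled later in the real method")

    print(missing_field_result("is_not"))        # False
    print(missing_field_result("not_contains"))  # True
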
@@ -66,36 +70,60 @@
 
         try:
             if operator in ["eq", "="]:
+                # Handle array fields - check if ANY element equals expected value
+                if isinstance(field_value, (list, tuple)):
+                    return expected_value in field_value
                 return field_value == expected_value
             elif operator in ["ne", "!="]:
+                # Handle array fields - check if expected value is NOT in array
+                if isinstance(field_value, (list, tuple)):
+                    return expected_value not in field_value
                 return field_value != expected_value
             elif operator in ["gt", ">"]:
+                # Handle array fields - check if ANY element is greater than expected value
+                if isinstance(field_value, (list, tuple)):
+                    return any(self._convert_numeric(elem) > expected_value for elem in field_value)
                 return field_value > expected_value
             elif operator in ["gte", ">="]:
+                # Handle array fields - check if ANY element is greater than or equal to expected value
+                if isinstance(field_value, (list, tuple)):
+                    return any(self._convert_numeric(elem) >= expected_value for elem in field_value)
                 return field_value >= expected_value
             elif operator in ["lt", "<"]:
+                # Handle array fields - check if ANY element is less than expected value
+                if isinstance(field_value, (list, tuple)):
+                    return any(self._convert_numeric(elem) < expected_value for elem in field_value)
                 return field_value < expected_value
             elif operator in ["lte", "<="]:
+                # Handle array fields - check if ANY element is less than or equal to expected value
+                if isinstance(field_value, (list, tuple)):
+                    return any(self._convert_numeric(elem) <= expected_value for elem in field_value)
                 return field_value <= expected_value
             elif operator == "contains":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-                # Handle list fields by checking if
+                # Handle list fields by checking if ANY element contains the expected value
                 if isinstance(field_value, list):
-                    # For
-                    return expected_value in field_value
+                    # For arrays, check if ANY element contains the expected value
+                    return any(str(expected_value) in str(elem) for elem in field_value)
                 else:
                     return str(expected_value) in str(field_value)
             elif operator == "startswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
+                # Handle array fields - check if ANY element starts with expected value
+                if isinstance(field_value, (list, tuple)):
+                    return any(str(elem).startswith(str(expected_value)) for elem in field_value)
                 return str(field_value).startswith(str(expected_value))
             elif operator == "endswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
+                # Handle array fields - check if ANY element ends with expected value
+                if isinstance(field_value, (list, tuple)):
+                    return any(str(elem).endswith(str(expected_value)) for elem in field_value)
                 return str(field_value).endswith(str(expected_value))
             elif operator == "in":
                 if isinstance(expected_value, list):