tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
tql/post_processor.py
ADDED
|
@@ -0,0 +1,737 @@
|
|
|
1
|
+
"""Post-processor for applying mutators to OpenSearch query results.
|
|
2
|
+
|
|
3
|
+
This module handles the application of mutators that cannot be pre-processed
|
|
4
|
+
by OpenSearch field mappings/analyzers and must be applied to results after
|
|
5
|
+
they are returned from OpenSearch.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import copy
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
from .mutator_analyzer import PostProcessingRequirement
|
|
12
|
+
from .mutators import apply_mutators
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class QueryPostProcessor:
    """Applies post-processing mutators to OpenSearch query results.

    Some mutators cannot be pre-processed into the OpenSearch query itself
    (enrichment lookups, type-changing transforms), so they are applied here
    to the records after OpenSearch returns them.  The class also re-evaluates
    filter conditions that depend on those mutated values.
    """

    # Mutators whose output type/shape differs from the input field.  Their
    # results are used only for filtering, so they are written to a temporary
    # key instead of overwriting the original field value.
    _TYPE_CHANGING_FILTER_MUTATORS = frozenset(
        {
            "is_private",
            "is_global",
            "length",
            "any",
            "all",
            "avg",
            "average",
            "sum",
            "max",
            "min",
            "split",
        }
    )

    # Operators whose presence indicates the mutated value is only needed to
    # re-evaluate a filter, not to transform the stored record.
    _FILTERING_OPERATORS = frozenset(
        {
            "contains",
            "not_contains",
            "startswith",
            "endswith",
            "not_startswith",
            "not_endswith",
            "eq",
            "=",
            "ne",
            "!=",
            ">",
            ">=",
            "<",
            "<=",
            "gt",
            "gte",
            "lt",
            "lte",
        }
    )

    def __init__(self):
        """Initialize the post-processor."""

    def filter_results(
        self, results: List[Dict[str, Any]], requirements: List[PostProcessingRequirement]
    ) -> List[Dict[str, Any]]:
        """Filter results based on post-processing requirements.

        This method handles operator-based filtering for all operators that
        require post-processing evaluation.

        Args:
            results: List of result records from OpenSearch
            requirements: List of post-processing requirements

        Returns:
            Filtered list of results
        """
        if not requirements:
            return results

        filtered_results = []

        for result in results:
            should_include = True

            # AND semantics: every operator-bearing requirement must pass.
            for requirement in requirements:
                if requirement.metadata and "operator" in requirement.metadata:
                    operator = requirement.metadata["operator"]
                    value = requirement.metadata.get("value")

                    # Prefer the mutated value stored by _apply_field_mutators
                    # under a literal top-level temp key; fall back to the
                    # original (possibly nested) field otherwise.
                    temp_field_name = f"__{requirement.field_name}_mutated__"
                    if temp_field_name in result:
                        field_value = result[temp_field_name]
                    else:
                        field_value = self._get_field_value(result, requirement.field_name)

                    if not self._check_operator(field_value, operator, value):
                        should_include = False
                        break

            if should_include:
                filtered_results.append(result)

        return filtered_results

    def _check_operator(self, field_value: Any, operator: str, value: Any) -> bool:  # noqa: C901
        """Check if a field value matches the given operator and value.

        Args:
            field_value: The field value to check
            operator: The operator to apply
            value: The value to compare against

        Returns:
            True if the operator check passes, False otherwise
        """
        # Unwrap single-element lists for comparison
        if isinstance(value, list) and len(value) == 1:
            value = value[0]

        # Missing fields fail every operator check.
        if field_value is None:
            return False

        # String operators: case-insensitive, coercing both sides to str.
        if operator == "contains":
            return str(value).lower() in str(field_value).lower()
        elif operator == "not_contains":
            return str(value).lower() not in str(field_value).lower()
        elif operator == "startswith":
            return str(field_value).lower().startswith(str(value).lower())
        elif operator == "not_startswith":
            return not str(field_value).lower().startswith(str(value).lower())
        elif operator == "endswith":
            return str(field_value).lower().endswith(str(value).lower())
        elif operator == "not_endswith":
            return not str(field_value).lower().endswith(str(value).lower())

        # Equality operators
        elif operator in ["eq", "="]:
            # Query values arrive as strings; map "true"/"false" onto booleans.
            if isinstance(field_value, bool) and isinstance(value, str):
                if value.lower() == "true":
                    return field_value is True
                elif value.lower() == "false":
                    return field_value is False
            return field_value == value
        elif operator in ["ne", "!="]:
            if isinstance(field_value, bool) and isinstance(value, str):
                if value.lower() == "true":
                    return field_value is not True
                elif value.lower() == "false":
                    return field_value is not False
            return field_value != value

        # Comparison operators: numeric when both sides convert to float,
        # otherwise fall back to lexicographic string comparison.
        elif operator in ["gt", ">"]:
            try:
                return float(field_value) > float(value)
            except (ValueError, TypeError):
                return str(field_value) > str(value)
        elif operator in ["gte", ">="]:
            try:
                return float(field_value) >= float(value)
            except (ValueError, TypeError):
                return str(field_value) >= str(value)
        elif operator in ["lt", "<"]:
            try:
                return float(field_value) < float(value)
            except (ValueError, TypeError):
                return str(field_value) < str(value)
        elif operator in ["lte", "<="]:
            try:
                return float(field_value) <= float(value)
            except (ValueError, TypeError):
                return str(field_value) <= str(value)

        # Array operators
        elif operator == "all":
            if isinstance(field_value, (list, tuple)):
                # ALL elements must equal the value; empty arrays fail.
                return len(field_value) > 0 and all(elem == value for elem in field_value)
            else:
                # Single values degrade to simple equality.
                return field_value == value
        elif operator == "not_all":
            if isinstance(field_value, (list, tuple)):
                # Fails only when every element equals the value; empty passes.
                return len(field_value) == 0 or not all(elem == value for elem in field_value)
            else:
                return field_value != value

        # Unknown operators fail closed.
        return False

    def process_results(
        self,
        results: List[Dict[str, Any]],
        requirements: List[PostProcessingRequirement],
        track_enrichments: bool = False,
    ) -> List[Dict[str, Any]]:
        """Apply post-processing mutators to query results.

        Args:
            results: List of result records from OpenSearch
            requirements: List of post-processing requirements
            track_enrichments: If True, track which records were enriched

        Returns:
            List of processed results with mutators applied.
            If track_enrichments is True, each result carries a '_enriched' flag.
        """
        if not requirements:
            return results

        processed_results = []

        for result in results:
            # Deep copy so the caller's records are never mutated in place.
            processed_result = copy.deepcopy(result)
            enriched = False

            for requirement in requirements:
                try:
                    was_enriched = self._apply_requirement(processed_result, requirement)
                    if was_enriched:
                        enriched = True
                except Exception:
                    # Best-effort: a single failing requirement must not drop
                    # the record or abort the remaining requirements.
                    continue

            if track_enrichments:
                processed_result["_enriched"] = enriched

            processed_results.append(processed_result)

        return processed_results

    def _apply_requirement(self, result: Dict[str, Any], requirement: PostProcessingRequirement) -> bool:
        """Apply a single post-processing requirement to a result.

        Dispatches on ``requirement.applies_to``.

        Args:
            result: The result record to modify
            requirement: The post-processing requirement to apply

        Returns:
            True if the record was enriched, False otherwise
        """
        if requirement.applies_to == "field":
            return self._apply_field_mutators(result, requirement)
        elif requirement.applies_to == "value":
            return self._apply_value_mutators(result, requirement)
        elif requirement.applies_to == "geo_expr":
            return self._apply_geo_expression(result, requirement)
        elif requirement.applies_to == "nslookup_expr":
            return self._apply_nslookup_expression(result, requirement)
        return False

    def _apply_field_mutators(self, result: Dict[str, Any], requirement: PostProcessingRequirement) -> bool:
        """Apply field mutators to a result record.

        Args:
            result: The result record to modify
            requirement: The field mutator requirement

        Returns:
            True if enrichment occurred, False otherwise
        """
        # Operator-only requirements (e.g. ALL with no mutators) are handled
        # entirely by filter_results.
        if requirement.metadata and "operator" in requirement.metadata and not requirement.mutators:
            return False

        # Read via the mapped (storage-side) field name.
        field_value = self._get_field_value(result, requirement.mapped_field_name)

        if field_value is None:
            return False

        try:
            mutated_value = apply_mutators(field_value, requirement.mutators, requirement.field_name, result)

            mutator_names = {m.get("name", "").lower() for m in requirement.mutators}
            operator = requirement.metadata.get("operator", "") if requirement.metadata else ""
            is_filtering_operation = operator in self._FILTERING_OPERATORS

            if mutator_names & self._TYPE_CHANGING_FILTER_MUTATORS or is_filtering_operation:
                # Filtering-only result: stash it under a literal top-level
                # temp key.  NOTE: this key is intentionally NOT written as a
                # dotted path — filter_results looks it up as a literal key,
                # so dotted field names (e.g. "destination.ip") must not be
                # expanded into nested objects here.
                result[f"__{requirement.field_name}_mutated__"] = mutated_value
            else:
                # Transforming mutator: replace the value under the original
                # (query-side) field name.
                self._set_field_value(result, requirement.field_name, mutated_value)

            # Report enrichment if any applied mutator is an enrichment one.
            from .mutators import ENRICHMENT_MUTATORS

            for mutator in requirement.mutators:
                if mutator.get("name", "").lower() in ENRICHMENT_MUTATORS:
                    return True

        except Exception:
            # Best-effort: if mutation fails, keep the original value.
            pass

        return False

    def _apply_value_mutators(self, result: Dict[str, Any], requirement: PostProcessingRequirement) -> bool:
        """Apply value mutators to a result record.

        Note: Value mutators are applied to query values during evaluation,
        not to results.  Included for completeness of the dispatch table.

        Args:
            result: The result record to modify
            requirement: The value mutator requirement

        Returns:
            False (value mutators do not enrich records)
        """
        return False

    @staticmethod
    def _extract_custom_field(mutators: List[Dict[str, Any]]) -> Optional[str]:
        """Return the 'field' mutator parameter value, if one is present.

        Mutator params may arrive either as a dict or as a list of
        ``[name, value]`` pairs; both formats are handled.

        Args:
            mutators: Mutator specifications to scan

        Returns:
            The custom destination field path, or None
        """
        for mutator in mutators:
            if "params" not in mutator:
                continue
            params = mutator["params"]
            if isinstance(params, list):
                params = {param[0]: param[1] for param in params if len(param) == 2}
            if "field" in params:
                return params["field"]
        return None

    def _apply_geo_expression(self, result: Dict[str, Any], requirement: PostProcessingRequirement) -> bool:
        """Apply geo expression enrichment to a result.

        Args:
            result: The result record to modify
            requirement: The geo expression requirement

        Returns:
            True if geo enrichment occurred, False otherwise
        """
        ip_value = self._get_field_value(result, requirement.field_name)

        if not ip_value:
            # No IP value, nothing to enrich.
            return False

        try:
            # requirement.mutators contains the geoip_lookup mutator.
            geo_data = apply_mutators(ip_value, requirement.mutators, requirement.field_name, result)

            # geoip_lookup returns a dict with "geo" and "as" sections that
            # must be nested under the parent of the IP field (ECS layout).
            if isinstance(geo_data, dict) and geo_data:
                custom_field = self._extract_custom_field(requirement.mutators)

                if custom_field:
                    # Caller chose an explicit destination for the geo data.
                    parent = self._get_or_create_parent(result, custom_field)
                    if "geo" in geo_data:
                        parent.update(geo_data["geo"])
                    # AS data goes next to the custom location, as a sibling
                    # when the custom field is itself nested.
                    if "as" in geo_data:
                        if "." in custom_field:
                            as_parent_path = custom_field.rsplit(".", 1)[0]
                            as_parent = self._get_or_create_parent(result, as_parent_path)
                            as_parent["as"] = geo_data["as"]
                        else:
                            result["as"] = geo_data["as"]
                else:
                    # Default: store under <parent>.geo and <parent>.as.
                    if "." in requirement.field_name:
                        # Nested field like destination.ip or source.ip
                        parent_path = requirement.field_name.rsplit(".", 1)[0]
                        parent = self._get_or_create_parent(result, parent_path)

                        if "geo" in geo_data:
                            parent["geo"] = geo_data["geo"]
                        if "as" in geo_data:
                            parent["as"] = geo_data["as"]
                    else:
                        # Top-level field like 'ip' - use generic enrichment parent
                        if "enrichment" not in result:
                            result["enrichment"] = {}

                        if "geo" in geo_data:
                            result["enrichment"]["geo"] = geo_data["geo"]
                        if "as" in geo_data:
                            result["enrichment"]["as"] = geo_data["as"]

                # Condition-based filtering happens later in filter_results.
                return True  # Geo enrichment occurred

        except Exception:
            # If geo lookup fails, continue without enrichment.
            pass

        return False

    def _apply_nslookup_expression(self, result: Dict[str, Any], requirement: PostProcessingRequirement) -> bool:
        """Apply nslookup expression enrichment to a result.

        Args:
            result: The result record to modify
            requirement: The nslookup expression requirement

        Returns:
            True if DNS enrichment occurred, False otherwise
        """
        # The field holds an IP or hostname to resolve.
        field_value = self._get_field_value(result, requirement.field_name)

        if not field_value:
            # No value, nothing to enrich.
            return False

        try:
            # requirement.mutators contains the nslookup mutator.
            dns_data = apply_mutators(field_value, requirement.mutators, requirement.field_name, result)

            # nslookup returns a dict keyed by the queried value; each entry
            # holds ECS-compliant DNS data.
            if isinstance(dns_data, dict) and dns_data:
                ecs_dns_data = None
                if field_value in dns_data:
                    ecs_dns_data = dns_data[field_value]
                elif len(dns_data) == 1:
                    # Single entry: assume it is for our query value.
                    ecs_dns_data = next(iter(dns_data.values()))

                if ecs_dns_data:
                    custom_field = self._extract_custom_field(requirement.mutators)

                    if custom_field:
                        # Caller chose an explicit destination.
                        parent = self._get_or_create_parent(result, custom_field)
                        parent.update(ecs_dns_data)
                    else:
                        # Default: store at <parent>.domain.
                        if "." in requirement.field_name:
                            # Nested field like destination.ip or source.hostname
                            parent_path = requirement.field_name.rsplit(".", 1)[0]
                            parent = self._get_or_create_parent(result, parent_path)
                            parent["domain"] = ecs_dns_data
                        else:
                            # Top-level field like 'ip' - use generic enrichment parent
                            if "enrichment" not in result:
                                result["enrichment"] = {}
                            result["enrichment"]["domain"] = ecs_dns_data

                    # Condition-based filtering happens later in filter_results.
                    return True  # DNS enrichment occurred

        except Exception:
            # If DNS lookup fails, continue without enrichment.
            pass

        return False

    def _get_or_create_parent(self, record: Dict[str, Any], parent_path: str) -> Dict[str, Any]:
        """Get or create a parent object in the record.

        Args:
            record: The record to modify
            parent_path: Dot-separated path to the parent

        Returns:
            The parent dictionary

        Raises:
            ValueError: If a path component exists but is not a dict.
        """
        parts = parent_path.split(".")
        current = record

        for part in parts:
            if part not in current:
                current[part] = {}
            elif not isinstance(current[part], dict):
                # Can't nest under a scalar/list value.
                raise ValueError(f"Cannot add enrichment data: {parent_path} is not an object")
            current = current[part]

        return current

    def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
        """Get a field value from a record, supporting nested fields.

        Args:
            record: The record dictionary
            field_path: Dot-separated field path or literal field name

        Returns:
            The field value, or None if not found
        """
        # A literal key (possibly containing dots) wins over a nested path.
        if isinstance(record, dict) and field_path in record:
            return record[field_path]

        # Otherwise walk the dot-separated nested path.
        parts = field_path.split(".")
        current = record

        for part in parts:
            if isinstance(current, dict) and part in current:
                current = current[part]
            else:
                return None

        return current

    def _set_field_value(self, record: Dict[str, Any], field_path: str, value: Any) -> None:
        """Set a field value in a record, supporting nested fields.

        Intermediate objects are created as needed along the dotted path.

        Args:
            record: The record dictionary to modify
            field_path: Dot-separated field path or literal field name
            value: The value to set
        """
        parts = field_path.split(".")
        current = record

        # Navigate to (and create) the parent of the target field.
        for part in parts[:-1]:
            if part not in current:
                current[part] = {}
            current = current[part]

        current[parts[-1]] = value
|
586
|
+
|
|
587
|
+
|
|
588
|
+
class PostProcessingContext:
    """Context information for post-processing operations."""

    def __init__(self, query: str, field_mappings: Dict[str, Any], requirements: List[PostProcessingRequirement]):
        """Initialize post-processing context.

        Args:
            query: Original TQL query string
            field_mappings: Field mappings used in the query
            requirements: Post-processing requirements
        """
        self.query = query
        self.field_mappings = field_mappings
        self.requirements = requirements
        self.stats = PostProcessingStats()

    def get_performance_impact(self) -> Dict[str, Any]:
        """Get information about the performance impact of post-processing.

        Returns:
            Dictionary with performance impact information
        """
        # Tally mutator usage while preserving first-seen order of names.
        usage: Dict[str, int] = {}
        seen_order: List[str] = []
        for req in self.requirements:
            for mut in req.mutators:
                label = mut.get("name", "unknown")
                usage[label] = usage.get(label, 0) + 1
                if label not in seen_order:
                    seen_order.append(label)

        # Network-bound enrichment mutators dominate cost; a large number of
        # requirements is a lesser but still notable overhead.
        overhead = "low"
        if {"geoip_lookup", "nslookup", "geo"} & usage.keys():
            overhead = "high"
        elif len(self.requirements) > 5:
            overhead = "medium"

        return {
            "has_post_processing": bool(self.requirements),
            "requirement_count": len(self.requirements),
            "impacted_fields": list({req.field_name for req in self.requirements}),
            "mutator_types": seen_order,
            "estimated_overhead": overhead,
            "mutator_usage": usage,
        }
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
class PostProcessingStats:
    """Statistics tracking for post-processing operations."""

    def __init__(self):
        """Initialize all counters to zero."""
        self.processed_records = 0  # records handled without error
        self.failed_records = 0  # records that raised during processing
        self.mutator_applications = 0  # total mutator invocations
        self.errors = []  # error messages, oldest first

    def record_success(self):
        """Count one successfully processed record."""
        self.processed_records += 1

    def record_failure(self, error: str):
        """Count one failed record and remember its error message."""
        self.failed_records += 1
        self.errors.append(error)

    def record_mutator_application(self):
        """Count one mutator application."""
        self.mutator_applications += 1

    def get_summary(self) -> Dict[str, Any]:
        """Get a summary of processing statistics.

        Returns:
            Dictionary with processing statistics
        """
        total = self.processed_records + self.failed_records
        rate = 0 if total == 0 else self.processed_records / total * 100
        return {
            "total_records": total,
            "processed_successfully": self.processed_records,
            "failed_records": self.failed_records,
            "success_rate_percent": round(rate, 2),
            "mutator_applications": self.mutator_applications,
            "error_count": len(self.errors),
            # Only the five most recent errors, to keep summaries small.
            "recent_errors": self.errors[-5:] if self.errors else [],
        }
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
class PostProcessingError(Exception):
    """Exception raised during post-processing operations."""

    def __init__(self, message: str, field_name: Optional[str] = None, mutator_name: Optional[str] = None):
        """Initialize post-processing error.

        Args:
            message: Error message
            field_name: Field name where error occurred
            mutator_name: Mutator name that caused the error
        """
        # Record contextual details, then delegate the message to Exception
        # so str(err) behaves normally.
        self.field_name = field_name
        self.mutator_name = mutator_name
        super().__init__(message)
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
class BatchPostProcessor(QueryPostProcessor):
    """Post-processor optimized for large batches of results."""

    def __init__(self, batch_size: int = 1000):
        """Initialize batch post-processor.

        Args:
            batch_size: Number of records to process in each batch
        """
        super().__init__()
        self.batch_size = batch_size

    def process_results(
        self,
        results: List[Dict[str, Any]],
        requirements: List[PostProcessingRequirement],
        track_enrichments: bool = False,
    ) -> List[Dict[str, Any]]:
        """Process results in batches for better memory efficiency.

        Args:
            results: List of result records from OpenSearch
            requirements: List of post-processing requirements
            track_enrichments: Whether to track enrichment operations

        Returns:
            List of processed results with mutators applied
        """
        if not requirements:
            return results

        output: List[Dict[str, Any]] = []
        total = len(results)
        start = 0

        # Hand fixed-size slices to the base implementation so only one
        # batch's worth of deep copies is in flight at a time.
        while start < total:
            chunk = results[start : start + self.batch_size]
            output.extend(super().process_results(chunk, requirements, track_enrichments))
            start += self.batch_size

        return output