tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tql/post_processor.py CHANGED
@@ -18,7 +18,7 @@ class QueryPostProcessor:
18
18
  def __init__(self):
19
19
  """Initialize the post-processor."""
20
20
 
21
- def filter_results(
21
+ def filter_results( # noqa: C901
22
22
  self, results: List[Dict[str, Any]], requirements: List[PostProcessingRequirement]
23
23
  ) -> List[Dict[str, Any]]:
24
24
  """Filter results based on post-processing requirements.
@@ -38,27 +38,137 @@ class QueryPostProcessor:
38
38
 
39
39
  filtered_results = []
40
40
 
41
+ # Check if we have a logical expression requirement
42
+ has_logical_expr_req = any(req.applies_to == "logical_expression" for req in requirements)
43
+
41
44
  for result in results:
42
45
  should_include = True
43
46
 
44
- # Check each requirement
45
- for requirement in requirements:
46
- if requirement.metadata and "operator" in requirement.metadata:
47
- operator = requirement.metadata["operator"]
48
- value = requirement.metadata.get("value")
49
-
50
- # Get the field value - either mutated or original
51
- # First check for mutated value in temp field
52
- temp_field_name = f"__{requirement.field_name}_mutated__"
53
- if temp_field_name in result:
54
- field_value = result[temp_field_name]
55
- else:
56
- field_value = self._get_field_value(result, requirement.field_name)
57
-
58
- # Apply the operator check
59
- if not self._check_operator(field_value, operator, value):
60
- should_include = False
61
- break
47
+ # If we have a logical expression requirement, use only that for filtering
48
+ if has_logical_expr_req:
49
+ # Only apply logical expression requirements
50
+ for requirement in requirements:
51
+ if requirement.applies_to == "logical_expression":
52
+ expression = requirement.metadata.get("expression", {}) if requirement.metadata else {}
53
+ if not self._evaluate_logical_expression(result, expression):
54
+ should_include = False
55
+ break
56
+ else:
57
+ # Apply other requirements normally
58
+ for requirement in requirements:
59
+ # Handle nslookup expressions with conditions
60
+ if (
61
+ requirement.applies_to == "nslookup_expr"
62
+ and requirement.metadata
63
+ and "conditions" in requirement.metadata
64
+ ):
65
+ # Create evaluator components for nslookup expression evaluation
66
+ from tql.evaluator import TQLEvaluator
67
+ from tql.evaluator_components.field_access import FieldAccessor
68
+ from tql.evaluator_components.special_expressions import SpecialExpressionEvaluator
69
+
70
+ field_accessor = FieldAccessor()
71
+ evaluator = TQLEvaluator()
72
+ special_evaluator = SpecialExpressionEvaluator(
73
+ field_accessor.get_field_value, evaluator._evaluate_node
74
+ )
75
+
76
+ # Build node for evaluation
77
+ node = {
78
+ "type": "nslookup_expr",
79
+ "field": requirement.field_name,
80
+ "conditions": requirement.metadata["conditions"],
81
+ "nslookup_params": requirement.metadata.get("nslookup_params", {}),
82
+ }
83
+
84
+ # Evaluate the nslookup expression
85
+ if not special_evaluator.evaluate_nslookup_expr(node, result, {}):
86
+ should_include = False
87
+ break
88
+ # Handle geo expressions with conditions
89
+ elif (
90
+ requirement.applies_to == "geo_expr"
91
+ and requirement.metadata
92
+ and "conditions" in requirement.metadata
93
+ ):
94
+ conditions = requirement.metadata["conditions"]
95
+ if conditions:
96
+ # Get the geo data that was enriched
97
+ geo_data = None
98
+ if "." in requirement.field_name:
99
+ # For nested fields like destination.ip, check destination.geo
100
+ parent_path = requirement.field_name.rsplit(".", 1)[0]
101
+ parent = self._get_field_value(result, parent_path)
102
+ if isinstance(parent, dict):
103
+ geo_data = parent
104
+ else:
105
+ # For top-level fields, check enrichment
106
+ if "enrichment" in result and isinstance(result["enrichment"], dict):
107
+ geo_data = result["enrichment"]
108
+
109
+ # Evaluate conditions against the geo data
110
+ if geo_data:
111
+ # Create a temporary record with the geo data
112
+ temp_record = geo_data.get("geo", {})
113
+ # Also include AS data if present
114
+ if "as" in geo_data:
115
+ temp_record["as"] = geo_data["as"]
116
+
117
+ # Evaluate the conditions using the same evaluator
118
+ from tql.evaluator import TQLEvaluator
119
+
120
+ evaluator = TQLEvaluator()
121
+ if not evaluator._evaluate_node(conditions, temp_record, {}):
122
+ should_include = False
123
+ break
124
+ else:
125
+ # No geo data found, exclude the result
126
+ should_include = False
127
+ break
128
+ elif requirement.metadata and "operator" in requirement.metadata:
129
+ # Check if this is an array operator with comparison
130
+ if "comparison_operator" in requirement.metadata:
131
+ # This is a special case: field | any/all/none eq value
132
+ array_operator = requirement.metadata["operator"]
133
+ comparison_operator = requirement.metadata["comparison_operator"]
134
+ value = requirement.metadata.get("value")
135
+
136
+ # Get the field value
137
+ temp_field_name = f"__{requirement.field_name}_mutated__"
138
+ field_value = self._get_field_value(result, temp_field_name)
139
+ if field_value is None:
140
+ # No mutated value, get original
141
+ field_value = self._get_field_value(result, requirement.field_name)
142
+
143
+ # Apply array operator with comparison
144
+ if not self._check_array_operator_with_comparison(
145
+ field_value, array_operator, comparison_operator, value
146
+ ):
147
+ should_include = False
148
+ break
149
+ else:
150
+ # Regular operator check
151
+ operator = requirement.metadata["operator"]
152
+ value = requirement.metadata.get("value")
153
+
154
+ # Check if this was originally a different operator (for type-changing mutators)
155
+ if requirement.metadata.get("_original_comparison"):
156
+ original = requirement.metadata["_original_comparison"]
157
+ operator = original["operator"]
158
+ value = original.get("value", value)
159
+
160
+ # Get the field value - either mutated or original
161
+ # First check for mutated value in temp field
162
+ temp_field_name = f"__{requirement.field_name}_mutated__"
163
+ field_value = self._get_field_value(result, temp_field_name)
164
+ if field_value is None:
165
+ # No mutated value, get original
166
+ field_value = self._get_field_value(result, requirement.field_name)
167
+
168
+ # Apply the operator check
169
+ if not self._check_operator(field_value, operator, value):
170
+ should_include = False
171
+ break
62
172
 
63
173
  if should_include:
64
174
  filtered_results.append(result)
@@ -108,6 +218,17 @@ class QueryPostProcessor:
108
218
  return field_value is True
109
219
  elif value.lower() == "false":
110
220
  return field_value is False
221
+ # Handle numeric comparisons
222
+ if isinstance(field_value, (int, float)) and isinstance(value, str):
223
+ try:
224
+ return field_value == float(value)
225
+ except (ValueError, TypeError):
226
+ pass
227
+ elif isinstance(value, (int, float)) and isinstance(field_value, str):
228
+ try:
229
+ return float(field_value) == value
230
+ except (ValueError, TypeError):
231
+ pass
111
232
  return field_value == value
112
233
  elif operator in ["ne", "!="]:
113
234
  # Handle boolean comparisons
@@ -117,6 +238,17 @@ class QueryPostProcessor:
117
238
  return field_value is not True
118
239
  elif value.lower() == "false":
119
240
  return field_value is not False
241
+ # Handle numeric comparisons
242
+ if isinstance(field_value, (int, float)) and isinstance(value, str):
243
+ try:
244
+ return field_value != float(value)
245
+ except (ValueError, TypeError):
246
+ pass
247
+ elif isinstance(value, (int, float)) and isinstance(field_value, str):
248
+ try:
249
+ return float(field_value) != value
250
+ except (ValueError, TypeError):
251
+ pass
120
252
  return field_value != value
121
253
 
122
254
  # Comparison operators
@@ -142,6 +274,20 @@ class QueryPostProcessor:
142
274
  return str(field_value) <= str(value)
143
275
 
144
276
  # Array operators
277
+ elif operator == "any":
278
+ if isinstance(field_value, (list, tuple)):
279
+ # For arrays, ANY element must equal the value
280
+ return any(elem == value for elem in field_value)
281
+ else:
282
+ # For single values, simple equality
283
+ return field_value == value
284
+ elif operator == "not_any":
285
+ if isinstance(field_value, (list, tuple)):
286
+ # For arrays, if ANY element equals the value, fail
287
+ return not any(elem == value for elem in field_value)
288
+ else:
289
+ # For single values, if equal, fail
290
+ return field_value != value
145
291
  elif operator == "all":
146
292
  if isinstance(field_value, (list, tuple)):
147
293
  # For arrays, ALL elements must equal the value
@@ -158,6 +304,210 @@ class QueryPostProcessor:
158
304
  else:
159
305
  # For single values, if equal, fail
160
306
  return field_value != value
307
+ elif operator == "none":
308
+ if isinstance(field_value, (list, tuple)):
309
+ # For arrays, NO element must equal the value (same as not_any)
310
+ return not any(elem == value for elem in field_value)
311
+ else:
312
+ # For single values, must not equal
313
+ return field_value != value
314
+
315
+ # Existence operators
316
+ elif operator == "exists":
317
+ # For exists, we just check that the field has a value
318
+ # The actual exists check was already done by OpenSearch
319
+ return field_value is not None
320
+ elif operator == "not_exists":
321
+ # This shouldn't normally reach post-processing, but handle it
322
+ return field_value is None
323
+
324
+ # Default to False for unknown operators
325
+ return False
326
+
327
+ def _evaluate_logical_expression(self, result: Dict[str, Any], expression: Dict[str, Any]) -> bool: # noqa: C901
328
+ """Evaluate a logical expression (AND/OR) against a result.
329
+
330
+ Args:
331
+ result: The result record to check
332
+ expression: The logical expression AST node
333
+
334
+ Returns:
335
+ True if the expression matches, False otherwise
336
+ """
337
+ if not expression or "type" not in expression:
338
+ return True
339
+
340
+ expr_type = expression.get("type")
341
+
342
+ if expr_type == "logical_expression":
343
+ operator = expression.get("operator", "").upper()
344
+ left = expression.get("left", {})
345
+ right = expression.get("right", {})
346
+
347
+ # Recursively evaluate left and right
348
+ left_result = self._evaluate_logical_expression(result, left)
349
+
350
+ # Short-circuit evaluation
351
+ if operator == "OR" and left_result:
352
+ return True
353
+ elif operator == "AND" and not left_result:
354
+ return False
355
+
356
+ right_result = self._evaluate_logical_expression(result, right)
357
+
358
+ if operator == "OR":
359
+ return left_result or right_result
360
+ elif operator == "AND":
361
+ return left_result and right_result
362
+ else:
363
+ return False
364
+
365
+ elif expr_type == "comparison":
366
+ # Evaluate a comparison expression
367
+ field_name = expression.get("field")
368
+ operator = expression.get("operator")
369
+ value = expression.get("value")
370
+ field_mutators = expression.get("field_mutators", [])
371
+
372
+ if not field_name:
373
+ return False
374
+
375
+ # Get the field value
376
+ temp_field_name = f"__{field_name}_mutated__"
377
+ field_value = self._get_field_value(result, temp_field_name)
378
+ if field_value is None:
379
+ # No mutated value, get original
380
+ field_value = self._get_field_value(result, field_name)
381
+
382
+ # Check for array operators in field_mutators
383
+ array_operator = None
384
+ for mutator in field_mutators:
385
+ mutator_name = mutator.get("name", "").lower()
386
+ if mutator_name in ["any", "all", "none"]:
387
+ array_operator = mutator_name
388
+ break
389
+
390
+ if array_operator:
391
+ # Use array operator comparison
392
+ if operator is None:
393
+ return False
394
+ return self._check_array_operator_with_comparison(field_value, array_operator, operator, value)
395
+ else:
396
+ # Regular operator check
397
+ if operator is None:
398
+ return False
399
+ return self._check_operator(field_value, operator, value)
400
+
401
+ else:
402
+ # Unknown expression type
403
+ return True
404
+
405
+ def _check_array_operator_with_comparison( # noqa: C901
406
+ self, field_value: Any, array_operator: str, comparison_operator: str, value: Any
407
+ ) -> bool:
408
+ """Check if a field value matches the array operator with comparison.
409
+
410
+ Handles cases like: field | any eq value, field | all gt value, etc.
411
+
412
+ Args:
413
+ field_value: The field value to check (can be array or single value)
414
+ array_operator: The array operator (any, all, none)
415
+ comparison_operator: The comparison operator (eq, gt, contains, etc.)
416
+ value: The value to compare against
417
+
418
+ Returns:
419
+ True if the check passes, False otherwise
420
+ """
421
+ # Unwrap single-element lists for comparison value
422
+ if isinstance(value, list) and len(value) == 1:
423
+ value = value[0]
424
+
425
+ # Handle None/missing fields
426
+ if field_value is None:
427
+ return False
428
+
429
+ # Convert single values to list for uniform processing
430
+ if not isinstance(field_value, (list, tuple)):
431
+ field_value = [field_value]
432
+
433
+ # Apply the array operator with comparison
434
+ if array_operator == "any":
435
+ # ANY element must match the comparison
436
+ for elem in field_value:
437
+ if self._check_single_value_operator(elem, comparison_operator, value):
438
+ return True
439
+ return False
440
+
441
+ elif array_operator == "all":
442
+ # ALL elements must match the comparison
443
+ if len(field_value) == 0:
444
+ return False # Empty arrays fail ALL checks
445
+ for elem in field_value:
446
+ if not self._check_single_value_operator(elem, comparison_operator, value):
447
+ return False
448
+ return True
449
+
450
+ elif array_operator == "none":
451
+ # NO element must match the comparison
452
+ for elem in field_value:
453
+ if self._check_single_value_operator(elem, comparison_operator, value):
454
+ return False
455
+ return True
456
+
457
+ # Unknown array operator
458
+ return False
459
+
460
+ def _check_single_value_operator(self, field_value: Any, operator: str, value: Any) -> bool: # noqa: C901
461
+ """Check if a single value matches the given operator and value.
462
+
463
+ This is a helper for array operator checks.
464
+ """
465
+ # Handle None/missing values
466
+ if field_value is None:
467
+ return False
468
+
469
+ # Reuse existing operator logic
470
+ # String operators
471
+ if operator == "contains":
472
+ return str(value).lower() in str(field_value).lower()
473
+ elif operator == "not_contains":
474
+ return str(value).lower() not in str(field_value).lower()
475
+ elif operator == "startswith":
476
+ return str(field_value).lower().startswith(str(value).lower())
477
+ elif operator == "not_startswith":
478
+ return not str(field_value).lower().startswith(str(value).lower())
479
+ elif operator == "endswith":
480
+ return str(field_value).lower().endswith(str(value).lower())
481
+ elif operator == "not_endswith":
482
+ return not str(field_value).lower().endswith(str(value).lower())
483
+
484
+ # Equality operators
485
+ elif operator in ["eq", "="]:
486
+ return field_value == value
487
+ elif operator in ["ne", "!="]:
488
+ return field_value != value
489
+
490
+ # Comparison operators
491
+ elif operator in ["gt", ">"]:
492
+ try:
493
+ return float(field_value) > float(value)
494
+ except (ValueError, TypeError):
495
+ return str(field_value) > str(value)
496
+ elif operator in ["gte", ">="]:
497
+ try:
498
+ return float(field_value) >= float(value)
499
+ except (ValueError, TypeError):
500
+ return str(field_value) >= str(value)
501
+ elif operator in ["lt", "<"]:
502
+ try:
503
+ return float(field_value) < float(value)
504
+ except (ValueError, TypeError):
505
+ return str(field_value) < str(value)
506
+ elif operator in ["lte", "<="]:
507
+ try:
508
+ return float(field_value) <= float(value)
509
+ except (ValueError, TypeError):
510
+ return str(field_value) <= str(value)
161
511
 
162
512
  # Default to False for unknown operators
163
513
  return False
@@ -228,7 +578,9 @@ class QueryPostProcessor:
228
578
  return self._apply_nslookup_expression(result, requirement)
229
579
  return False
230
580
 
231
- def _apply_field_mutators(self, result: Dict[str, Any], requirement: PostProcessingRequirement) -> bool:
581
+ def _apply_field_mutators( # noqa: C901
582
+ self, result: Dict[str, Any], requirement: PostProcessingRequirement
583
+ ) -> bool:
232
584
  """Apply field mutators to a result record.
233
585
 
234
586
  Args:
@@ -268,6 +620,20 @@ class QueryPostProcessor:
268
620
  "min",
269
621
  "split",
270
622
  }
623
+
624
+ # Transform mutators that should always transform the output field
625
+ TRANSFORM_MUTATORS = {
626
+ "lowercase",
627
+ "uppercase",
628
+ "trim",
629
+ "replace",
630
+ "refang",
631
+ "defang",
632
+ "b64encode",
633
+ "b64decode",
634
+ "urldecode",
635
+ }
636
+
271
637
  mutator_names = {m.get("name", "").lower() for m in requirement.mutators}
272
638
 
273
639
  # Check the operator from metadata to determine if this is for filtering only
@@ -293,22 +659,86 @@ class QueryPostProcessor:
293
659
  "lte",
294
660
  ]
295
661
 
296
- if mutator_names.intersection(TYPE_CHANGING_FILTER_MUTATORS) or is_filtering_operation:
297
- # For type-changing mutators or filtering operations, store the result in a temporary field
298
- # This allows re-evaluation to work correctly
299
- temp_field_name = f"__{requirement.field_name}_mutated__"
300
- self._set_field_value(result, temp_field_name, mutated_value)
662
+ # Check the LAST mutator to determine output behavior
663
+ last_mutator_name = None
664
+ if requirement.mutators:
665
+ last_mutator_name = requirement.mutators[-1].get("name", "").lower()
666
+
667
+ # Special case: exists operator with non-type-changing mutators should transform output
668
+ is_exists_with_transform_mutators = operator == "exists" and not mutator_names.intersection(
669
+ TYPE_CHANGING_FILTER_MUTATORS
670
+ )
671
+
672
+ # Determine whether to transform the field or store in temp field
673
+ # The key is: what does the LAST mutator do?
674
+ if last_mutator_name in TYPE_CHANGING_FILTER_MUTATORS:
675
+ # Last mutator changes type - always store in temp field
676
+ should_transform_output = False
677
+ elif last_mutator_name in TRANSFORM_MUTATORS:
678
+ # Last mutator is a transformer - always transform output
679
+ should_transform_output = True
301
680
  else:
681
+ # Fall back to previous logic
682
+ should_transform_output = (
683
+ # Exists operator with non-type-changing mutators
684
+ is_exists_with_transform_mutators
685
+ # No filtering operation and no type-changing mutators
686
+ or (not is_filtering_operation and not mutator_names.intersection(TYPE_CHANGING_FILTER_MUTATORS))
687
+ )
688
+
689
+ # Check if this is an enrichment mutator first
690
+ from .mutators import ENRICHMENT_MUTATORS
691
+
692
+ # Check if we have geo/geoip_lookup enrichment mutator
693
+ is_geo_enrichment = False
694
+ for mutator in requirement.mutators:
695
+ mutator_name = mutator.get("name", "").lower()
696
+ if mutator_name in ["geo", "geoip_lookup"]:
697
+ is_geo_enrichment = True
698
+ break
699
+
700
+ if should_transform_output and not is_geo_enrichment:
302
701
  # Update the result with the mutated value
303
702
  # Use the original field name for the output
304
703
  self._set_field_value(result, requirement.field_name, mutated_value)
704
+ elif not is_geo_enrichment:
705
+ # For type-changing mutators with filtering operations, store in temp field
706
+ temp_field_name = f"__{requirement.field_name}_mutated__"
707
+ self._set_field_value(result, temp_field_name, mutated_value)
305
708
 
306
- # Check if this is an enrichment mutator
307
- from .mutators import ENRICHMENT_MUTATORS
308
-
709
+ # Check if we have any enrichment mutators
710
+ enrichment_mutator_found = False
309
711
  for mutator in requirement.mutators:
310
712
  if mutator.get("name", "").lower() in ENRICHMENT_MUTATORS:
311
- return True
713
+ enrichment_mutator_found = True
714
+ break
715
+
716
+ # Handle enrichment mutators specially for geo/geoip_lookup
717
+ if enrichment_mutator_found and last_mutator_name in ["geo", "geoip_lookup"]:
718
+ # For geo enrichment mutators applied as field mutators,
719
+ # we need to store the enrichment data at the parent level
720
+ if isinstance(mutated_value, dict) and "geo" in mutated_value:
721
+ if "." in requirement.field_name:
722
+ # Nested field like destination.ip
723
+ parent_path = requirement.field_name.rsplit(".", 1)[0]
724
+ parent = self._get_or_create_parent(result, parent_path)
725
+
726
+ # Add geo and as data under the parent
727
+ if "geo" in mutated_value:
728
+ parent["geo"] = mutated_value["geo"]
729
+ if "as" in mutated_value:
730
+ parent["as"] = mutated_value["as"]
731
+ else:
732
+ # Top-level field - use enrichment parent
733
+ if "enrichment" not in result:
734
+ result["enrichment"] = {}
735
+
736
+ if "geo" in mutated_value:
737
+ result["enrichment"]["geo"] = mutated_value["geo"]
738
+ if "as" in mutated_value:
739
+ result["enrichment"]["as"] = mutated_value["as"]
740
+
741
+ return enrichment_mutator_found
312
742
 
313
743
  except Exception:
314
744
  # If mutation fails, leave original value
@@ -446,6 +876,34 @@ class QueryPostProcessor:
446
876
  # No value, nothing to enrich
447
877
  return False
448
878
 
879
+ # Check if DNS data already exists (from evaluation phase)
880
+ existing_dns_data = None
881
+ if "." in requirement.field_name:
882
+ # Check nested field location
883
+ parent_path = requirement.field_name.rsplit(".", 1)[0]
884
+ parent = self._get_field_value(result, parent_path)
885
+ if isinstance(parent, dict) and "domain" in parent:
886
+ existing_dns_data = parent["domain"]
887
+ else:
888
+ # Check top-level enrichment location
889
+ if "enrichment" in result and isinstance(result["enrichment"], dict):
890
+ existing_dns_data = result["enrichment"].get("domain")
891
+
892
+ # Check if we should force a new lookup
893
+ force_lookup = False
894
+ for mutator in requirement.mutators:
895
+ if "params" in mutator:
896
+ params = mutator["params"]
897
+ if isinstance(params, list):
898
+ for param in params:
899
+ if len(param) == 2 and param[0] == "force" and param[1]:
900
+ force_lookup = True
901
+ break
902
+
903
+ # If DNS data already exists and we're not forcing, skip
904
+ if existing_dns_data and not force_lookup:
905
+ return False
906
+
449
907
  # Apply nslookup mutator for enrichment
450
908
  try:
451
909
  dns_data = apply_mutators(
@@ -503,8 +961,8 @@ class QueryPostProcessor:
503
961
 
504
962
  result["enrichment"]["domain"] = ecs_dns_data
505
963
 
506
- # Note: Filtering based on conditions is handled separately
507
- # during the filter_results phase, not here
964
+ # Enrichment successful
965
+ # Note: Filtering based on conditions is handled in filter_results phase
508
966
  return True # DNS enrichment occurred
509
967
 
510
968
  except Exception: