PyPI - tellaro-query-language - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/METADATA +24 -1
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/RECORD +27 -27
tql/core.py +225 -54
tql/core_components/opensearch_operations.py +415 -99
tql/core_components/stats_operations.py +11 -1
tql/evaluator.py +39 -2
tql/evaluator_components/special_expressions.py +25 -6
tql/evaluator_components/value_comparison.py +31 -3
tql/mutator_analyzer.py +640 -242
tql/mutators/__init__.py +5 -1
tql/mutators/dns.py +76 -53
tql/mutators/security.py +101 -100
tql/mutators/string.py +74 -0
tql/opensearch_components/field_mapping.py +9 -3
tql/opensearch_components/lucene_converter.py +12 -0
tql/opensearch_components/query_converter.py +134 -25
tql/opensearch_mappings.py +2 -2
tql/opensearch_stats.py +170 -39
tql/parser.py +92 -37
tql/parser_components/ast_builder.py +37 -1
tql/parser_components/field_extractor.py +9 -1
tql/parser_components/grammar.py +32 -8
tql/post_processor.py +489 -31
tql/stats_evaluator.py +170 -12
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/LICENSE +0 -0
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/WHEEL +0 -0
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/entry_points.txt +0 -0

tql/post_processor.py CHANGED Viewed

@@ -18,7 +18,7 @@ class QueryPostProcessor:
     def __init__(self):
         """Initialize the post-processor."""
-    def filter_results(
+    def filter_results(  # noqa: C901
         self, results: List[Dict[str, Any]], requirements: List[PostProcessingRequirement]
     ) -> List[Dict[str, Any]]:
         """Filter results based on post-processing requirements.
@@ -38,27 +38,137 @@ class QueryPostProcessor:
         filtered_results = []
+        # Check if we have a logical expression requirement
+        has_logical_expr_req = any(req.applies_to == "logical_expression" for req in requirements)
         for result in results:
             should_include = True
-            # Check each requirement
-            for requirement in requirements:
-                if requirement.metadata and "operator" in requirement.metadata:
-                    operator = requirement.metadata["operator"]
-                    value = requirement.metadata.get("value")
-                    # Get the field value - either mutated or original
-                    # First check for mutated value in temp field
-                    temp_field_name = f"__{requirement.field_name}_mutated__"
-                    if temp_field_name in result:
-                        field_value = result[temp_field_name]
-                    else:
-                        field_value = self._get_field_value(result, requirement.field_name)
-                    # Apply the operator check
-                    if not self._check_operator(field_value, operator, value):
-                        should_include = False
-                        break
+            # If we have a logical expression requirement, use only that for filtering
+            if has_logical_expr_req:
+                # Only apply logical expression requirements
+                for requirement in requirements:
+                    if requirement.applies_to == "logical_expression":
+                        expression = requirement.metadata.get("expression", {}) if requirement.metadata else {}
+                        if not self._evaluate_logical_expression(result, expression):
+                            should_include = False
+                            break
+            else:
+                # Apply other requirements normally
+                for requirement in requirements:
+                    # Handle nslookup expressions with conditions
+                    if (
+                        requirement.applies_to == "nslookup_expr"
+                        and requirement.metadata
+                        and "conditions" in requirement.metadata
+                    ):
+                        # Create evaluator components for nslookup expression evaluation
+                        from tql.evaluator import TQLEvaluator
+                        from tql.evaluator_components.field_access import FieldAccessor
+                        from tql.evaluator_components.special_expressions import SpecialExpressionEvaluator
+                        field_accessor = FieldAccessor()
+                        evaluator = TQLEvaluator()
+                        special_evaluator = SpecialExpressionEvaluator(
+                            field_accessor.get_field_value, evaluator._evaluate_node
+                        )
+                        # Build node for evaluation
+                        node = {
+                            "type": "nslookup_expr",
+                            "field": requirement.field_name,
+                            "conditions": requirement.metadata["conditions"],
+                            "nslookup_params": requirement.metadata.get("nslookup_params", {}),
+                        }
+                        # Evaluate the nslookup expression
+                        if not special_evaluator.evaluate_nslookup_expr(node, result, {}):
+                            should_include = False
+                            break
+                    # Handle geo expressions with conditions
+                    elif (
+                        requirement.applies_to == "geo_expr"
+                        and requirement.metadata
+                        and "conditions" in requirement.metadata
+                    ):
+                        conditions = requirement.metadata["conditions"]
+                        if conditions:
+                            # Get the geo data that was enriched
+                            geo_data = None
+                            if "." in requirement.field_name:
+                                # For nested fields like destination.ip, check destination.geo
+                                parent_path = requirement.field_name.rsplit(".", 1)[0]
+                                parent = self._get_field_value(result, parent_path)
+                                if isinstance(parent, dict):
+                                    geo_data = parent
+                            else:
+                                # For top-level fields, check enrichment
+                                if "enrichment" in result and isinstance(result["enrichment"], dict):
+                                    geo_data = result["enrichment"]
+                            # Evaluate conditions against the geo data
+                            if geo_data:
+                                # Create a temporary record with the geo data
+                                temp_record = geo_data.get("geo", {})
+                                # Also include AS data if present
+                                if "as" in geo_data:
+                                    temp_record["as"] = geo_data["as"]
+                                # Evaluate the conditions using the same evaluator
+                                from tql.evaluator import TQLEvaluator
+                                evaluator = TQLEvaluator()
+                                if not evaluator._evaluate_node(conditions, temp_record, {}):
+                                    should_include = False
+                                    break
+                            else:
+                                # No geo data found, exclude the result
+                                should_include = False
+                                break
+                    elif requirement.metadata and "operator" in requirement.metadata:
+                        # Check if this is an array operator with comparison
+                        if "comparison_operator" in requirement.metadata:
+                            # This is a special case: field | any/all/none eq value
+                            array_operator = requirement.metadata["operator"]
+                            comparison_operator = requirement.metadata["comparison_operator"]
+                            value = requirement.metadata.get("value")
+                            # Get the field value
+                            temp_field_name = f"__{requirement.field_name}_mutated__"
+                            field_value = self._get_field_value(result, temp_field_name)
+                            if field_value is None:
+                                # No mutated value, get original
+                                field_value = self._get_field_value(result, requirement.field_name)
+                            # Apply array operator with comparison
+                            if not self._check_array_operator_with_comparison(
+                                field_value, array_operator, comparison_operator, value
+                            ):
+                                should_include = False
+                                break
+                        else:
+                            # Regular operator check
+                            operator = requirement.metadata["operator"]
+                            value = requirement.metadata.get("value")
+                            # Check if this was originally a different operator (for type-changing mutators)
+                            if requirement.metadata.get("_original_comparison"):
+                                original = requirement.metadata["_original_comparison"]
+                                operator = original["operator"]
+                                value = original.get("value", value)
+                            # Get the field value - either mutated or original
+                            # First check for mutated value in temp field
+                            temp_field_name = f"__{requirement.field_name}_mutated__"
+                            field_value = self._get_field_value(result, temp_field_name)
+                            if field_value is None:
+                                # No mutated value, get original
+                                field_value = self._get_field_value(result, requirement.field_name)
+                            # Apply the operator check
+                            if not self._check_operator(field_value, operator, value):
+                                should_include = False
+                                break
             if should_include:
                 filtered_results.append(result)
@@ -108,6 +218,17 @@ class QueryPostProcessor:
                     return field_value is True
                 elif value.lower() == "false":
                     return field_value is False
+            # Handle numeric comparisons
+            if isinstance(field_value, (int, float)) and isinstance(value, str):
+                try:
+                    return field_value == float(value)
+                except (ValueError, TypeError):
+                    pass
+            elif isinstance(value, (int, float)) and isinstance(field_value, str):
+                try:
+                    return float(field_value) == value
+                except (ValueError, TypeError):
+                    pass
             return field_value == value
         elif operator in ["ne", "!="]:
             # Handle boolean comparisons
@@ -117,6 +238,17 @@ class QueryPostProcessor:
                     return field_value is not True
                 elif value.lower() == "false":
                     return field_value is not False
+            # Handle numeric comparisons
+            if isinstance(field_value, (int, float)) and isinstance(value, str):
+                try:
+                    return field_value != float(value)
+                except (ValueError, TypeError):
+                    pass
+            elif isinstance(value, (int, float)) and isinstance(field_value, str):
+                try:
+                    return float(field_value) != value
+                except (ValueError, TypeError):
+                    pass
             return field_value != value
         # Comparison operators
@@ -142,6 +274,20 @@ class QueryPostProcessor:
                 return str(field_value) <= str(value)
         # Array operators
+        elif operator == "any":
+            if isinstance(field_value, (list, tuple)):
+                # For arrays, ANY element must equal the value
+                return any(elem == value for elem in field_value)
+            else:
+                # For single values, simple equality
+                return field_value == value
+        elif operator == "not_any":
+            if isinstance(field_value, (list, tuple)):
+                # For arrays, if ANY element equals the value, fail
+                return not any(elem == value for elem in field_value)
+            else:
+                # For single values, if equal, fail
+                return field_value != value
         elif operator == "all":
             if isinstance(field_value, (list, tuple)):
                 # For arrays, ALL elements must equal the value
@@ -158,6 +304,210 @@ class QueryPostProcessor:
             else:
                 # For single values, if equal, fail
                 return field_value != value
+        elif operator == "none":
+            if isinstance(field_value, (list, tuple)):
+                # For arrays, NO element must equal the value (same as not_any)
+                return not any(elem == value for elem in field_value)
+            else:
+                # For single values, must not equal
+                return field_value != value
+        # Existence operators
+        elif operator == "exists":
+            # For exists, we just check that the field has a value
+            # The actual exists check was already done by OpenSearch
+            return field_value is not None
+        elif operator == "not_exists":
+            # This shouldn't normally reach post-processing, but handle it
+            return field_value is None
+        # Default to False for unknown operators
+        return False
+    def _evaluate_logical_expression(self, result: Dict[str, Any], expression: Dict[str, Any]) -> bool:  # noqa: C901
+        """Evaluate a logical expression (AND/OR) against a result.
+        Args:
+            result: The result record to check
+            expression: The logical expression AST node
+        Returns:
+            True if the expression matches, False otherwise
+        """
+        if not expression or "type" not in expression:
+            return True
+        expr_type = expression.get("type")
+        if expr_type == "logical_expression":
+            operator = expression.get("operator", "").upper()
+            left = expression.get("left", {})
+            right = expression.get("right", {})
+            # Recursively evaluate left and right
+            left_result = self._evaluate_logical_expression(result, left)
+            # Short-circuit evaluation
+            if operator == "OR" and left_result:
+                return True
+            elif operator == "AND" and not left_result:
+                return False
+            right_result = self._evaluate_logical_expression(result, right)
+            if operator == "OR":
+                return left_result or right_result
+            elif operator == "AND":
+                return left_result and right_result
+            else:
+                return False
+        elif expr_type == "comparison":
+            # Evaluate a comparison expression
+            field_name = expression.get("field")
+            operator = expression.get("operator")
+            value = expression.get("value")
+            field_mutators = expression.get("field_mutators", [])
+            if not field_name:
+                return False
+            # Get the field value
+            temp_field_name = f"__{field_name}_mutated__"
+            field_value = self._get_field_value(result, temp_field_name)
+            if field_value is None:
+                # No mutated value, get original
+                field_value = self._get_field_value(result, field_name)
+            # Check for array operators in field_mutators
+            array_operator = None
+            for mutator in field_mutators:
+                mutator_name = mutator.get("name", "").lower()
+                if mutator_name in ["any", "all", "none"]:
+                    array_operator = mutator_name
+                    break
+            if array_operator:
+                # Use array operator comparison
+                if operator is None:
+                    return False
+                return self._check_array_operator_with_comparison(field_value, array_operator, operator, value)
+            else:
+                # Regular operator check
+                if operator is None:
+                    return False
+                return self._check_operator(field_value, operator, value)
+        else:
+            # Unknown expression type
+            return True
+    def _check_array_operator_with_comparison(  # noqa: C901
+        self, field_value: Any, array_operator: str, comparison_operator: str, value: Any
+    ) -> bool:
+        """Check if a field value matches the array operator with comparison.
+        Handles cases like: field | any eq value, field | all gt value, etc.
+        Args:
+            field_value: The field value to check (can be array or single value)
+            array_operator: The array operator (any, all, none)
+            comparison_operator: The comparison operator (eq, gt, contains, etc.)
+            value: The value to compare against
+        Returns:
+            True if the check passes, False otherwise
+        """
+        # Unwrap single-element lists for comparison value
+        if isinstance(value, list) and len(value) == 1:
+            value = value[0]
+        # Handle None/missing fields
+        if field_value is None:
+            return False
+        # Convert single values to list for uniform processing
+        if not isinstance(field_value, (list, tuple)):
+            field_value = [field_value]
+        # Apply the array operator with comparison
+        if array_operator == "any":
+            # ANY element must match the comparison
+            for elem in field_value:
+                if self._check_single_value_operator(elem, comparison_operator, value):
+                    return True
+            return False
+        elif array_operator == "all":
+            # ALL elements must match the comparison
+            if len(field_value) == 0:
+                return False  # Empty arrays fail ALL checks
+            for elem in field_value:
+                if not self._check_single_value_operator(elem, comparison_operator, value):
+                    return False
+            return True
+        elif array_operator == "none":
+            # NO element must match the comparison
+            for elem in field_value:
+                if self._check_single_value_operator(elem, comparison_operator, value):
+                    return False
+            return True
+        # Unknown array operator
+        return False
+    def _check_single_value_operator(self, field_value: Any, operator: str, value: Any) -> bool:  # noqa: C901
+        """Check if a single value matches the given operator and value.
+        This is a helper for array operator checks.
+        """
+        # Handle None/missing values
+        if field_value is None:
+            return False
+        # Reuse existing operator logic
+        # String operators
+        if operator == "contains":
+            return str(value).lower() in str(field_value).lower()
+        elif operator == "not_contains":
+            return str(value).lower() not in str(field_value).lower()
+        elif operator == "startswith":
+            return str(field_value).lower().startswith(str(value).lower())
+        elif operator == "not_startswith":
+            return not str(field_value).lower().startswith(str(value).lower())
+        elif operator == "endswith":
+            return str(field_value).lower().endswith(str(value).lower())
+        elif operator == "not_endswith":
+            return not str(field_value).lower().endswith(str(value).lower())
+        # Equality operators
+        elif operator in ["eq", "="]:
+            return field_value == value
+        elif operator in ["ne", "!="]:
+            return field_value != value
+        # Comparison operators
+        elif operator in ["gt", ">"]:
+            try:
+                return float(field_value) > float(value)
+            except (ValueError, TypeError):
+                return str(field_value) > str(value)
+        elif operator in ["gte", ">="]:
+            try:
+                return float(field_value) >= float(value)
+            except (ValueError, TypeError):
+                return str(field_value) >= str(value)
+        elif operator in ["lt", "<"]:
+            try:
+                return float(field_value) < float(value)
+            except (ValueError, TypeError):
+                return str(field_value) < str(value)
+        elif operator in ["lte", "<="]:
+            try:
+                return float(field_value) <= float(value)
+            except (ValueError, TypeError):
+                return str(field_value) <= str(value)
         # Default to False for unknown operators
         return False
@@ -228,7 +578,9 @@ class QueryPostProcessor:
             return self._apply_nslookup_expression(result, requirement)
         return False
-    def _apply_field_mutators(self, result: Dict[str, Any], requirement: PostProcessingRequirement) -> bool:
+    def _apply_field_mutators(  # noqa: C901
+        self, result: Dict[str, Any], requirement: PostProcessingRequirement
+    ) -> bool:
         """Apply field mutators to a result record.
         Args:
@@ -268,6 +620,20 @@ class QueryPostProcessor:
                 "min",
                 "split",
             }
+            # Transform mutators that should always transform the output field
+            TRANSFORM_MUTATORS = {
+                "lowercase",
+                "uppercase",
+                "trim",
+                "replace",
+                "refang",
+                "defang",
+                "b64encode",
+                "b64decode",
+                "urldecode",
+            }
             mutator_names = {m.get("name", "").lower() for m in requirement.mutators}
             # Check the operator from metadata to determine if this is for filtering only
@@ -293,22 +659,86 @@ class QueryPostProcessor:
                 "lte",
             ]
-            if mutator_names.intersection(TYPE_CHANGING_FILTER_MUTATORS) or is_filtering_operation:
-                # For type-changing mutators or filtering operations, store the result in a temporary field
-                # This allows re-evaluation to work correctly
-                temp_field_name = f"__{requirement.field_name}_mutated__"
-                self._set_field_value(result, temp_field_name, mutated_value)
+            # Check the LAST mutator to determine output behavior
+            last_mutator_name = None
+            if requirement.mutators:
+                last_mutator_name = requirement.mutators[-1].get("name", "").lower()
+            # Special case: exists operator with non-type-changing mutators should transform output
+            is_exists_with_transform_mutators = operator == "exists" and not mutator_names.intersection(
+                TYPE_CHANGING_FILTER_MUTATORS
+            )
+            # Determine whether to transform the field or store in temp field
+            # The key is: what does the LAST mutator do?
+            if last_mutator_name in TYPE_CHANGING_FILTER_MUTATORS:
+                # Last mutator changes type - always store in temp field
+                should_transform_output = False
+            elif last_mutator_name in TRANSFORM_MUTATORS:
+                # Last mutator is a transformer - always transform output
+                should_transform_output = True
             else:
+                # Fall back to previous logic
+                should_transform_output = (
+                    # Exists operator with non-type-changing mutators
+                    is_exists_with_transform_mutators
+                    # No filtering operation and no type-changing mutators
+                    or (not is_filtering_operation and not mutator_names.intersection(TYPE_CHANGING_FILTER_MUTATORS))
+                )
+            # Check if this is an enrichment mutator first
+            from .mutators import ENRICHMENT_MUTATORS
+            # Check if we have geo/geoip_lookup enrichment mutator
+            is_geo_enrichment = False
+            for mutator in requirement.mutators:
+                mutator_name = mutator.get("name", "").lower()
+                if mutator_name in ["geo", "geoip_lookup"]:
+                    is_geo_enrichment = True
+                    break
+            if should_transform_output and not is_geo_enrichment:
                 # Update the result with the mutated value
                 # Use the original field name for the output
                 self._set_field_value(result, requirement.field_name, mutated_value)
+            elif not is_geo_enrichment:
+                # For type-changing mutators with filtering operations, store in temp field
+                temp_field_name = f"__{requirement.field_name}_mutated__"
+                self._set_field_value(result, temp_field_name, mutated_value)
-            # Check if this is an enrichment mutator
-            from .mutators import ENRICHMENT_MUTATORS
+            # Check if we have any enrichment mutators
+            enrichment_mutator_found = False
             for mutator in requirement.mutators:
                 if mutator.get("name", "").lower() in ENRICHMENT_MUTATORS:
-                    return True
+                    enrichment_mutator_found = True
+                    break
+            # Handle enrichment mutators specially for geo/geoip_lookup
+            if enrichment_mutator_found and last_mutator_name in ["geo", "geoip_lookup"]:
+                # For geo enrichment mutators applied as field mutators,
+                # we need to store the enrichment data at the parent level
+                if isinstance(mutated_value, dict) and "geo" in mutated_value:
+                    if "." in requirement.field_name:
+                        # Nested field like destination.ip
+                        parent_path = requirement.field_name.rsplit(".", 1)[0]
+                        parent = self._get_or_create_parent(result, parent_path)
+                        # Add geo and as data under the parent
+                        if "geo" in mutated_value:
+                            parent["geo"] = mutated_value["geo"]
+                        if "as" in mutated_value:
+                            parent["as"] = mutated_value["as"]
+                    else:
+                        # Top-level field - use enrichment parent
+                        if "enrichment" not in result:
+                            result["enrichment"] = {}
+                        if "geo" in mutated_value:
+                            result["enrichment"]["geo"] = mutated_value["geo"]
+                        if "as" in mutated_value:
+                            result["enrichment"]["as"] = mutated_value["as"]
+            return enrichment_mutator_found
         except Exception:
             # If mutation fails, leave original value
@@ -446,6 +876,34 @@ class QueryPostProcessor:
             # No value, nothing to enrich
             return False
+        # Check if DNS data already exists (from evaluation phase)
+        existing_dns_data = None
+        if "." in requirement.field_name:
+            # Check nested field location
+            parent_path = requirement.field_name.rsplit(".", 1)[0]
+            parent = self._get_field_value(result, parent_path)
+            if isinstance(parent, dict) and "domain" in parent:
+                existing_dns_data = parent["domain"]
+        else:
+            # Check top-level enrichment location
+            if "enrichment" in result and isinstance(result["enrichment"], dict):
+                existing_dns_data = result["enrichment"].get("domain")
+        # Check if we should force a new lookup
+        force_lookup = False
+        for mutator in requirement.mutators:
+            if "params" in mutator:
+                params = mutator["params"]
+                if isinstance(params, list):
+                    for param in params:
+                        if len(param) == 2 and param[0] == "force" and param[1]:
+                            force_lookup = True
+                            break
+        # If DNS data already exists and we're not forcing, skip
+        if existing_dns_data and not force_lookup:
+            return False
         # Apply nslookup mutator for enrichment
         try:
             dns_data = apply_mutators(
@@ -503,8 +961,8 @@ class QueryPostProcessor:
                             result["enrichment"]["domain"] = ecs_dns_data
-            # Note: Filtering based on conditions is handled separately
-            # during the filter_results phase, not here
+            # Enrichment successful
+            # Note: Filtering based on conditions is handled in filter_results phase
             return True  # DNS enrichment occurred
         except Exception:

tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl