PyPI - tellaro-query-language - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

tellaro-query-language: 0.2.0 (py3-none-any.whl) → 0.2.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/METADATA +24 -1
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/RECORD +27 -27
tql/core.py +225 -54
tql/core_components/opensearch_operations.py +415 -99
tql/core_components/stats_operations.py +11 -1
tql/evaluator.py +39 -2
tql/evaluator_components/special_expressions.py +25 -6
tql/evaluator_components/value_comparison.py +31 -3
tql/mutator_analyzer.py +640 -242
tql/mutators/__init__.py +5 -1
tql/mutators/dns.py +76 -53
tql/mutators/security.py +101 -100
tql/mutators/string.py +74 -0
tql/opensearch_components/field_mapping.py +9 -3
tql/opensearch_components/lucene_converter.py +12 -0
tql/opensearch_components/query_converter.py +134 -25
tql/opensearch_mappings.py +2 -2
tql/opensearch_stats.py +170 -39
tql/parser.py +92 -37
tql/parser_components/ast_builder.py +37 -1
tql/parser_components/field_extractor.py +9 -1
tql/parser_components/grammar.py +32 -8
tql/post_processor.py +489 -31
tql/stats_evaluator.py +170 -12
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/LICENSE +0 -0
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/WHEEL +0 -0
{tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/entry_points.txt +0 -0

tql/mutator_analyzer.py (changed)

@@ -30,6 +30,7 @@ MUTATOR_CLASSIFICATIONS: Dict[str, MutatorType] = {
     "uppercase": MutatorType.POST_PROCESSABLE,  # Always post-process (transforms result)
     "trim": MutatorType.POST_PROCESSABLE,  # Always post-process (transforms result)
     "split": MutatorType.POST_PROCESSABLE,  # Always post-process (returns array)
+    "replace": MutatorType.POST_PROCESSABLE,  # Always post-process (transforms result)
     "nslookup": MutatorType.POST_PROCESSABLE,  # Always post-process (enrichment)
     "geoip_lookup": MutatorType.POST_PROCESSABLE,  # Always post-process (enrichment)
     "geo": MutatorType.POST_PROCESSABLE,  # Always post-process (enrichment)
@@ -41,6 +42,18 @@ MUTATOR_CLASSIFICATIONS: Dict[str, MutatorType] = {
     "urldecode": MutatorType.POST_PROCESSABLE,  # Always post-process (modifies value)
     "is_private": MutatorType.POST_PROCESSABLE,  # Always post-process (returns bool)
     "is_global": MutatorType.POST_PROCESSABLE,  # Always post-process (returns bool)
+    "any": MutatorType.POST_PROCESSABLE,  # Always post-process (array evaluation)
+    "all": MutatorType.POST_PROCESSABLE,  # Always post-process (array evaluation)
+    "none": MutatorType.POST_PROCESSABLE,  # Always post-process (array evaluation)
+    "avg": MutatorType.POST_PROCESSABLE,  # Always post-process (array computation)
+    "average": MutatorType.POST_PROCESSABLE,  # Always post-process (array computation)
+    "sum": MutatorType.POST_PROCESSABLE,  # Always post-process (array computation)
+    "min": MutatorType.POST_PROCESSABLE,  # Always post-process (array computation)
+    "max": MutatorType.POST_PROCESSABLE,  # Always post-process (array computation)
+    "count": MutatorType.POST_PROCESSABLE,  # Always post-process (array computation)
+    "unique": MutatorType.POST_PROCESSABLE,  # Always post-process (array computation)
+    "first": MutatorType.POST_PROCESSABLE,  # Always post-process (array access)
+    "last": MutatorType.POST_PROCESSABLE,  # Always post-process (array access)
 }
@@ -52,8 +65,8 @@ class PostProcessingRequirement:
     mapped_field_name: str  # Field name used in OpenSearch query
     mutators: List[Dict[str, Any]]  # List of mutator specifications
     applies_to: Literal[
-        "field", "value", "geo_expr", "nslookup_expr"
-    ]  # Whether this applies to field, value mutators, geo, or nslookup expressions
+        "field", "value", "geo_expr", "nslookup_expr", "logical_expression"
+    ]  # Whether this applies to field, value mutators, geo, nslookup, or logical expressions
     metadata: Optional[Dict[str, Any]] = None  # Additional metadata for special processing
@@ -73,6 +86,8 @@ class MutatorAnalysisResult:
 class MutatorAnalyzer:
     """Analyzes TQL queries to determine mutator processing requirements."""
+    context: Optional[str] = None  # Temporary storage for execution context
     def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None):
         """Initialize the analyzer.
@@ -134,9 +149,27 @@ class MutatorAnalyzer:
         # Track if enrichment saving is requested
         save_enrichment_requested = False
+        # Store context temporarily for use in _analyze_node
+        self.context = context
+        # Check if this is a stats query
+        is_stats_query = ast.get("type") in ["stats_expr", "query_with_stats"]
         # Analyze the AST recursively
         self._analyze_node(optimized_ast, post_processing_requirements, health_reasons, optimizations_applied)
+        # Clean up context
+        self.context = None
+        # Clean up nodes marked for removal
+        cleaned_ast = self._clean_ast(optimized_ast)
+        # If the entire AST was removed (e.g., just "field | any eq value"), return match_all
+        if cleaned_ast is None:
+            optimized_ast = {"type": "match_all"}
+        else:
+            optimized_ast = cleaned_ast
         # Check if any mutator requested enrichment saving
         for req in post_processing_requirements:
             for mutator in req.mutators:
@@ -158,7 +191,29 @@ class MutatorAnalyzer:
         # Determine overall health status based on context
         health_status: Literal["green", "yellow", "red"] = "green"
-        if post_processing_requirements:
+        # Special handling for stats queries with post-processing in OpenSearch context
+        if is_stats_query and context == "opensearch" and post_processing_requirements:
+            # Stats queries that require post-processing have extremely poor performance
+            health_status = "red"
+            health_reasons.append(
+                {
+                    "status": "red",
+                    "query_part": "stats with post-processing",
+                    "reason": "Stats query requires fetching all documents for post-processing mutators. "
+                    "This will have extremely poor performance on large datasets. "
+                    "Consider pre-processing data or using OpenSearch-compatible operations.",
+                }
+            )
+        # For in_memory context, we need to evaluate health considering ALL mutators
+        # (both those in post-processing and those remaining in the AST)
+        elif context == "in_memory":
+            # Pass the optimized AST to health evaluation for in_memory context
+            health_eval = self._evaluate_health_for_context(post_processing_requirements, context, optimized_ast)
+            health_status = health_eval["health_status"]  # type: ignore[assignment]
+            health_reasons.extend(health_eval["health_reasons"])
+        elif post_processing_requirements:
             # Evaluate health based on context
             health_eval = self._evaluate_health_for_context(post_processing_requirements, context)
             health_status = health_eval["health_status"]  # type: ignore[assignment]
@@ -179,6 +234,166 @@ class MutatorAnalyzer:
             save_enrichment_requested=save_enrichment_requested,
         )
+    def _clean_ast(self, node: Any) -> Any:  # noqa: C901
+        """Remove nodes marked for removal from the AST.
+        Args:
+            node: AST node to clean
+        Returns:
+            Cleaned AST node or None if node should be removed
+        """
+        if not isinstance(node, dict):  # non-dict values (including None for a missing child) pass through unchanged
+            return node
+        # A truthy "_remove_from_query" flag drops this node together with its whole subtree
+        if node.get("_remove_from_query"):
+            return None
+        # Recurse into children; only logical_op and unary_op nodes are descended into
+        if node.get("type") == "logical_op":
+            operator = node.get("operator", "").lower()
+            left = self._clean_ast(node.get("left"))
+            right = self._clean_ast(node.get("right"))
+            # OR where at least one side was removed (or is missing): keeping only the other side would narrow the query
+            if operator == "or" and (left is None or right is None):
+                # so the whole OR becomes match_all; the original intent (per the author's note) is that
+                # the removed part is then handled in post-processing, which is outside this method
+                if left is None or right is None:  # always true here: repeats the second half of the outer condition
+                    return {"type": "match_all"}
+            # Reached for every non-"or" operator (the operator is not checked further) and for OR with both sides kept
+            if left is None and right is None:
+                return None
+            elif left is None:
+                return right  # the surviving child replaces the logical_op node
+            elif right is None:
+                return left  # the surviving child replaces the logical_op node
+            else:
+                node["left"] = left  # the input node is updated in place, not copied
+                node["right"] = right
+                return node
+        elif node.get("type") == "unary_op":
+            operand = self._clean_ast(node.get("operand"))
+            if operand is None:  # operand removed (or missing): the unary node is removed as well
+                return None
+            node["operand"] = operand  # the input node is updated in place, not copied
+            return node
+        # Only reached for node types other than logical_op/unary_op (both branches above always return)
+        if node.get("_convert_to_match_all"):
+            return {"type": "match_all"}
+        return node
+    def _has_array_operators(self, node: Any) -> bool:
+        """Check if an AST node contains array operators (any, all, none).
+        Args:
+            node: AST node to check
+        Returns:
+            True if node contains array operators
+        """
+        if not isinstance(node, dict):  # None or any other non-dict value cannot carry mutators
+            return False
+        node_type = node.get("type")
+        if node_type == "comparison":
+            # Scan every entry of "field_mutators" (not just the last one); no other mutator list is inspected
+            field_mutators = node.get("field_mutators", [])
+            for mutator in field_mutators:
+                if mutator.get("name", "").lower() in ["any", "all", "none"]:  # name match is case-insensitive
+                    return True
+            return False
+        elif node_type == "logical_op":
+            # True if either side matches; a missing side defaults to {} and therefore yields False
+            return self._has_array_operators(node.get("left", {})) or self._has_array_operators(node.get("right", {}))
+        elif node_type == "unary_op":
+            # Delegate to the operand; a missing operand defaults to {} and therefore yields False
+            return self._has_array_operators(node.get("operand", {}))
+        return False  # every other node type (or a dict without "type") is reported as having none
+    def _has_transform_mutators_with_filtering(self, node: Any) -> bool:
+        """Check if an AST node contains transform mutators with filtering operations.
+        Args:
+            node: AST node to check
+        Returns:
+            True if node contains transform mutators with filtering operations
+        """
+        if not isinstance(node, dict):  # None or any other non-dict value cannot carry mutators
+            return False
+        node_type = node.get("type")
+        if node_type == "comparison":
+            # A comparison only counts when its operator is one of the filtering operators listed below
+            operator = node.get("operator", "")  # compared as-is: unlike mutator names, the operator is not lowercased
+            is_filtering = operator in [
+                "eq",
+                "=",
+                "ne",
+                "!=",
+                "gt",
+                ">",
+                "gte",
+                ">=",
+                "lt",
+                "<",
+                "lte",
+                "<=",
+                "contains",
+                "not_contains",
+                "startswith",
+                "endswith",
+                "not_startswith",
+                "not_endswith",
+                "in",
+                "not_in",
+            ]
+            if not is_filtering:  # any operator outside the list above never matches
+                return False
+            # Scan every entry of "field_mutators"; a single listed mutator is enough to return True
+            field_mutators = node.get("field_mutators", [])
+            for mutator in field_mutators:
+                mutator_name = mutator.get("name", "").lower()  # name match is case-insensitive
+                # Mutators that rewrite the field value, followed by ones that change its type
+                if mutator_name in [
+                    "lowercase",
+                    "uppercase",
+                    "trim",
+                    "replace",
+                    "refang",
+                    "defang",
+                    "b64encode",
+                    "b64decode",
+                    "urldecode",
+                    # Type-changing mutators that need post-processing
+                    "length",
+                    "is_private",
+                    "is_global",
+                    "split",
+                ]:
+                    return True
+            return False
+        elif node_type == "logical_op":
+            # True if either side matches; a missing side defaults to {} and therefore yields False
+            return self._has_transform_mutators_with_filtering(
+                node.get("left", {})
+            ) or self._has_transform_mutators_with_filtering(node.get("right", {}))
+        elif node_type == "unary_op":
+            # Delegate to the operand; a missing operand defaults to {} and therefore yields False
+            return self._has_transform_mutators_with_filtering(node.get("operand", {}))
+        return False  # every other node type (or a dict without "type") is reported as not matching
     def _analyze_node(  # noqa: C901
         self,
         node: Dict[str, Any],
@@ -204,98 +419,191 @@ class MutatorAnalyzer:
         elif node_type == "collection_op":
             self._analyze_collection_node(node, post_processing_reqs, health_reasons, optimizations)
         elif node_type == "logical_op":
-            # Recursively analyze both sides
+            operator = node.get("operator", "").lower()
+            # Check if this is an OR with array operators OR transform mutators with filtering
+            # BEFORE analyzing children (because analyzing children might modify the nodes)
+            needs_logical_expression = False
+            metadata_type = None
+            if operator == "or":
+                if self._has_array_operators(node):
+                    needs_logical_expression = True
+                    metadata_type = "or_with_array_operators"
+                elif self._has_transform_mutators_with_filtering(node):
+                    needs_logical_expression = True
+                    metadata_type = "or_with_transform_mutators"
+            if needs_logical_expression:
+                # We need to evaluate the entire OR in post-processing
+                # But we still want the base query to run (without array operators)
+                # Deep copy the original expression before it gets modified
+                original_expression = copy.deepcopy(node)
+                # Add a special requirement for the entire logical expression
+                post_processing_reqs.append(
+                    PostProcessingRequirement(
+                        field_name="_logical_expression",
+                        mapped_field_name="_logical_expression",
+                        mutators=[],
+                        applies_to="logical_expression",
+                        metadata={"expression": original_expression, "type": metadata_type},
+                    )
+                )
+            # Always analyze both sides
             self._analyze_node(node.get("left", {}), post_processing_reqs, health_reasons, optimizations)
             self._analyze_node(node.get("right", {}), post_processing_reqs, health_reasons, optimizations)
         elif node_type == "unary_op":
+            operator = node.get("operator", "").lower()
+            # Check if this is a NOT with transform mutators that need filtering
+            if operator == "not" and self._has_transform_mutators_with_filtering(node.get("operand", {})):
+                # We need to evaluate the entire NOT in post-processing
+                # Deep copy the original expression before it gets modified
+                original_expression = copy.deepcopy(node)
+                # Add a special requirement for the entire logical expression
+                post_processing_reqs.append(
+                    PostProcessingRequirement(
+                        field_name="_logical_expression",
+                        mapped_field_name="_logical_expression",
+                        mutators=[],
+                        applies_to="logical_expression",
+                        metadata={"expression": original_expression, "type": "not_with_transform_mutators"},
+                    )
+                )
             # Analyze the operand
             self._analyze_node(node.get("operand", {}), post_processing_reqs, health_reasons, optimizations)
         elif node_type == "geo_expr":
-            # Geo expressions always require post-processing since they involve geoip_lookup
             field_name = node.get("field")
             conditions = node.get("conditions")
             geo_params = node.get("geo_params", {})
             if field_name:
-                # Create a special post-processing requirement for geo expressions
-                # that includes both the enrichment and the filtering
-                # Build mutator params list from geo_params
-                mutator_params = []
-                for param_name, param_value in geo_params.items():
-                    mutator_params.append([param_name, param_value])
-                geo_requirement = PostProcessingRequirement(
-                    field_name=field_name,
-                    mapped_field_name=field_name,
-                    mutators=(
-                        [{"name": "geoip_lookup", "params": mutator_params}]
-                        if mutator_params
-                        else [{"name": "geoip_lookup"}]
-                    ),
-                    applies_to="geo_expr",  # Special type for geo expressions
-                    metadata={
-                        "conditions": conditions,  # Include the conditions for filtering
-                        "node_type": "geo_expr",
-                        "geo_params": geo_params,  # Include geo parameters
-                    },
-                )
-                post_processing_reqs.append(geo_requirement)
-                # Mark the node for post-processing
-                node["requires_post_processing"] = True
-                node["post_process_type"] = "geo_expr"
-                if conditions:
-                    optimizations.append(
-                        f"Geo expression on field '{field_name}' with conditions requires post-processing"
+                # For OpenSearch context, geo expressions require post-processing
+                if self.context == "opensearch":
+                    # Create a post-processing requirement for the geo expression
+                    # Build the geoip_lookup mutator
+                    mutator_params = []
+                    for param_name, param_value in geo_params.items():
+                        mutator_params.append([param_name, param_value])
+                    geo_mutator = {"name": "geoip_lookup"}
+                    if mutator_params:
+                        geo_mutator["params"] = mutator_params
+                    # Create the requirement
+                    req = PostProcessingRequirement(
+                        field_name=field_name,
+                        mapped_field_name=field_name,  # Will be mapped during processing
+                        mutators=[geo_mutator],
+                        applies_to="geo_expr",
+                        metadata={"conditions": conditions, "geo_params": geo_params},
                     )
+                    post_processing_reqs.append(req)
+                    if conditions:
+                        optimizations.append(
+                            f"Geo expression on field '{field_name}' with conditions requires post-processing"
+                        )
+                    else:
+                        optimizations.append(
+                            f"Geo expression on field '{field_name}' for enrichment requires post-processing"
+                        )
                 else:
-                    optimizations.append(f"Geo expression on field '{field_name}' for enrichment only")
+                    # For in-memory evaluation, handled during evaluation phase
+                    if conditions:
+                        optimizations.append(
+                            f"Geo expression on field '{field_name}' with conditions handled during evaluation"
+                        )
+                    else:
+                        optimizations.append(
+                            f"Geo expression on field '{field_name}' for enrichment handled during evaluation"
+                        )
             # Don't analyze conditions recursively - they're part of the geo expression
         elif node_type == "nslookup_expr":
-            # NSLookup expressions always require post-processing since they involve DNS lookups
             field_name = node.get("field")
             conditions = node.get("conditions")
             nslookup_params = node.get("nslookup_params", {})
             if field_name:
-                # Create a special post-processing requirement for nslookup expressions
-                # that includes both the enrichment and the filtering
-                # Build mutator params list from nslookup_params
-                mutator_params = []
-                for param_name, param_value in nslookup_params.items():
-                    mutator_params.append([param_name, param_value])
-                nslookup_requirement = PostProcessingRequirement(
-                    field_name=field_name,
-                    mapped_field_name=field_name,
-                    mutators=(
-                        [{"name": "nslookup", "params": mutator_params}] if mutator_params else [{"name": "nslookup"}]
-                    ),
-                    applies_to="nslookup_expr",  # Special type for nslookup expressions
-                    metadata={
-                        "conditions": conditions,  # Include the conditions for filtering
-                        "node_type": "nslookup_expr",
-                        "nslookup_params": nslookup_params,  # Include nslookup parameters
-                    },
-                )
-                post_processing_reqs.append(nslookup_requirement)
-                # Mark the node for post-processing
-                node["requires_post_processing"] = True
-                node["post_process_type"] = "nslookup_expr"
-                if conditions:
-                    optimizations.append(
-                        f"NSLookup expression on field '{field_name}' with conditions requires post-processing"
+                # For OpenSearch context, nslookup expressions require post-processing
+                if self.context == "opensearch":
+                    # Create a post-processing requirement for the nslookup expression
+                    # Build the nslookup mutator
+                    mutator_params = []
+                    for param_name, param_value in nslookup_params.items():
+                        mutator_params.append([param_name, param_value])
+                    nslookup_mutator = {"name": "nslookup"}
+                    if mutator_params:
+                        nslookup_mutator["params"] = mutator_params
+                    # Create the requirement
+                    req = PostProcessingRequirement(
+                        field_name=field_name,
+                        mapped_field_name=field_name,  # Will be mapped during processing
+                        mutators=[nslookup_mutator],
+                        applies_to="nslookup_expr",
+                        metadata={"conditions": conditions, "nslookup_params": nslookup_params},
                     )
+                    post_processing_reqs.append(req)
+                    if conditions:
+                        optimizations.append(
+                            f"NSLookup expression on field '{field_name}' with conditions requires post-processing"
+                        )
+                    else:
+                        optimizations.append(
+                            f"NSLookup expression on field '{field_name}' for enrichment requires post-processing"
+                        )
                 else:
-                    optimizations.append(f"NSLookup expression on field '{field_name}' for enrichment only")
+                    # For in-memory evaluation, handled during evaluation phase
+                    if conditions:
+                        optimizations.append(
+                            f"NSLookup expression on field '{field_name}' with conditions handled during evaluation"
+                        )
+                    else:
+                        optimizations.append(
+                            f"NSLookup expression on field '{field_name}' for enrichment handled during evaluation"
+                        )
             # Don't analyze conditions recursively - they're part of the nslookup expression
+        elif node_type == "query_with_stats":
+            # Handle query_with_stats node by analyzing the filter part
+            filter_node = node.get("filter")
+            if filter_node:
+                self._analyze_node(filter_node, post_processing_reqs, health_reasons, optimizations)
+            # Analyze the stats part if it contains mutators (though this is rare)
+            stats_node = node.get("stats")
+            if stats_node:
+                self._analyze_node(stats_node, post_processing_reqs, health_reasons, optimizations)
+        elif node_type == "stats_expr":
+            # Handle pure stats expressions - they typically don't have mutators
+            # but check aggregations and group_by fields for any field transformations
+            aggregations = node.get("aggregations", [])
+            for agg in aggregations:
+                # In case aggregations have field mutators in the future
+                if isinstance(agg, dict) and agg.get("field_mutators"):
+                    # Analyze field mutators within aggregations if they exist
+                    field_mutators = agg.get("field_mutators", [])
+                    if field_mutators:
+                        field_name = agg.get("field", "*")
+                        # Add post-processing requirement for mutators in aggregations
+                        post_processing_reqs.append(
+                            PostProcessingRequirement(
+                                field_name=field_name,
+                                mapped_field_name=field_name,
+                                mutators=field_mutators,
+                                applies_to="field",
+                            )
+                        )
     def _analyze_comparison_node(  # noqa: C901
         self,
@@ -315,37 +623,226 @@ class MutatorAnalyzer:
         field_name = node.get("field")
         operator = node.get("operator")
         field_mutators = node.get("field_mutators", [])
-        value_mutators = node.get("value_mutators", [])
         if not field_name or not operator:
             return
         # Analyze field mutators
         if field_mutators:
-            result = self._analyze_field_mutators(field_name, field_mutators, operator)
-            # Update node with optimized mutators
-            if result.optimized_mutators != field_mutators:
-                if result.optimized_mutators:
-                    node["field_mutators"] = result.optimized_mutators
+            # Special case: if the last mutator is any/all/none and we have a comparison operator,
+            # treat it as an array comparison operator, not a regular mutator
+            last_mutator = field_mutators[-1] if field_mutators else None
+            if (
+                last_mutator
+                and last_mutator.get("name", "").lower() in ["any", "all", "none"]
+                and operator
+                in [
+                    "eq",
+                    "=",
+                    "ne",
+                    "!=",
+                    "gt",
+                    ">",
+                    "lt",
+                    "<",
+                    "gte",
+                    ">=",
+                    "lte",
+                    "<=",
+                    "contains",
+                    "not_contains",
+                    "startswith",
+                    "endswith",
+                    "not_startswith",
+                    "not_endswith",
+                ]
+            ):
+                # Extract the array operator
+                array_operator = last_mutator["name"].lower()
+                # Process any mutators before the array operator
+                remaining_mutators = field_mutators[:-1]
+                if remaining_mutators:
+                    result = self._analyze_field_mutators(field_name, remaining_mutators, operator)
+                    # Update node with optimized mutators
+                    if result.optimized_mutators != remaining_mutators:
+                        if result.optimized_mutators:
+                            node["field_mutators"] = result.optimized_mutators
+                        else:
+                            # Remove field_mutators if all were optimized away
+                            node.pop("field_mutators", None)
+                        optimizations.extend(result.optimizations)
+                    # Add post-processing requirements for the remaining mutators
+                    if result.post_processing_mutators:
+                        post_processing_reqs.append(
+                            PostProcessingRequirement(
+                                field_name=field_name,
+                                mapped_field_name=result.selected_field or field_name,
+                                mutators=result.post_processing_mutators,
+                                applies_to="field",
+                            )
+                        )
                 else:
-                    # Remove field_mutators if all were optimized away
+                    # No other mutators, remove field_mutators from node
                     node.pop("field_mutators", None)
-                optimizations.extend(result.optimizations)
-            # Add post-processing requirements
-            if result.post_processing_mutators:
+                # Add post-processing requirement for the array comparison
                 post_processing_reqs.append(
                     PostProcessingRequirement(
                         field_name=field_name,
-                        mapped_field_name=result.selected_field or field_name,
-                        mutators=result.post_processing_mutators,
+                        mapped_field_name=field_name,
+                        mutators=[],  # No mutators, just operator-based filtering
                         applies_to="field",
-                        metadata={"operator": operator, "value": node.get("value")},
+                        metadata={
+                            "operator": array_operator,
+                            "comparison_operator": operator,
+                            "value": node.get("value"),
+                        },
                     )
                 )
+                # Array operators should not affect the OpenSearch query at all
+                # They are purely post-processing filters
+                # Store the original node info in the post-processing requirement
+                if post_processing_reqs and post_processing_reqs[-1].metadata is not None:
+                    post_processing_reqs[-1].metadata["original_node"] = {
+                        "type": "comparison",
+                        "field": field_name,
+                        "operator": operator,
+                        "value": node.get("value"),
+                        "field_mutators": [{"name": array_operator}],
+                    }
+                # Array operators should be completely removed from the OpenSearch query
+                # Mark this node for removal
+                node["_remove_from_query"] = True
+                # Don't mark the node for post-processing - let the query be generated normally
+                # The array operator is applied as a post-processing filter on top of the results
+                optimizations.append(
+                    f"Array operator '{array_operator}' with '{operator}' will be applied in post-processing"
+                )
+                # Skip the regular mutator processing that follows
+                return
+            else:
+                # Regular mutator processing
+                result = self._analyze_field_mutators(field_name, field_mutators, operator)
+                # For in-memory context, keep mutators in AST for evaluation
+                if self.context == "in_memory":
+                    # Don't remove mutators from AST for in-memory queries
+                    # They need to be applied during evaluation
+                    pass
+                else:
+                    # Update node with optimized mutators for OpenSearch context
+                    if result.optimized_mutators != field_mutators:
+                        if result.optimized_mutators:
+                            node["field_mutators"] = result.optimized_mutators
+                        else:
+                            # Remove field_mutators if all were optimized away
+                            node.pop("field_mutators", None)
+                        optimizations.extend(result.optimizations)
+                # Add post-processing requirements
+                if result.post_processing_mutators:
+                    # For in-memory context, we need special handling
+                    if self.context == "in_memory":
+                        # Check if any mutators are transform mutators that need to be applied to results
+                        transform_mutators = []
+                        for mutator in result.post_processing_mutators:
+                            mutator_name = mutator.get("name", "").lower()
+                            # Transform mutators that modify the result
+                            if mutator_name in [
+                                "split",
+                                "lowercase",
+                                "uppercase",
+                                "trim",
+                                "replace",
+                                "refang",
+                                "defang",
+                            ]:
+                                transform_mutators.append(mutator)
+                        # If we have transform mutators, add them as post-processing for result transformation
+                        if transform_mutators:
+                            post_processing_reqs.append(
+                                PostProcessingRequirement(
+                                    field_name=field_name,
+                                    mapped_field_name=field_name,
+                                    mutators=transform_mutators,
+                                    applies_to="field",
+                                    metadata={"transform_only": True},  # Mark as transform-only
+                                )
+                            )
+                    else:
+                        # Always include operator and value in metadata for post-processing filtering
+                        metadata = {"operator": operator, "value": node.get("value")}
+                        # Include original comparison info if it exists
+                        if node.get("_original_comparison"):
+                            metadata["_original_comparison"] = node["_original_comparison"]
+                        post_processing_reqs.append(
+                            PostProcessingRequirement(
+                                field_name=field_name,
+                                mapped_field_name=result.selected_field or field_name,
+                                mutators=result.post_processing_mutators,
+                                applies_to="field",
+                                metadata=metadata,
+                            )
+                        )
+                    # Check if we have transform mutators with filtering operators
+                    # These need special handling in query conversion
+                    TRANSFORM_MUTATORS = {
+                        "lowercase",
+                        "uppercase",
+                        "trim",
+                        "replace",
+                        "refang",
+                        "defang",
+                        "b64encode",
+                        "b64decode",
+                        "urldecode",
+                    }
+                    has_transform_with_filter = False
+                    for mutator in result.post_processing_mutators:
+                        if mutator.get("name", "").lower() in TRANSFORM_MUTATORS:
+                            has_transform_with_filter = True
+                            break
+                    if has_transform_with_filter and operator in [
+                        "eq",
+                        "=",
+                        "ne",
+                        "!=",
+                        "contains",
+                        "not_contains",
+                        "startswith",
+                        "endswith",
+                        "not_startswith",
+                        "not_endswith",
+                        ">",
+                        ">=",
+                        "<",
+                        "<=",
+                        "gt",
+                        "gte",
+                        "lt",
+                        "lte",
+                        "between",
+                        "not_between",
+                    ]:
+                        # Mark the node so query converter knows to use exists query
+                        node["has_transform_mutators_with_filter"] = True
                 # Check if any mutators change the field type
                 has_type_changing_mutator = any(
                     mutator.get("name", "").lower()
@@ -374,6 +871,12 @@ class MutatorAnalyzer:
                     # Also mark if we have type-changing mutators
                     if has_type_changing_mutator:
                         node["has_type_changing_mutators"] = True
+                        # For in-memory queries with type-changing mutators, DON'T convert to exists check
+                        # The mutators should be applied during evaluation
+                        if self.context == "in_memory":
+                            # Keep the original comparison intact for in-memory evaluation
+                            pass
                 elif has_type_changing_mutator:
                     # For type-changing mutators with numeric operators, mark for special handling
                     node["has_type_changing_mutators"] = True
@@ -385,53 +888,11 @@ class MutatorAnalyzer:
             if result.selected_field and result.selected_field != field_name:
                 node["field"] = result.selected_field
-        # Check if operator requires post-processing (e.g., ALL operator on arrays)
-        if operator in ["all", "not_all"]:
-            # These operators need post-processing for array fields
-            post_processing_reqs.append(
-                PostProcessingRequirement(
-                    field_name=field_name,
-                    mapped_field_name=field_name,
-                    mutators=[],  # No mutators, just operator-based filtering
-                    applies_to="field",
-                    metadata={"operator": operator, "value": node.get("value")},
-                )
-            )
-            # Mark for special handling in OpenSearch
-            node["post_process_value"] = True
-        # Analyze value mutators (these are typically post-processing)
-        if value_mutators:
-            post_processing_value_mutators = []
-            for mutator in value_mutators:
-                mutator_name = mutator.get("name", "").lower()
-                classification = MUTATOR_CLASSIFICATIONS.get(mutator_name, MutatorType.POST_PROCESSABLE)
-                if classification in [MutatorType.POST_PROCESSABLE, MutatorType.CONDITIONAL]:
-                    post_processing_value_mutators.append(mutator)
-            if post_processing_value_mutators:
-                post_processing_reqs.append(
-                    PostProcessingRequirement(
-                        field_name=field_name,
-                        mapped_field_name=field_name,  # Value mutators don't affect field mapping
-                        mutators=post_processing_value_mutators,
-                        applies_to="value",
-                    )
-                )
+        # Note: ALL and NOT_ALL operators are handled during evaluation, not post-processing
-                # For value mutators on equality operations, we need to make the query less restrictive
-                # This allows post-processing to correctly filter results
-                if operator in ["eq", "=", "ne", "!="]:
-                    # Mark the node to indicate it needs special handling in OpenSearch
-                    node["post_process_value"] = True
-                    # Keep the original value for reference
-                    node["original_value"] = node.get("value")
-                # Remove value mutators from AST since they'll be post-processed
-                node.pop("value_mutators", None)
-                optimizations.append(f"Moved {len(post_processing_value_mutators)} value mutator(s) to post-processing")
+        # Value mutators are handled during evaluation, not post-processing
+        # The evaluator's _evaluate_comparison method applies value mutators before comparison
+        # So we don't need to treat them as post-processing requirements
     def _analyze_collection_node(
         self,
@@ -450,7 +911,6 @@ class MutatorAnalyzer:
         """
         field_name = node.get("field")
         field_mutators = node.get("field_mutators", [])
-        value_mutators = node.get("value_mutators", [])
         if not field_name:
             return
@@ -487,21 +947,14 @@ class MutatorAnalyzer:
                     f"post-processing for collection operation"
                 )
-        if value_mutators:
-            # Value mutators always go to post-processing for collection operations
-            post_processing_reqs.append(
-                PostProcessingRequirement(
-                    field_name=field_name, mapped_field_name=field_name, mutators=value_mutators, applies_to="value"
-                )
-            )
-            node.pop("value_mutators", None)
-            optimizations.append(
-                f"Moved {len(value_mutators)} value mutator(s) to post-processing for collection operation"
-            )
+        # Value mutators are handled during evaluation for collection operations too
+        # The evaluator applies them before comparison in _evaluate_collection_comparison
     def _evaluate_health_for_context(  # noqa: C901
-        self, post_processing_requirements: List[PostProcessingRequirement], context: str
+        self,
+        post_processing_requirements: List[PostProcessingRequirement],
+        context: str,
+        ast: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Any]:
         """Evaluate health status based on context and mutator performance characteristics.
@@ -518,9 +971,10 @@ class MutatorAnalyzer:
         slow_mutators = []
         all_mutators = []
-        # Collect all mutators and their performance classes
-        for req in post_processing_requirements:
-            for mutator_spec in req.mutators:
+        # Helper function to process mutators
+        def process_mutators(mutator_list):
+            nonlocal fast_count, moderate_count, slow_count
+            for mutator_spec in mutator_list:
                 mutator_name = mutator_spec.get("name", "")
                 all_mutators.append(mutator_name)
@@ -540,6 +994,28 @@ class MutatorAnalyzer:
                     # If we can't create the mutator, assume moderate performance
                     moderate_count += 1
+        # Collect all mutators from post-processing requirements
+        for req in post_processing_requirements:
+            process_mutators(req.mutators)
+        # For in_memory context with AST, also collect mutators from the AST
+        if context == "in_memory" and ast:
+            def collect_ast_mutators(node):
+                if isinstance(node, dict):
+                    # Check for field mutators
+                    if "field_mutators" in node:
+                        process_mutators(node["field_mutators"])
+                    # Check for value mutators
+                    if "value_mutators" in node:
+                        process_mutators(node["value_mutators"])
+                    # Recurse into child nodes
+                    for key, value in node.items():
+                        if key in ["left", "right", "operand"]:
+                            collect_ast_mutators(value)
+            collect_ast_mutators(ast)
         # Determine health status based on context
         health_status = "green"
         health_reasons = []
@@ -692,85 +1168,19 @@ class FieldMutatorAnalyzer:
         self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
     ) -> "MutatorOptimizationResult":
         """Try to optimize a lowercase mutator using field mappings."""
-        # Check if we have a text field with lowercase analyzer
-        lowercase_field = field_mapping.text_fields.get("lowercase")
-        standard_field = field_mapping.text_fields.get("standard")
-        if lowercase_field:
-            # Perfect match - we have a lowercase analyzer
-            return MutatorOptimizationResult(
-                can_optimize=True,
-                selected_field=lowercase_field,
-                post_process_mutator=None,
-                optimization_description=f"Using field '{lowercase_field}' with lowercase analyzer instead of mutator",
-            )
-        elif standard_field:
-            # Standard analyzer might handle lowercase - use it but also post-process
-            return MutatorOptimizationResult(
-                can_optimize=False,
-                selected_field=standard_field,
-                post_process_mutator=mutator,
-                optimization_description=f"Using text field '{standard_field}' but post-processing lowercase mutator",
-            )
-        elif field_mapping.keyword_field:
-            # Only keyword field available - check operator compatibility
-            if operator in [
-                "eq",
-                "=",
-                "ne",
-                "!=",
-                "in",
-                "not_in",
-                "contains",
-                "not_contains",
-                "startswith",
-                "endswith",
-                "not_startswith",
-                "not_endswith",
-            ]:
-                # These operators will work with post-processing
-                return MutatorOptimizationResult(
-                    can_optimize=False,
-                    selected_field=field_mapping.keyword_field,
-                    post_process_mutator=mutator,
-                    optimization_description=f"Using keyword field '{field_mapping.keyword_field}' "
-                    f"with post-processing",
-                    health_issue={
-                        "status": "yellow",
-                        "query_part": f"{field_mapping.base_field_name} | lowercase",
-                        "reason": "Keyword field used with lowercase mutator requires post-processing",
-                    },
-                )
-            else:
-                # Range operators don't make sense with lowercase
-                return MutatorOptimizationResult(
-                    can_optimize=False,
-                    selected_field=None,
-                    post_process_mutator=None,
-                    optimization_description="",
-                    health_issue={
-                        "status": "red",
-                        "query_part": f"{field_mapping.base_field_name} | lowercase {operator}",
-                        "reason": (
-                            f"Field '{field_mapping.base_field_name}' does not support case-insensitive "
-                            f"searching with operator '{operator}'. Available: {field_mapping.keyword_field} (keyword)"
-                        ),
-                    },
-                )
-        else:
-            # No suitable fields
-            return MutatorOptimizationResult(
-                can_optimize=False,
-                selected_field=None,
-                post_process_mutator=mutator,
-                optimization_description="No suitable field mappings for lowercase optimization",
-            )
+        # Per requirement: lowercase should always be post-processing
+        # Even if we have a lowercase analyzer field, we don't optimize
+        return MutatorOptimizationResult(
+            can_optimize=False,
+            selected_field=None,
+            post_process_mutator=mutator,
+            optimization_description="Lowercase mutator always requires post-processing",
+        )
     def _optimize_uppercase_mutator(
         self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
     ) -> "MutatorOptimizationResult":
         """Try to optimize an uppercase mutator using field mappings."""
-        # Check if we actually have an uppercase analyzer
         # We need to check the text_fields dict directly to ensure we have the specific analyzer
         if "uppercase" in field_mapping.text_fields:
             uppercase_field = field_mapping.text_fields["uppercase"]
@@ -793,26 +1203,14 @@ class FieldMutatorAnalyzer:
         self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
     ) -> "MutatorOptimizationResult":
         """Try to optimize a trim mutator using field mappings."""
-        # Check if any text field might handle trimming
-        # Most analyzers include trimming by default, but we can't be sure
-        text_field = field_mapping.text_fields.get("standard")
-        if text_field:
-            # Assume standard analyzer handles trimming (common case)
-            return MutatorOptimizationResult(
-                can_optimize=True,
-                selected_field=text_field,
-                post_process_mutator=None,
-                optimization_description=f"Assuming field '{text_field}' analyzer handles trimming",
-            )
-        else:
-            # No text field - requires post-processing
-            return MutatorOptimizationResult(
-                can_optimize=False,
-                selected_field=None,
-                post_process_mutator=mutator,
-                optimization_description="No text field available for trim optimization",
-            )
+        # Trim should always require post-processing to ensure consistent behavior
+        # We can't reliably know if an analyzer trims whitespace
+        return MutatorOptimizationResult(
+            can_optimize=False,
+            selected_field=None,
+            post_process_mutator=mutator,
+            optimization_description="Trim mutator always requires post-processing",
+        )
 @dataclass

tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl