tellaro-query-language 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.2.5.dist-info/LICENSE +72 -0
- tellaro_query_language-0.2.5.dist-info/METADATA +806 -0
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.5.dist-info}/RECORD +25 -22
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.5.dist-info}/entry_points.txt +1 -0
- tql/__init__.py +1 -1
- tql/cache/base.py +79 -7
- tql/cache/memory.py +126 -18
- tql/cli.py +484 -0
- tql/core.py +261 -5
- tql/core_components/opensearch_operations.py +23 -4
- tql/evaluator.py +3 -1
- tql/evaluator_components/special_expressions.py +62 -10
- tql/evaluator_components/value_comparison.py +70 -12
- tql/exceptions.py +6 -4
- tql/field_type_inference.py +285 -0
- tql/mutator_analyzer.py +2 -2
- tql/mutators/geo.py +57 -20
- tql/opensearch_components/query_converter.py +1 -1
- tql/opensearch_stats.py +10 -7
- tql/parser.py +56 -21
- tql/post_processor.py +44 -11
- tql/scripts.py +19 -2
- tql/stats_evaluator.py +361 -7
- tql/streaming_file_processor.py +335 -0
- tellaro_query_language-0.2.2.dist-info/LICENSE +0 -21
- tellaro_query_language-0.2.2.dist-info/METADATA +0 -433
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.5.dist-info}/WHEEL +0 -0
tql/core.py
CHANGED
```diff
@@ -4,12 +4,19 @@ This module provides the main TQL class that serves as the primary interface
 for parsing and executing TQL queries against different backends.
 """
 
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, Generator, List, Optional, Union
 
 from .analyzer import EnhancedFieldMapping
 from .core_components import FileOperations, OpenSearchOperations, StatsOperations, ValidationOperations
 from .evaluator import TQLEvaluator
-from .exceptions import
+from .exceptions import (
+    TQLExecutionError,
+    TQLOperatorError,
+    TQLParseError,
+    TQLSyntaxError,
+    TQLTypeError,
+    TQLValidationError,
+)
 from .mutator_analyzer import MutatorAnalysisResult
 from .parser import TQLParser
 from .stats_evaluator import TQLStatsEvaluator
```
```diff
@@ -27,7 +34,7 @@ class TQL:
         >>> results = tql.query(data, query)
     """
 
-    def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None):
+    def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None):  # noqa: C901
         """Initialize TQL instance.
 
         Args:
```
```diff
@@ -100,8 +107,26 @@ class TQL:
                     # This is an OpenSearch-style mapping, map field to itself
                     self._simple_mappings[k] = k
                 else:
-                    #
-
+                    # Intelligent field mapping extraction for complex mappings
+                    # Priority: 1) Key matching field name, 2) Key without dots (primary field), 3) First key
+
+                    if k in v:
+                        # Field name exists as key in mapping (e.g., {"username": {"username": "keyword", ...}})
+                        self._simple_mappings[k] = k
+                    else:
+                        # Find primary field (keys without dots, not starting with underscore)
+                        primary_fields = [
+                            field_key
+                            for field_key in v.keys()
+                            if "." not in field_key and not field_key.startswith("_")
+                        ]
+
+                        if primary_fields:
+                            # Use first primary field
+                            self._simple_mappings[k] = primary_fields[0]
+                        else:
+                            # Fallback to first key (maintain backward compatibility)
+                            self._simple_mappings[k] = next(iter(v.keys()))
             else:
                 # Default to mapping field to itself
                 self._simple_mappings[k] = k
```
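The resolution priority added above can be exercised as a standalone sketch. The helper below mirrors the `__init__` logic; the mapping dictionaries are hypothetical:

```python
def resolve_simple_mapping(field: str, mapping: dict) -> str:
    """Pick the concrete field name a complex mapping should resolve to."""
    if field in mapping:
        return field  # 1) the queried field name itself appears as a key
    primary = [k for k in mapping if "." not in k and not k.startswith("_")]
    if primary:
        return primary[0]  # 2) first "primary" key: no dots, no leading underscore
    return next(iter(mapping))  # 3) fall back to the first key

# Hypothetical complex mappings:
assert resolve_simple_mapping("username", {"username": "keyword", "username.text": "text"}) == "username"
assert resolve_simple_mapping("user", {"user.name": "text", "uid": "keyword"}) == "uid"
```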
```diff
@@ -1032,6 +1057,237 @@ class TQL:
         """
         return self.stats_ops.analyze_stats_query(query)
 
+    def query_file_streaming(
+        self,
+        file_path: str,
+        query: str,
+        input_format: str = "auto",
+        csv_delimiter: str = ",",
+        csv_headers: Optional[List[str]] = None,
+        no_header: bool = False,
+        field_types: Optional[Dict[str, str]] = None,
+        sample_size: int = 100,
+    ) -> Generator[Dict[str, Any], None, None]:
+        """Execute a TQL query against a file in streaming mode.
+
+        This method processes files line-by-line with minimal memory usage,
+        yielding matching records as they are found.
+
+        Args:
+            file_path: Path to file
+            query: TQL query string (filter query only, not stats)
+            input_format: File format ('json', 'jsonl', 'csv', 'auto')
+            csv_delimiter: CSV delimiter character
+            csv_headers: Manual CSV header names
+            no_header: Force CSV to be treated as having no header
+            field_types: Manual field type mappings
+            sample_size: Number of records to sample for type inference
+
+        Yields:
+            Matching records as dictionaries
+
+        Raises:
+            TQLParseError: If query parsing fails
+            TQLExecutionError: If file processing fails
+        """
+        from .streaming_file_processor import StreamingFileProcessor
+
+        # Parse the query
+        ast = self.parse(query)
+
+        # Validate query type (only filter queries supported for streaming)
+        query_type = ast.get("type")
+        if query_type in ["stats_expr", "query_with_stats"]:
+            raise TQLExecutionError("Stats queries not supported in streaming mode. Use query_file_stats() instead.")
+
+        # Create streaming processor
+        processor = StreamingFileProcessor(
+            sample_size=sample_size,
+            csv_delimiter=csv_delimiter,
+            field_types=field_types,
+            csv_headers=csv_headers,
+            no_header=no_header,
+        )
+
+        # Process file and evaluate query on each record
+        for record in processor.process_file(file_path, input_format):
+            if self.evaluator._evaluate_node(ast, record, self._simple_mappings):
+                yield record
+
+    def query_file_stats(
+        self,
+        file_path: str,
+        query: str,
+        input_format: str = "auto",
+        csv_delimiter: str = ",",
+        csv_headers: Optional[List[str]] = None,
+        no_header: bool = False,
+        field_types: Optional[Dict[str, str]] = None,
+        sample_size: int = 100,
+    ) -> Dict[str, Any]:
+        """Execute a TQL stats query against a file in streaming mode.
+
+        This method processes files line-by-line with accumulator-based stats
+        calculations for memory efficiency.
+
+        Args:
+            file_path: Path to file
+            query: TQL query string (can include filters and stats)
+            input_format: File format ('json', 'jsonl', 'csv', 'auto')
+            csv_delimiter: CSV delimiter character
+            csv_headers: Manual CSV header names
+            no_header: Force CSV to be treated as having no header
+            field_types: Manual field type mappings
+            sample_size: Number of records to sample for type inference
+
+        Returns:
+            Dictionary containing aggregation results
+
+        Raises:
+            TQLParseError: If query parsing fails
+            TQLExecutionError: If file processing fails
+        """
+        from .streaming_file_processor import StreamingFileProcessor
+
+        # Parse the query
+        ast = self.parse(query)
+        query_type = ast.get("type")
+
+        # Create streaming processor
+        processor = StreamingFileProcessor(
+            sample_size=sample_size,
+            csv_delimiter=csv_delimiter,
+            field_types=field_types,
+            csv_headers=csv_headers,
+            no_header=no_header,
+        )
+
+        # Handle different query types
+        if query_type == "stats_expr":
+            # Pure stats query - process all records
+            record_iter = processor.process_file(file_path, input_format)
+            return self.stats_evaluator.evaluate_stats_streaming(record_iter, ast, self.field_mappings)
+
+        elif query_type == "query_with_stats":
+            # Filter + stats query
+            filter_ast = ast["filter"]
+            stats_ast = ast["stats"]
+
+            # Create filtered iterator
+            def filtered_records():
+                for record in processor.process_file(file_path, input_format):
+                    if self.evaluator._evaluate_node(filter_ast, record, self._simple_mappings):
+                        yield record
+
+            return self.stats_evaluator.evaluate_stats_streaming(filtered_records(), stats_ast, self.field_mappings)
+
+        else:
+            # Regular filter query - shouldn't use stats method
+            raise TQLExecutionError("Use query_file_streaming() for filter queries without stats aggregations.")
+
+    def query_folder(
+        self,
+        folder_path: str,
+        query: str,
+        pattern: str = "*",
+        input_format: str = "auto",
+        recursive: bool = False,
+        parallel: int = 4,
+        csv_delimiter: str = ",",
+        csv_headers: Optional[List[str]] = None,
+        no_header: bool = False,
+        field_types: Optional[Dict[str, str]] = None,
+        sample_size: int = 100,
+    ) -> Dict[str, Any]:
+        """Execute a TQL query against multiple files in a folder.
+
+        This method processes all matching files and aggregates results,
+        supporting both filter queries (with records) and stats queries.
+
+        Args:
+            folder_path: Path to folder
+            query: TQL query string
+            pattern: Glob pattern for file matching
+            input_format: File format ('json', 'jsonl', 'csv', 'auto')
+            recursive: Process subdirectories recursively
+            parallel: Number of parallel workers
+            csv_delimiter: CSV delimiter character
+            csv_headers: Manual CSV header names
+            no_header: Force CSV to be treated as having no header
+            field_types: Manual field type mappings
+            sample_size: Number of records to sample for type inference
+
+        Returns:
+            Dictionary containing results and/or stats aggregated across all files
+
+        Raises:
+            TQLParseError: If query parsing fails
+            TQLExecutionError: If folder processing fails
+        """
+        from .streaming_file_processor import StreamingFileProcessor
+
+        # Parse the query
+        ast = self.parse(query)
+        query_type = ast.get("type")
+
+        # Create streaming processor
+        processor = StreamingFileProcessor(
+            sample_size=sample_size,
+            csv_delimiter=csv_delimiter,
+            field_types=field_types,
+            csv_headers=csv_headers,
+            no_header=no_header,
+        )
+
+        # Process folder based on query type
+        if query_type == "stats_expr":
+            # Pure stats query - aggregate across all files
+
+            def all_records():
+                for _file_path, record in processor.process_folder(
+                    folder_path, pattern, input_format, recursive, parallel
+                ):
+                    yield record
+
+            stats_result = self.stats_evaluator.evaluate_stats_streaming(all_records(), ast, self.field_mappings)
+            return {"stats": stats_result, "files_processed": "multiple"}
+
+        elif query_type == "query_with_stats":
+            # Filter + stats query
+            filter_ast = ast["filter"]
+            stats_ast = ast["stats"]
+
+            def filtered_records():
+                for _file_path, record in processor.process_folder(
+                    folder_path, pattern, input_format, recursive, parallel
+                ):
+                    if self.evaluator._evaluate_node(filter_ast, record, self._simple_mappings):
+                        yield record
+
+            stats_result = self.stats_evaluator.evaluate_stats_streaming(
+                filtered_records(), stats_ast, self.field_mappings
+            )
+            return {"stats": stats_result, "files_processed": "multiple"}
+
+        else:
+            # Regular filter query - collect matching records from all files
+            matched_records = []
+            files_processed = 0
+            files_with_matches = 0
+
+            for file_path, record in processor.process_folder(folder_path, pattern, input_format, recursive, parallel):
+                files_processed += 1
+                if self.evaluator._evaluate_node(ast, record, self._simple_mappings):
+                    matched_records.append({"_source_file": file_path, **record})
+                    files_with_matches += 1
+
+            return {
+                "results": matched_records,
+                "total": len(matched_records),
+                "files_processed": files_processed,
+                "files_with_matches": files_with_matches,
+            }
+
     def _apply_mutators_to_record(self, ast: Dict[str, Any], record: Dict[str, Any]) -> Dict[str, Any]:
         """Apply any mutators in the AST to enrich the record.
 
```
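A minimal usage sketch of the new streaming entry points, assuming the package exposes `TQL` from the top-level `tql` module; file paths and query strings here are hypothetical, and actual TQL query syntax is documented in the package METADATA:

```python
from tql import TQL  # assumed top-level export

tql = TQL()

# Stream matching records from a large JSONL file without loading it into memory
for record in tql.query_file_streaming("events.jsonl", "status = 'error'"):
    print(record)

# Accumulator-based aggregation over the same file (stats syntax is illustrative)
stats = tql.query_file_stats("events.jsonl", "status = 'error' | stats count() by host")

# Filter across every .jsonl file under logs/, recursing into subdirectories
result = tql.query_folder("logs/", "status = 'error'", pattern="*.jsonl", recursive=True)
print(result["total"], result["files_processed"], result["files_with_matches"])
```

Note that `query_file_streaming` yields records lazily, while `query_folder` materializes all matches in memory and annotates each with a `_source_file` key.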
tql/core_components/opensearch_operations.py
CHANGED

```diff
@@ -239,7 +239,7 @@ class OpenSearchOperations:
         analysis_result = self.analyze_opensearch_query(query)
         has_mutators = isinstance(analysis_result, MutatorAnalysisResult)
         needs_post_processing_for_stats = (
-            has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False
+            has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False  # type: ignore[union-attr]
         )
 
         # Handle stats queries differently
@@ -258,7 +258,7 @@ class OpenSearchOperations:
             if filter_ast:
                 # Use the optimized AST if we have mutators
                 if has_mutators and needs_post_processing_for_stats:
-                    filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]
+                    filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]  # type: ignore[union-attr]
                 else:
                     filter_query = backend.convert(filter_ast)["query"]
             else:
@@ -529,6 +529,8 @@ class OpenSearchOperations:
             stats_evaluator = TQLStatsEvaluator()
 
             # Execute the stats aggregation in memory
+            if stats_ast_for_post_processing is None:
+                raise ValueError("Stats AST is None but phase2 processing was requested")
             stats_results = stats_evaluator.evaluate_stats(filtered_docs, stats_ast_for_post_processing, {})
 
             # Format response for stats-only (no documents)
@@ -547,7 +549,7 @@ class OpenSearchOperations:
                 "performance_impact": {
                     "overhead_ms": 0,  # Would need timing to calculate
                     "documents_processed": len(all_documents),
-                    "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,
+                    "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,  # type: ignore[union-attr]
                 },
                 "opensearch_query": complete_opensearch_query,
             }
@@ -580,6 +582,8 @@ class OpenSearchOperations:
             translator = OpenSearchStatsTranslator()
 
             # Transform the response using the translator
+            if stats_ast is None:
+                raise ValueError("Stats AST is None but grouping was detected")
             transformed_response = translator.transform_response(response, stats_ast)
 
             # The transformed response already has the correct structure
@@ -925,6 +929,21 @@ class OpenSearchOperations:
         # Get opensearch total before filtering
        opensearch_total = total_hits
 
+        # Track optimization features used in this query
+        optimizations_applied = []
+        if scan_all:
+            optimizations_applied.append("scroll_api")
+        if needs_phase2 and pagination_stats and pagination_stats.get("pages_checked", 0) > 1:
+            optimizations_applied.append("auto_pagination")
+        if request_cache:
+            optimizations_applied.append("request_cache")
+        if preference:
+            optimizations_applied.append("preference_routing")
+        if routing:
+            optimizations_applied.append("custom_routing")
+        if terminate_after:
+            optimizations_applied.append("early_termination")
+
         result = {
             "results": results,
             "total": len(results),
@@ -934,7 +953,7 @@ class OpenSearchOperations:
             "health_status": health_status,
             "health_reasons": health_reasons,
             "performance_impact": performance_impact,
-            "optimizations_applied":
+            "optimizations_applied": optimizations_applied,
             "opensearch_query": (
                 complete_opensearch_query if "complete_opensearch_query" in locals() else {}
             ),  # Include the full query body
```
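The two added `None` guards follow the standard pattern for narrowing an `Optional` value before use, which is the alternative to the `# type: ignore[union-attr]` escape hatches added in the same file. A generic sketch (the function name and return shape are hypothetical):

```python
from typing import Any, Dict, Optional

def transform_stats(stats_ast: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    # After this explicit check, type checkers narrow stats_ast to Dict[str, Any]
    if stats_ast is None:
        raise ValueError("Stats AST is None but grouping was detected")
    return {"aggs": stats_ast}  # access is now provably safe, no ignore comment needed
```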
tql/evaluator.py
CHANGED
```diff
@@ -67,7 +67,7 @@ class TQLEvaluator:
         field_mappings = field_mappings or {}
         return self._evaluate_node(ast, record, field_mappings)
 
-    def _evaluate_node(self, node: Any, record: Dict[str, Any], field_mappings: Dict[str, str]) -> bool:
+    def _evaluate_node(self, node: Any, record: Dict[str, Any], field_mappings: Dict[str, str]) -> bool:  # noqa: C901
         """Evaluate a single AST node against a record.
 
         Args:
@@ -350,6 +350,8 @@ class TQLEvaluator:
             return left_missing or right_missing
         elif node_type == "unary_op":
             # Don't recurse through NOT operators - they handle missing fields themselves
+            # The NOT operator has special logic at lines 213-254 that handles missing fields correctly
+            # Recursing here would cause double-handling and incorrect results
             return False
         elif node_type == "collection_op":
             field_name = node["field"]
```
tql/evaluator_components/special_expressions.py
CHANGED

```diff
@@ -15,15 +15,27 @@ class SpecialExpressionEvaluator:
     # Sentinel value to distinguish missing fields from None values
     _MISSING_FIELD = object()
 
-    def __init__(self, get_field_value_func, evaluate_node_func):
+    def __init__(self, get_field_value_func, evaluate_node_func, set_field_value_func=None):
         """Initialize the special expression evaluator.
 
         Args:
             get_field_value_func: Function to get field values from records
             evaluate_node_func: Function to evaluate AST nodes
+            set_field_value_func: Optional function to set field values in records
         """
         self._get_field_value = get_field_value_func
         self._evaluate_node = evaluate_node_func
+        self._set_field_value = set_field_value_func or self._default_set_field_value
+
+    def _default_set_field_value(self, record: Dict[str, Any], field_path: str, value: Any) -> None:
+        """Default implementation of set_field_value for nested field assignment."""
+        parts = field_path.split(".")
+        current = record
+        for part in parts[:-1]:
+            if part not in current:
+                current[part] = {}
+            current = current[part]
+        current[parts[-1]] = value
 
     def evaluate_geo_expr(  # noqa: C901
         self, node: Dict[str, Any], record: Dict[str, Any], field_mappings: Dict[str, str]
@@ -106,19 +118,26 @@ class SpecialExpressionEvaluator:
             elif "as" in record:
                 geo_data["as"] = record["as"]
         else:
-            # Default locations
+            # Default locations (ECS style)
             if "." in actual_field:
-                # For nested fields like destination.ip, check destination.geo
+                # For nested fields like destination.ip, check destination.geo and destination.as
                 parent_path = actual_field.rsplit(".", 1)[0]
                 parent = self._get_field_value(record, parent_path)
-                if isinstance(parent, dict) and "geo" in parent:
-                    # Found geo data under parent
-                    geo_data =
+                if isinstance(parent, dict) and ("geo" in parent or "as" in parent):
+                    # Found geo/as data under parent
+                    geo_data = {}
+                    if "geo" in parent:
+                        geo_data["geo"] = parent["geo"]
+                    if "as" in parent:
+                        geo_data["as"] = parent["as"]
             else:
-                # For top-level fields, check
-                if "
-
-
+                # For top-level fields like ip, check top-level geo and as fields (ECS style)
+                if "geo" in record or "as" in record:
+                    geo_data = {}
+                    if "geo" in record:
+                        geo_data["geo"] = record["geo"]
+                    if "as" in record:
+                        geo_data["as"] = record["as"]
 
         # Check if we should use existing geo data or force a new lookup
         force_lookup = geo_params.get("force", False)
@@ -148,6 +167,39 @@ class SpecialExpressionEvaluator:
             # Apply geo lookup
             geo_data = apply_mutators(field_value, [geo_mutator], actual_field, record)
 
+            # Always include enrichment in query results (save=True adds to record for output)
+            # Note: This does not modify source files - enrichment only appears in query results
+            save_enrichment = geo_params.get("save", True)
+            if save_enrichment and geo_data and isinstance(geo_data, dict):
+                # Determine where to save the enrichment
+                if custom_field:
+                    # Save to custom field location
+                    self._set_field_value(record, custom_field, geo_data.get("geo"))
+                    if "as" in geo_data:
+                        # Save AS data as sibling to geo field
+                        if "." in custom_field:
+                            as_parent_path = custom_field.rsplit(".", 1)[0]
+                            parent = self._get_field_value(record, as_parent_path)
+                            if isinstance(parent, dict):
+                                parent["as"] = geo_data["as"]
+                        else:
+                            record["as"] = geo_data["as"]
+                elif "." in actual_field:
+                    # For nested fields like destination.ip, save to destination.geo and destination.as (ECS style)
+                    parent_path = actual_field.rsplit(".", 1)[0]
+                    parent = self._get_field_value(record, parent_path)
+                    if isinstance(parent, dict):
+                        if "geo" in geo_data:
+                            parent["geo"] = geo_data["geo"]
+                        if "as" in geo_data:
+                            parent["as"] = geo_data["as"]
+                else:
+                    # For top-level fields like ip, save to top-level geo and as fields (ECS style)
+                    if "geo" in geo_data:
+                        record["geo"] = geo_data["geo"]
+                    if "as" in geo_data:
+                        record["as"] = geo_data["as"]
+
         # Now evaluate the conditions against the geo data
         if conditions:
             # Handle None geo_data (e.g., private IPs or lookup failures)
```
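The `_default_set_field_value` helper is self-contained, so its dotted-path behavior is easy to demonstrate in isolation (the record below is hypothetical):

```python
from typing import Any, Dict

def set_field_value(record: Dict[str, Any], field_path: str, value: Any) -> None:
    """Walk a dotted path, creating intermediate dicts, then assign the leaf."""
    parts = field_path.split(".")
    current = record
    for part in parts[:-1]:
        if part not in current:
            current[part] = {}
        current = current[part]
    current[parts[-1]] = value

record = {"destination": {"ip": "8.8.8.8"}}
set_field_value(record, "destination.geo.country_name", "United States")
# record == {"destination": {"ip": "8.8.8.8", "geo": {"country_name": "United States"}}}
```

This is the mechanism the geo-enrichment path above uses when a `custom_field` target is given.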
tql/evaluator_components/value_comparison.py
CHANGED

```diff
@@ -6,6 +6,7 @@ operator implementations, and special cases like CIDR matching.
 
 import ipaddress
 import re
+from functools import lru_cache
 from typing import Any
 
 
@@ -15,6 +16,23 @@ class ValueComparator:
     # Sentinel value to distinguish missing fields from None values
     _MISSING_FIELD = object()
 
+    @staticmethod
+    @lru_cache(maxsize=256)
+    def _compile_regex(pattern: str) -> re.Pattern:
+        """Compile and cache regex patterns for performance.
+
+        Args:
+            pattern: Regex pattern string
+
+        Returns:
+            Compiled regex pattern
+
+        Note:
+            Uses LRU cache with max 256 patterns. This significantly improves
+            performance when the same regex patterns are used repeatedly in queries.
+        """
+        return re.compile(pattern)
+
     def compare_values(self, field_value: Any, operator: str, expected_value: Any) -> bool:  # noqa: C901
         """Compare a field value against an expected value using the given operator.
 
```
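A standalone sketch of the caching idea: `functools.lru_cache` memoizes compilation, so repeated patterns return the same compiled object instead of recompiling:

```python
import re
from functools import lru_cache

@lru_cache(maxsize=256)
def compile_regex(pattern: str) -> re.Pattern:
    # Compilation runs once per distinct pattern; later calls hit the cache
    return re.compile(pattern)

p1 = compile_regex(r"\d+\.\d+\.\d+\.\d+")
p2 = compile_regex(r"\d+\.\d+\.\d+\.\d+")
assert p1 is p2                     # same cached object
print(compile_regex.cache_info())   # CacheInfo(hits=1, misses=1, maxsize=256, currsize=1)
```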
```diff
@@ -49,9 +67,17 @@ class ValueComparator:
             return False
 
         # Handle None field values (field exists but is None)
+        # IMPORTANT: None is a valid value, distinct from missing fields.
+        # For 'exists' operator: This code path should NOT be reached because 'exists'
+        # checks field presence in the record, not the value. The evaluator handles
+        # 'exists' before calling compare_values. If we reach here with None, it means
+        # the field exists but has None value, which should NOT match 'exists'.
         if field_value is None:
             if operator in ["exists"]:
-
+                # Field key exists in record but value is None
+                # Semantics: 'exists' means "field has a non-null value"
+                # This matches database behavior where NULL != EXISTS
+                return False  # None value does not satisfy 'exists'
             elif operator in ["is"]:
                 # Check for null comparison - expected_value can be None or "null"
                 return expected_value is None or (isinstance(expected_value, str) and expected_value.lower() == "null")
@@ -68,6 +94,20 @@ class ValueComparator:
         if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
             field_value = field_value.lower() == "true"
 
+        # Type compatibility check for numeric operators
+        # If operator requires numeric comparison, both values must be numeric
+        # Exception: Arrays are handled specially in the operator logic below
+        if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
+            # Skip check if field_value is an array - handled by array logic below
+            if not isinstance(field_value, (list, tuple)):
+                field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
+                expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
+
+                if not (field_is_numeric and expected_is_numeric):
+                    # At least one value failed numeric conversion
+                    # Cannot perform numeric comparison - return False
+                    return False
+
         try:
             if operator in ["eq", "="]:
                 # Handle array fields - check if ANY element equals expected value
```
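The numeric-compatibility rule reduces to a small predicate; the sketch below mirrors the check added above, including the deliberate exclusion of `bool` (which subclasses `int` in Python):

```python
def is_numeric(value) -> bool:
    # bool is a subclass of int, so exclude it explicitly
    return isinstance(value, (int, float)) and not isinstance(value, bool)

assert is_numeric(5) and is_numeric(3.2)   # both sides numeric: comparison proceeds
assert not is_numeric("5")                 # strings do not qualify for gt/lt
assert not is_numeric(True)                # booleans are not numbers here
```

With this check in place, a comparison such as `"5" gt 3` now returns `False` before the numeric operator logic runs, rather than relying on string coercion.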
```diff
@@ -104,27 +144,30 @@ class ValueComparator:
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle list fields by checking if ANY element contains the expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, list):
                     # For arrays, check if ANY element contains the expected value
-                    return any(str(expected_value) in str(elem) for elem in field_value)
+                    return any(str(expected_value).lower() in str(elem).lower() for elem in field_value)
                 else:
-                    return str(expected_value) in str(field_value)
+                    return str(expected_value).lower() in str(field_value).lower()
             elif operator == "startswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle array fields - check if ANY element starts with expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, (list, tuple)):
-                    return any(str(elem).startswith(str(expected_value)) for elem in field_value)
-                return str(field_value).startswith(str(expected_value))
+                    return any(str(elem).lower().startswith(str(expected_value).lower()) for elem in field_value)
+                return str(field_value).lower().startswith(str(expected_value).lower())
             elif operator == "endswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle array fields - check if ANY element ends with expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, (list, tuple)):
-                    return any(str(elem).endswith(str(expected_value)) for elem in field_value)
-                return str(field_value).endswith(str(expected_value))
+                    return any(str(elem).lower().endswith(str(expected_value).lower()) for elem in field_value)
+                return str(field_value).lower().endswith(str(expected_value).lower())
             elif operator == "in":
                 if isinstance(expected_value, list):
                     if len(expected_value) == 1 and isinstance(field_value, list):
@@ -143,7 +186,13 @@ class ValueComparator:
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Use cached regex compilation for performance
+                try:
+                    pattern = self._compile_regex(str(expected_value))
+                    return bool(pattern.search(str(field_value)))
+                except (re.error, TypeError):
+                    # Invalid regex pattern, fall back to no match
+                    return False
             elif operator == "cidr":
                 # Unwrap single-element lists for CIDR
                 if isinstance(expected_value, list) and len(expected_value) == 1:
@@ -194,22 +243,31 @@ class ValueComparator:
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return str(expected_value).lower() not in str(field_value).lower()
             elif operator == "not_startswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return not str(field_value).lower().startswith(str(expected_value).lower())
             elif operator == "not_endswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return not str(field_value).lower().endswith(str(expected_value).lower())
             elif operator == "not_regexp":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Use cached regex compilation for performance
+                try:
+                    pattern = self._compile_regex(str(expected_value))
+                    return not bool(pattern.search(str(field_value)))
+                except (re.error, TypeError):
+                    # Invalid regex pattern, fall back to match (not regexp succeeds)
+                    return True
             elif operator == "not_cidr":
                 # Unwrap single-element lists for CIDR
                 if isinstance(expected_value, list) and len(expected_value) == 1:
```
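The case-insensitivity change is easy to verify in isolation; a minimal sketch of the new `contains`/`startswith` semantics shown above:

```python
def contains(field_value, expected_value) -> bool:
    # Case-insensitive substring check, matching the 0.2.5 behavior
    return str(expected_value).lower() in str(field_value).lower()

def startswith(field_value, expected_value) -> bool:
    return str(field_value).lower().startswith(str(expected_value).lower())

assert contains("Mozilla/5.0 (Windows NT 10.0)", "mozilla")   # case differs, still matches
assert startswith("ERROR: disk full", "error")                # previously case-sensitive
```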