tellaro-query-language 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tql/core.py CHANGED
@@ -4,12 +4,19 @@ This module provides the main TQL class that serves as the primary interface
  for parsing and executing TQL queries against different backends.
  """

- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Dict, Generator, List, Optional, Union

  from .analyzer import EnhancedFieldMapping
  from .core_components import FileOperations, OpenSearchOperations, StatsOperations, ValidationOperations
  from .evaluator import TQLEvaluator
- from .exceptions import TQLOperatorError, TQLParseError, TQLSyntaxError, TQLTypeError, TQLValidationError
+ from .exceptions import (
+ TQLExecutionError,
+ TQLOperatorError,
+ TQLParseError,
+ TQLSyntaxError,
+ TQLTypeError,
+ TQLValidationError,
+ )
  from .mutator_analyzer import MutatorAnalysisResult
  from .parser import TQLParser
  from .stats_evaluator import TQLStatsEvaluator
@@ -27,7 +34,7 @@ class TQL:
  >>> results = tql.query(data, query)
  """

- def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None):
+ def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None): # noqa: C901
  """Initialize TQL instance.

  Args:
@@ -100,8 +107,26 @@ class TQL:
  # This is an OpenSearch-style mapping, map field to itself
  self._simple_mappings[k] = k
  else:
- # Extract the first key as the simple mapping
- self._simple_mappings[k] = next(iter(v.keys()))
+ # Intelligent field mapping extraction for complex mappings
+ # Priority: 1) Key matching field name, 2) Key without dots (primary field), 3) First key
+
+ if k in v:
+ # Field name exists as key in mapping (e.g., {"username": {"username": "keyword", ...}})
+ self._simple_mappings[k] = k
+ else:
+ # Find primary field (keys without dots, not starting with underscore)
+ primary_fields = [
+ field_key
+ for field_key in v.keys()
+ if "." not in field_key and not field_key.startswith("_")
+ ]
+
+ if primary_fields:
+ # Use first primary field
+ self._simple_mappings[k] = primary_fields[0]
+ else:
+ # Fallback to first key (maintain backward compatibility)
+ self._simple_mappings[k] = next(iter(v.keys()))
  else:
  # Default to mapping field to itself
  self._simple_mappings[k] = k
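
Note: the new selection priority can be illustrated with a small standalone sketch; the helper name and the mapping dicts below are hypothetical and only mirror the shape the constructor accepts.

def pick_simple_mapping(field, mapping):
    # Mirrors the priority above: exact field-name key, then dot-free non-underscore keys, then first key
    if field in mapping:
        return field
    primary = [k for k in mapping if "." not in k and not k.startswith("_")]
    return primary[0] if primary else next(iter(mapping))

pick_simple_mapping("username", {"username": "keyword", "username.text": "text"})  # -> "username"
pick_simple_mapping("user", {"user.name": "keyword", "user_id": "keyword"})        # -> "user_id"
pick_simple_mapping("src", {"_meta": {}, "src.ip": "ip"})                           # -> "_meta" (fallback: first key)
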
@@ -1032,6 +1057,237 @@ class TQL:
  """
  return self.stats_ops.analyze_stats_query(query)

+ def query_file_streaming(
+ self,
+ file_path: str,
+ query: str,
+ input_format: str = "auto",
+ csv_delimiter: str = ",",
+ csv_headers: Optional[List[str]] = None,
+ no_header: bool = False,
+ field_types: Optional[Dict[str, str]] = None,
+ sample_size: int = 100,
+ ) -> Generator[Dict[str, Any], None, None]:
+ """Execute a TQL query against a file in streaming mode.
+
+ This method processes files line-by-line with minimal memory usage,
+ yielding matching records as they are found.
+
+ Args:
+ file_path: Path to file
+ query: TQL query string (filter query only, not stats)
+ input_format: File format ('json', 'jsonl', 'csv', 'auto')
+ csv_delimiter: CSV delimiter character
+ csv_headers: Manual CSV header names
+ no_header: Force CSV to be treated as having no header
+ field_types: Manual field type mappings
+ sample_size: Number of records to sample for type inference
+
+ Yields:
+ Matching records as dictionaries
+
+ Raises:
+ TQLParseError: If query parsing fails
+ TQLExecutionError: If file processing fails
+ """
+ from .streaming_file_processor import StreamingFileProcessor
+
+ # Parse the query
+ ast = self.parse(query)
+
+ # Validate query type (only filter queries supported for streaming)
+ query_type = ast.get("type")
+ if query_type in ["stats_expr", "query_with_stats"]:
+ raise TQLExecutionError("Stats queries not supported in streaming mode. Use query_file_stats() instead.")
+
+ # Create streaming processor
+ processor = StreamingFileProcessor(
+ sample_size=sample_size,
+ csv_delimiter=csv_delimiter,
+ field_types=field_types,
+ csv_headers=csv_headers,
+ no_header=no_header,
+ )
+
+ # Process file and evaluate query on each record
+ for record in processor.process_file(file_path, input_format):
+ if self.evaluator._evaluate_node(ast, record, self._simple_mappings):
+ yield record
+
+ def query_file_stats(
+ self,
+ file_path: str,
+ query: str,
+ input_format: str = "auto",
+ csv_delimiter: str = ",",
+ csv_headers: Optional[List[str]] = None,
+ no_header: bool = False,
+ field_types: Optional[Dict[str, str]] = None,
+ sample_size: int = 100,
+ ) -> Dict[str, Any]:
+ """Execute a TQL stats query against a file in streaming mode.
+
+ This method processes files line-by-line with accumulator-based stats
+ calculations for memory efficiency.
+
+ Args:
+ file_path: Path to file
+ query: TQL query string (can include filters and stats)
+ input_format: File format ('json', 'jsonl', 'csv', 'auto')
+ csv_delimiter: CSV delimiter character
+ csv_headers: Manual CSV header names
+ no_header: Force CSV to be treated as having no header
+ field_types: Manual field type mappings
+ sample_size: Number of records to sample for type inference
+
+ Returns:
+ Dictionary containing aggregation results
+
+ Raises:
+ TQLParseError: If query parsing fails
+ TQLExecutionError: If file processing fails
+ """
+ from .streaming_file_processor import StreamingFileProcessor
+
+ # Parse the query
+ ast = self.parse(query)
+ query_type = ast.get("type")
+
+ # Create streaming processor
+ processor = StreamingFileProcessor(
+ sample_size=sample_size,
+ csv_delimiter=csv_delimiter,
+ field_types=field_types,
+ csv_headers=csv_headers,
+ no_header=no_header,
+ )
+
+ # Handle different query types
+ if query_type == "stats_expr":
+ # Pure stats query - process all records
+ record_iter = processor.process_file(file_path, input_format)
+ return self.stats_evaluator.evaluate_stats_streaming(record_iter, ast, self.field_mappings)
+
+ elif query_type == "query_with_stats":
+ # Filter + stats query
+ filter_ast = ast["filter"]
+ stats_ast = ast["stats"]
+
+ # Create filtered iterator
+ def filtered_records():
+ for record in processor.process_file(file_path, input_format):
+ if self.evaluator._evaluate_node(filter_ast, record, self._simple_mappings):
+ yield record
+
+ return self.stats_evaluator.evaluate_stats_streaming(filtered_records(), stats_ast, self.field_mappings)
+
+ else:
+ # Regular filter query - shouldn't use stats method
+ raise TQLExecutionError("Use query_file_streaming() for filter queries without stats aggregations.")
+
+ def query_folder(
+ self,
+ folder_path: str,
+ query: str,
+ pattern: str = "*",
+ input_format: str = "auto",
+ recursive: bool = False,
+ parallel: int = 4,
+ csv_delimiter: str = ",",
+ csv_headers: Optional[List[str]] = None,
+ no_header: bool = False,
+ field_types: Optional[Dict[str, str]] = None,
+ sample_size: int = 100,
+ ) -> Dict[str, Any]:
+ """Execute a TQL query against multiple files in a folder.
+
+ This method processes all matching files and aggregates results,
+ supporting both filter queries (with records) and stats queries.
+
+ Args:
+ folder_path: Path to folder
+ query: TQL query string
+ pattern: Glob pattern for file matching
+ input_format: File format ('json', 'jsonl', 'csv', 'auto')
+ recursive: Process subdirectories recursively
+ parallel: Number of parallel workers
+ csv_delimiter: CSV delimiter character
+ csv_headers: Manual CSV header names
+ no_header: Force CSV to be treated as having no header
+ field_types: Manual field type mappings
+ sample_size: Number of records to sample for type inference
+
+ Returns:
+ Dictionary containing results and/or stats aggregated across all files
+
+ Raises:
+ TQLParseError: If query parsing fails
+ TQLExecutionError: If folder processing fails
+ """
+ from .streaming_file_processor import StreamingFileProcessor
+
+ # Parse the query
+ ast = self.parse(query)
+ query_type = ast.get("type")
+
+ # Create streaming processor
+ processor = StreamingFileProcessor(
+ sample_size=sample_size,
+ csv_delimiter=csv_delimiter,
+ field_types=field_types,
+ csv_headers=csv_headers,
+ no_header=no_header,
+ )
+
+ # Process folder based on query type
+ if query_type == "stats_expr":
+ # Pure stats query - aggregate across all files
+
+ def all_records():
+ for _file_path, record in processor.process_folder(
+ folder_path, pattern, input_format, recursive, parallel
+ ):
+ yield record
+
+ stats_result = self.stats_evaluator.evaluate_stats_streaming(all_records(), ast, self.field_mappings)
+ return {"stats": stats_result, "files_processed": "multiple"}
+
+ elif query_type == "query_with_stats":
+ # Filter + stats query
+ filter_ast = ast["filter"]
+ stats_ast = ast["stats"]
+
+ def filtered_records():
+ for _file_path, record in processor.process_folder(
+ folder_path, pattern, input_format, recursive, parallel
+ ):
+ if self.evaluator._evaluate_node(filter_ast, record, self._simple_mappings):
+ yield record
+
+ stats_result = self.stats_evaluator.evaluate_stats_streaming(
+ filtered_records(), stats_ast, self.field_mappings
+ )
+ return {"stats": stats_result, "files_processed": "multiple"}
+
+ else:
+ # Regular filter query - collect matching records from all files
+ matched_records = []
+ files_processed = 0
+ files_with_matches = 0
+
+ for file_path, record in processor.process_folder(folder_path, pattern, input_format, recursive, parallel):
+ files_processed += 1
+ if self.evaluator._evaluate_node(ast, record, self._simple_mappings):
+ matched_records.append({"_source_file": file_path, **record})
+ files_with_matches += 1
+
+ return {
+ "results": matched_records,
+ "total": len(matched_records),
+ "files_processed": files_processed,
+ "files_with_matches": files_with_matches,
+ }
+
  def _apply_mutators_to_record(self, ast: Dict[str, Any], record: Dict[str, Any]) -> Dict[str, Any]:
  """Apply any mutators in the AST to enrich the record.

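Note: a usage sketch for the three new entry points. Method names and parameters come from the signatures above; the import path, file paths, and TQL query strings are invented for illustration and may not match real TQL syntax.

from tql.core import TQL  # assumed import path, based on the file shown in this diff

tql = TQL()

# Stream matching records from a large JSONL file without loading it all into memory
for record in tql.query_file_streaming("events.jsonl", "status eq 'failed'", input_format="jsonl"):
    print(record)

# Stats queries go through the accumulator-based path instead
stats = tql.query_file_stats("events.jsonl", "status eq 'failed' | stats count() by host")

# Fan out over a folder; the result dict carries per-file bookkeeping
result = tql.query_folder("logs/", "bytes gte 1000", pattern="*.csv", recursive=True, parallel=4)
print(result["total"], result["files_processed"], result["files_with_matches"])
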
@@ -239,7 +239,7 @@ class OpenSearchOperations:
  analysis_result = self.analyze_opensearch_query(query)
  has_mutators = isinstance(analysis_result, MutatorAnalysisResult)
  needs_post_processing_for_stats = (
- has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False
+ has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False # type: ignore[union-attr]
  )

  # Handle stats queries differently
@@ -258,7 +258,7 @@ class OpenSearchOperations:
  if filter_ast:
  # Use the optimized AST if we have mutators
  if has_mutators and needs_post_processing_for_stats:
- filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]
+ filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"] # type: ignore[union-attr]
  else:
  filter_query = backend.convert(filter_ast)["query"]
  else:
@@ -529,6 +529,8 @@ class OpenSearchOperations:
  stats_evaluator = TQLStatsEvaluator()

  # Execute the stats aggregation in memory
+ if stats_ast_for_post_processing is None:
+ raise ValueError("Stats AST is None but phase2 processing was requested")
  stats_results = stats_evaluator.evaluate_stats(filtered_docs, stats_ast_for_post_processing, {})

  # Format response for stats-only (no documents)
@@ -547,7 +549,7 @@ class OpenSearchOperations:
  "performance_impact": {
  "overhead_ms": 0, # Would need timing to calculate
  "documents_processed": len(all_documents),
- "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,
+ "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0, # type: ignore[union-attr]
  },
  "opensearch_query": complete_opensearch_query,
  }
@@ -580,6 +582,8 @@ class OpenSearchOperations:
  translator = OpenSearchStatsTranslator()

  # Transform the response using the translator
+ if stats_ast is None:
+ raise ValueError("Stats AST is None but grouping was detected")
  transformed_response = translator.transform_response(response, stats_ast)

  # The transformed response already has the correct structure
@@ -925,6 +929,21 @@ class OpenSearchOperations:
  # Get opensearch total before filtering
  opensearch_total = total_hits

+ # Track optimization features used in this query
+ optimizations_applied = []
+ if scan_all:
+ optimizations_applied.append("scroll_api")
+ if needs_phase2 and pagination_stats and pagination_stats.get("pages_checked", 0) > 1:
+ optimizations_applied.append("auto_pagination")
+ if request_cache:
+ optimizations_applied.append("request_cache")
+ if preference:
+ optimizations_applied.append("preference_routing")
+ if routing:
+ optimizations_applied.append("custom_routing")
+ if terminate_after:
+ optimizations_applied.append("early_termination")
+
  result = {
  "results": results,
  "total": len(results),
@@ -934,7 +953,7 @@ class OpenSearchOperations:
  "health_status": health_status,
  "health_reasons": health_reasons,
  "performance_impact": performance_impact,
- "optimizations_applied": [], # TODO: Track actual optimizations # noqa: W0511
+ "optimizations_applied": optimizations_applied,
  "opensearch_query": (
  complete_opensearch_query if "complete_opensearch_query" in locals() else {}
  ), # Include the full query body
tql/evaluator.py CHANGED
@@ -67,7 +67,7 @@ class TQLEvaluator:
  field_mappings = field_mappings or {}
  return self._evaluate_node(ast, record, field_mappings)

- def _evaluate_node(self, node: Any, record: Dict[str, Any], field_mappings: Dict[str, str]) -> bool:
+ def _evaluate_node(self, node: Any, record: Dict[str, Any], field_mappings: Dict[str, str]) -> bool: # noqa: C901
  """Evaluate a single AST node against a record.

  Args:
@@ -350,6 +350,8 @@ class TQLEvaluator:
  return left_missing or right_missing
  elif node_type == "unary_op":
  # Don't recurse through NOT operators - they handle missing fields themselves
+ # The NOT operator has special logic at lines 213-254 that handles missing fields correctly
+ # Recursing here would cause double-handling and incorrect results
  return False
  elif node_type == "collection_op":
  field_name = node["field"]
@@ -15,15 +15,27 @@ class SpecialExpressionEvaluator:
  # Sentinel value to distinguish missing fields from None values
  _MISSING_FIELD = object()

- def __init__(self, get_field_value_func, evaluate_node_func):
+ def __init__(self, get_field_value_func, evaluate_node_func, set_field_value_func=None):
  """Initialize the special expression evaluator.

  Args:
  get_field_value_func: Function to get field values from records
  evaluate_node_func: Function to evaluate AST nodes
+ set_field_value_func: Optional function to set field values in records
  """
  self._get_field_value = get_field_value_func
  self._evaluate_node = evaluate_node_func
+ self._set_field_value = set_field_value_func or self._default_set_field_value
+
+ def _default_set_field_value(self, record: Dict[str, Any], field_path: str, value: Any) -> None:
+ """Default implementation of set_field_value for nested field assignment."""
+ parts = field_path.split(".")
+ current = record
+ for part in parts[:-1]:
+ if part not in current:
+ current[part] = {}
+ current = current[part]
+ current[parts[-1]] = value

  def evaluate_geo_expr( # noqa: C901
  self, node: Dict[str, Any], record: Dict[str, Any], field_mappings: Dict[str, str]
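
Note: the default setter creates intermediate dicts for dotted paths. A standalone copy of the logic above, applied to a made-up record:

def set_field_value(record, field_path, value):
    parts = field_path.split(".")
    current = record
    for part in parts[:-1]:
        if part not in current:
            current[part] = {}
        current = current[part]
    current[parts[-1]] = value

record = {"destination": {"ip": "203.0.113.10"}}
set_field_value(record, "destination.geo.country_name", "Example")
# record == {"destination": {"ip": "203.0.113.10", "geo": {"country_name": "Example"}}}
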
@@ -106,19 +118,26 @@ class SpecialExpressionEvaluator:
  elif "as" in record:
  geo_data["as"] = record["as"]
  else:
- # Default locations
+ # Default locations (ECS style)
  if "." in actual_field:
- # For nested fields like destination.ip, check destination.geo
+ # For nested fields like destination.ip, check destination.geo and destination.as
  parent_path = actual_field.rsplit(".", 1)[0]
  parent = self._get_field_value(record, parent_path)
- if isinstance(parent, dict) and "geo" in parent:
- # Found geo data under parent
- geo_data = parent
+ if isinstance(parent, dict) and ("geo" in parent or "as" in parent):
+ # Found geo/as data under parent
+ geo_data = {}
+ if "geo" in parent:
+ geo_data["geo"] = parent["geo"]
+ if "as" in parent:
+ geo_data["as"] = parent["as"]
  else:
- # For top-level fields, check enrichment.geo
- if "enrichment" in record and isinstance(record["enrichment"], dict):
- if "geo" in record["enrichment"]:
- geo_data = record["enrichment"]
+ # For top-level fields like ip, check top-level geo and as fields (ECS style)
+ if "geo" in record or "as" in record:
+ geo_data = {}
+ if "geo" in record:
+ geo_data["geo"] = record["geo"]
+ if "as" in record:
+ geo_data["as"] = record["as"]

  # Check if we should use existing geo data or force a new lookup
  force_lookup = geo_params.get("force", False)
@@ -148,6 +167,39 @@ class SpecialExpressionEvaluator:
  # Apply geo lookup
  geo_data = apply_mutators(field_value, [geo_mutator], actual_field, record)

+ # Always include enrichment in query results (save=True adds to record for output)
+ # Note: This does not modify source files - enrichment only appears in query results
+ save_enrichment = geo_params.get("save", True)
+ if save_enrichment and geo_data and isinstance(geo_data, dict):
+ # Determine where to save the enrichment
+ if custom_field:
+ # Save to custom field location
+ self._set_field_value(record, custom_field, geo_data.get("geo"))
+ if "as" in geo_data:
+ # Save AS data as sibling to geo field
+ if "." in custom_field:
+ as_parent_path = custom_field.rsplit(".", 1)[0]
+ parent = self._get_field_value(record, as_parent_path)
+ if isinstance(parent, dict):
+ parent["as"] = geo_data["as"]
+ else:
+ record["as"] = geo_data["as"]
+ elif "." in actual_field:
+ # For nested fields like destination.ip, save to destination.geo and destination.as (ECS style)
+ parent_path = actual_field.rsplit(".", 1)[0]
+ parent = self._get_field_value(record, parent_path)
+ if isinstance(parent, dict):
+ if "geo" in geo_data:
+ parent["geo"] = geo_data["geo"]
+ if "as" in geo_data:
+ parent["as"] = geo_data["as"]
+ else:
+ # For top-level fields like ip, save to top-level geo and as fields (ECS style)
+ if "geo" in geo_data:
+ record["geo"] = geo_data["geo"]
+ if "as" in geo_data:
+ record["as"] = geo_data["as"]
+
  # Now evaluate the conditions against the geo data
  if conditions:
  # Handle None geo_data (e.g., private IPs or lookup failures)
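
Note: where the enrichment lands, reduced to a minimal sketch. The record and lookup values are made up; the real code goes through apply_mutators and self._set_field_value as shown above.

record = {"destination": {"ip": "8.8.8.8"}}
geo_data = {"geo": {"country_iso_code": "US"}, "as": {"number": 15169}}  # hypothetical lookup result

# For a nested field like destination.ip, geo/as become ECS-style siblings of the field's parent
parent = record["destination"]
parent["geo"] = geo_data["geo"]
parent["as"] = geo_data["as"]

# For a top-level field like ip, the same keys would be written to the record root instead
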
@@ -6,6 +6,7 @@ operator implementations, and special cases like CIDR matching.

  import ipaddress
  import re
+ from functools import lru_cache
  from typing import Any

@@ -15,6 +16,23 @@ class ValueComparator:
  # Sentinel value to distinguish missing fields from None values
  _MISSING_FIELD = object()

+ @staticmethod
+ @lru_cache(maxsize=256)
+ def _compile_regex(pattern: str) -> re.Pattern:
+ """Compile and cache regex patterns for performance.
+
+ Args:
+ pattern: Regex pattern string
+
+ Returns:
+ Compiled regex pattern
+
+ Note:
+ Uses LRU cache with max 256 patterns. This significantly improves
+ performance when the same regex patterns are used repeatedly in queries.
+ """
+ return re.compile(pattern)
+
  def compare_values(self, field_value: Any, operator: str, expected_value: Any) -> bool: # noqa: C901
  """Compare a field value against an expected value using the given operator.

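Note: the caching pattern itself, as a standalone sketch:

import re
from functools import lru_cache

@lru_cache(maxsize=256)
def compile_regex(pattern: str) -> re.Pattern:
    # Repeated pattern strings are served from the cache instead of being recompiled
    return re.compile(pattern)

compile_regex(r"failed\s+login").search("3 failed login attempts")   # compiles once
compile_regex(r"failed\s+login").search("another failed login")      # cache hit
print(compile_regex.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=256, currsize=1)
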
@@ -49,9 +67,17 @@ class ValueComparator:
  return False

  # Handle None field values (field exists but is None)
+ # IMPORTANT: None is a valid value, distinct from missing fields.
+ # For 'exists' operator: This code path should NOT be reached because 'exists'
+ # checks field presence in the record, not the value. The evaluator handles
+ # 'exists' before calling compare_values. If we reach here with None, it means
+ # the field exists but has None value, which should NOT match 'exists'.
  if field_value is None:
  if operator in ["exists"]:
- return True # Field exists, even if value is None
+ # Field key exists in record but value is None
+ # Semantics: 'exists' means "field has a non-null value"
+ # This matches database behavior where NULL != EXISTS
+ return False # None value does not satisfy 'exists'
  elif operator in ["is"]:
  # Check for null comparison - expected_value can be None or "null"
  return expected_value is None or (isinstance(expected_value, str) and expected_value.lower() == "null")
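
Note: the behavioral change in one example. The import path is hypothetical, and this assumes ValueComparator can be instantiated directly; the evaluator normally calls compare_values on the caller's behalf.

from tql.comparators import ValueComparator  # hypothetical import path for the class shown in this hunk

comparator = ValueComparator()
comparator.compare_values(None, "exists", None)  # -> False in 0.2.5 (was True in 0.2.2)
comparator.compare_values(None, "is", "null")    # -> True: an explicit null check still matches
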
@@ -68,6 +94,20 @@ class ValueComparator:
  if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
  field_value = field_value.lower() == "true"

+ # Type compatibility check for numeric operators
+ # If operator requires numeric comparison, both values must be numeric
+ # Exception: Arrays are handled specially in the operator logic below
+ if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
+ # Skip check if field_value is an array - handled by array logic below
+ if not isinstance(field_value, (list, tuple)):
+ field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
+ expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
+
+ if not (field_is_numeric and expected_is_numeric):
+ # At least one value failed numeric conversion
+ # Cannot perform numeric comparison - return False
+ return False
+
  try:
  if operator in ["eq", "="]:
  # Handle array fields - check if ANY element equals expected value
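
Note: effect of the new numeric guard, continuing the ValueComparator sketch above (same instantiation assumption):

comparator.compare_values("abc", ">", 10)  # -> False: non-numeric operand, no TypeError raised
comparator.compare_values(42, ">", 10)     # -> True: both operands numeric, normal comparison applies
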
@@ -104,27 +144,30 @@ class ValueComparator:
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
  # Handle list fields by checking if ANY element contains the expected value
+ # Case-insensitive comparison to match post-processor behavior
  if isinstance(field_value, list):
  # For arrays, check if ANY element contains the expected value
- return any(str(expected_value) in str(elem) for elem in field_value)
+ return any(str(expected_value).lower() in str(elem).lower() for elem in field_value)
  else:
- return str(expected_value) in str(field_value)
+ return str(expected_value).lower() in str(field_value).lower()
  elif operator == "startswith":
  # Unwrap single-element lists for string operators
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
  # Handle array fields - check if ANY element starts with expected value
+ # Case-insensitive comparison to match post-processor behavior
  if isinstance(field_value, (list, tuple)):
- return any(str(elem).startswith(str(expected_value)) for elem in field_value)
- return str(field_value).startswith(str(expected_value))
+ return any(str(elem).lower().startswith(str(expected_value).lower()) for elem in field_value)
+ return str(field_value).lower().startswith(str(expected_value).lower())
  elif operator == "endswith":
  # Unwrap single-element lists for string operators
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
  # Handle array fields - check if ANY element ends with expected value
+ # Case-insensitive comparison to match post-processor behavior
  if isinstance(field_value, (list, tuple)):
- return any(str(elem).endswith(str(expected_value)) for elem in field_value)
- return str(field_value).endswith(str(expected_value))
+ return any(str(elem).lower().endswith(str(expected_value).lower()) for elem in field_value)
+ return str(field_value).lower().endswith(str(expected_value).lower())
  elif operator == "in":
  if isinstance(expected_value, list):
  if len(expected_value) == 1 and isinstance(field_value, list):
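
Note: the string operators are now case-insensitive and still match on ANY element of array fields; continuing the same sketch:

comparator.compare_values("Mozilla/5.0 (Windows NT 10.0)", "contains", "windows")  # -> True
comparator.compare_values(["Admin", "Operator"], "startswith", "admin")            # -> True (any element)
comparator.compare_values("report.PDF", "endswith", ".pdf")                        # -> True
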
@@ -143,7 +186,13 @@ class ValueComparator:
  # Unwrap single-element lists for string operators
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
- return bool(re.search(str(expected_value), str(field_value)))
+ # Use cached regex compilation for performance
+ try:
+ pattern = self._compile_regex(str(expected_value))
+ return bool(pattern.search(str(field_value)))
+ except (re.error, TypeError):
+ # Invalid regex pattern, fall back to no match
+ return False
  elif operator == "cidr":
  # Unwrap single-element lists for CIDR
  if isinstance(expected_value, list) and len(expected_value) == 1:
@@ -194,22 +243,31 @@ class ValueComparator:
  # Unwrap single-element lists for string operators
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
- return str(expected_value) not in str(field_value)
+ # Case-insensitive comparison to match post-processor behavior
+ return str(expected_value).lower() not in str(field_value).lower()
  elif operator == "not_startswith":
  # Unwrap single-element lists for string operators
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
- return not str(field_value).startswith(str(expected_value))
+ # Case-insensitive comparison to match post-processor behavior
+ return not str(field_value).lower().startswith(str(expected_value).lower())
  elif operator == "not_endswith":
  # Unwrap single-element lists for string operators
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
- return not str(field_value).endswith(str(expected_value))
+ # Case-insensitive comparison to match post-processor behavior
+ return not str(field_value).lower().endswith(str(expected_value).lower())
  elif operator == "not_regexp":
  # Unwrap single-element lists for string operators
  if isinstance(expected_value, list) and len(expected_value) == 1:
  expected_value = expected_value[0]
- return not bool(re.search(str(expected_value), str(field_value)))
+ # Use cached regex compilation for performance
+ try:
+ pattern = self._compile_regex(str(expected_value))
+ return not bool(pattern.search(str(field_value)))
+ except (re.error, TypeError):
+ # Invalid regex pattern, fall back to match (not regexp succeeds)
+ return True
  elif operator == "not_cidr":
  # Unwrap single-element lists for CIDR
  if isinstance(expected_value, list) and len(expected_value) == 1:
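
Note: the regex fallback behavior for invalid patterns, continuing the same sketch:

comparator.compare_values("abc", "regexp", "(unclosed")      # -> False: an invalid pattern is treated as no match
comparator.compare_values("abc", "not_regexp", "(unclosed")  # -> True: the negated form treats it as matching nothing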