tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/METADATA +24 -1
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/RECORD +27 -27
- tql/core.py +225 -54
- tql/core_components/opensearch_operations.py +415 -99
- tql/core_components/stats_operations.py +11 -1
- tql/evaluator.py +39 -2
- tql/evaluator_components/special_expressions.py +25 -6
- tql/evaluator_components/value_comparison.py +31 -3
- tql/mutator_analyzer.py +640 -242
- tql/mutators/__init__.py +5 -1
- tql/mutators/dns.py +76 -53
- tql/mutators/security.py +101 -100
- tql/mutators/string.py +74 -0
- tql/opensearch_components/field_mapping.py +9 -3
- tql/opensearch_components/lucene_converter.py +12 -0
- tql/opensearch_components/query_converter.py +134 -25
- tql/opensearch_mappings.py +2 -2
- tql/opensearch_stats.py +170 -39
- tql/parser.py +92 -37
- tql/parser_components/ast_builder.py +37 -1
- tql/parser_components/field_extractor.py +9 -1
- tql/parser_components/grammar.py +32 -8
- tql/post_processor.py +489 -31
- tql/stats_evaluator.py +170 -12
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/LICENSE +0 -0
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/WHEEL +0 -0
- {tellaro_query_language-0.2.0.dist-info → tellaro_query_language-0.2.2.dist-info}/entry_points.txt +0 -0
tql/mutator_analyzer.py
CHANGED
|
@@ -30,6 +30,7 @@ MUTATOR_CLASSIFICATIONS: Dict[str, MutatorType] = {
|
|
|
30
30
|
"uppercase": MutatorType.POST_PROCESSABLE, # Always post-process (transforms result)
|
|
31
31
|
"trim": MutatorType.POST_PROCESSABLE, # Always post-process (transforms result)
|
|
32
32
|
"split": MutatorType.POST_PROCESSABLE, # Always post-process (returns array)
|
|
33
|
+
"replace": MutatorType.POST_PROCESSABLE, # Always post-process (transforms result)
|
|
33
34
|
"nslookup": MutatorType.POST_PROCESSABLE, # Always post-process (enrichment)
|
|
34
35
|
"geoip_lookup": MutatorType.POST_PROCESSABLE, # Always post-process (enrichment)
|
|
35
36
|
"geo": MutatorType.POST_PROCESSABLE, # Always post-process (enrichment)
|
|
@@ -41,6 +42,18 @@ MUTATOR_CLASSIFICATIONS: Dict[str, MutatorType] = {
|
|
|
41
42
|
"urldecode": MutatorType.POST_PROCESSABLE, # Always post-process (modifies value)
|
|
42
43
|
"is_private": MutatorType.POST_PROCESSABLE, # Always post-process (returns bool)
|
|
43
44
|
"is_global": MutatorType.POST_PROCESSABLE, # Always post-process (returns bool)
|
|
45
|
+
"any": MutatorType.POST_PROCESSABLE, # Always post-process (array evaluation)
|
|
46
|
+
"all": MutatorType.POST_PROCESSABLE, # Always post-process (array evaluation)
|
|
47
|
+
"none": MutatorType.POST_PROCESSABLE, # Always post-process (array evaluation)
|
|
48
|
+
"avg": MutatorType.POST_PROCESSABLE, # Always post-process (array computation)
|
|
49
|
+
"average": MutatorType.POST_PROCESSABLE, # Always post-process (array computation)
|
|
50
|
+
"sum": MutatorType.POST_PROCESSABLE, # Always post-process (array computation)
|
|
51
|
+
"min": MutatorType.POST_PROCESSABLE, # Always post-process (array computation)
|
|
52
|
+
"max": MutatorType.POST_PROCESSABLE, # Always post-process (array computation)
|
|
53
|
+
"count": MutatorType.POST_PROCESSABLE, # Always post-process (array computation)
|
|
54
|
+
"unique": MutatorType.POST_PROCESSABLE, # Always post-process (array computation)
|
|
55
|
+
"first": MutatorType.POST_PROCESSABLE, # Always post-process (array access)
|
|
56
|
+
"last": MutatorType.POST_PROCESSABLE, # Always post-process (array access)
|
|
44
57
|
}
|
|
45
58
|
|
|
46
59
|
|
|
@@ -52,8 +65,8 @@ class PostProcessingRequirement:
|
|
|
52
65
|
mapped_field_name: str # Field name used in OpenSearch query
|
|
53
66
|
mutators: List[Dict[str, Any]] # List of mutator specifications
|
|
54
67
|
applies_to: Literal[
|
|
55
|
-
"field", "value", "geo_expr", "nslookup_expr"
|
|
56
|
-
] # Whether this applies to field, value mutators, geo, or
|
|
68
|
+
"field", "value", "geo_expr", "nslookup_expr", "logical_expression"
|
|
69
|
+
] # Whether this applies to field, value mutators, geo, nslookup, or logical expressions
|
|
57
70
|
metadata: Optional[Dict[str, Any]] = None # Additional metadata for special processing
|
|
58
71
|
|
|
59
72
|
|
|
@@ -73,6 +86,8 @@ class MutatorAnalysisResult:
|
|
|
73
86
|
class MutatorAnalyzer:
|
|
74
87
|
"""Analyzes TQL queries to determine mutator processing requirements."""
|
|
75
88
|
|
|
89
|
+
context: Optional[str] = None # Temporary storage for execution context
|
|
90
|
+
|
|
76
91
|
def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None):
|
|
77
92
|
"""Initialize the analyzer.
|
|
78
93
|
|
|
@@ -134,9 +149,27 @@ class MutatorAnalyzer:
|
|
|
134
149
|
# Track if enrichment saving is requested
|
|
135
150
|
save_enrichment_requested = False
|
|
136
151
|
|
|
152
|
+
# Store context temporarily for use in _analyze_node
|
|
153
|
+
self.context = context
|
|
154
|
+
|
|
155
|
+
# Check if this is a stats query
|
|
156
|
+
is_stats_query = ast.get("type") in ["stats_expr", "query_with_stats"]
|
|
157
|
+
|
|
137
158
|
# Analyze the AST recursively
|
|
138
159
|
self._analyze_node(optimized_ast, post_processing_requirements, health_reasons, optimizations_applied)
|
|
139
160
|
|
|
161
|
+
# Clean up context
|
|
162
|
+
self.context = None
|
|
163
|
+
|
|
164
|
+
# Clean up nodes marked for removal
|
|
165
|
+
cleaned_ast = self._clean_ast(optimized_ast)
|
|
166
|
+
|
|
167
|
+
# If the entire AST was removed (e.g., just "field | any eq value"), return match_all
|
|
168
|
+
if cleaned_ast is None:
|
|
169
|
+
optimized_ast = {"type": "match_all"}
|
|
170
|
+
else:
|
|
171
|
+
optimized_ast = cleaned_ast
|
|
172
|
+
|
|
140
173
|
# Check if any mutator requested enrichment saving
|
|
141
174
|
for req in post_processing_requirements:
|
|
142
175
|
for mutator in req.mutators:
|
|
@@ -158,7 +191,29 @@ class MutatorAnalyzer:
|
|
|
158
191
|
|
|
159
192
|
# Determine overall health status based on context
|
|
160
193
|
health_status: Literal["green", "yellow", "red"] = "green"
|
|
161
|
-
|
|
194
|
+
|
|
195
|
+
# Special handling for stats queries with post-processing in OpenSearch context
|
|
196
|
+
if is_stats_query and context == "opensearch" and post_processing_requirements:
|
|
197
|
+
# Stats queries that require post-processing have extremely poor performance
|
|
198
|
+
health_status = "red"
|
|
199
|
+
health_reasons.append(
|
|
200
|
+
{
|
|
201
|
+
"status": "red",
|
|
202
|
+
"query_part": "stats with post-processing",
|
|
203
|
+
"reason": "Stats query requires fetching all documents for post-processing mutators. "
|
|
204
|
+
"This will have extremely poor performance on large datasets. "
|
|
205
|
+
"Consider pre-processing data or using OpenSearch-compatible operations.",
|
|
206
|
+
}
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# For in_memory context, we need to evaluate health considering ALL mutators
|
|
210
|
+
# (both those in post-processing and those remaining in the AST)
|
|
211
|
+
elif context == "in_memory":
|
|
212
|
+
# Pass the optimized AST to health evaluation for in_memory context
|
|
213
|
+
health_eval = self._evaluate_health_for_context(post_processing_requirements, context, optimized_ast)
|
|
214
|
+
health_status = health_eval["health_status"] # type: ignore[assignment]
|
|
215
|
+
health_reasons.extend(health_eval["health_reasons"])
|
|
216
|
+
elif post_processing_requirements:
|
|
162
217
|
# Evaluate health based on context
|
|
163
218
|
health_eval = self._evaluate_health_for_context(post_processing_requirements, context)
|
|
164
219
|
health_status = health_eval["health_status"] # type: ignore[assignment]
|
|
@@ -179,6 +234,166 @@ class MutatorAnalyzer:
|
|
|
179
234
|
save_enrichment_requested=save_enrichment_requested,
|
|
180
235
|
)
|
|
181
236
|
|
|
237
|
+
def _clean_ast(self, node: Any) -> Any: # noqa: C901
|
|
238
|
+
"""Remove nodes marked for removal from the AST.
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
node: AST node to clean
|
|
242
|
+
|
|
243
|
+
Returns:
|
|
244
|
+
Cleaned AST node or None if node should be removed
|
|
245
|
+
"""
|
|
246
|
+
if not isinstance(node, dict):
|
|
247
|
+
return node
|
|
248
|
+
|
|
249
|
+
# Check if this node should be removed
|
|
250
|
+
if node.get("_remove_from_query"):
|
|
251
|
+
return None
|
|
252
|
+
|
|
253
|
+
# Clean child nodes
|
|
254
|
+
if node.get("type") == "logical_op":
|
|
255
|
+
operator = node.get("operator", "").lower()
|
|
256
|
+
left = self._clean_ast(node.get("left"))
|
|
257
|
+
right = self._clean_ast(node.get("right"))
|
|
258
|
+
|
|
259
|
+
# Special handling for OR with removed nodes
|
|
260
|
+
if operator == "or" and (left is None or right is None):
|
|
261
|
+
# If either side of OR has array operators (was removed),
|
|
262
|
+
# we need to return match_all and handle everything in post-processing
|
|
263
|
+
if left is None or right is None:
|
|
264
|
+
return {"type": "match_all"}
|
|
265
|
+
|
|
266
|
+
# Regular handling for AND
|
|
267
|
+
if left is None and right is None:
|
|
268
|
+
return None
|
|
269
|
+
elif left is None:
|
|
270
|
+
return right
|
|
271
|
+
elif right is None:
|
|
272
|
+
return left
|
|
273
|
+
else:
|
|
274
|
+
node["left"] = left
|
|
275
|
+
node["right"] = right
|
|
276
|
+
return node
|
|
277
|
+
elif node.get("type") == "unary_op":
|
|
278
|
+
operand = self._clean_ast(node.get("operand"))
|
|
279
|
+
if operand is None:
|
|
280
|
+
return None
|
|
281
|
+
node["operand"] = operand
|
|
282
|
+
return node
|
|
283
|
+
|
|
284
|
+
# For other node types, check if it should be converted to match_all
|
|
285
|
+
if node.get("_convert_to_match_all"):
|
|
286
|
+
return {"type": "match_all"}
|
|
287
|
+
|
|
288
|
+
return node
|
|
289
|
+
|
|
290
|
+
def _has_array_operators(self, node: Any) -> bool:
|
|
291
|
+
"""Check if an AST node contains array operators (any, all, none).
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
node: AST node to check
|
|
295
|
+
|
|
296
|
+
Returns:
|
|
297
|
+
True if node contains array operators
|
|
298
|
+
"""
|
|
299
|
+
if not isinstance(node, dict):
|
|
300
|
+
return False
|
|
301
|
+
|
|
302
|
+
node_type = node.get("type")
|
|
303
|
+
|
|
304
|
+
if node_type == "comparison":
|
|
305
|
+
# Check field mutators for array operators
|
|
306
|
+
field_mutators = node.get("field_mutators", [])
|
|
307
|
+
for mutator in field_mutators:
|
|
308
|
+
if mutator.get("name", "").lower() in ["any", "all", "none"]:
|
|
309
|
+
return True
|
|
310
|
+
return False
|
|
311
|
+
elif node_type == "logical_op":
|
|
312
|
+
# Check both sides
|
|
313
|
+
return self._has_array_operators(node.get("left", {})) or self._has_array_operators(node.get("right", {}))
|
|
314
|
+
elif node_type == "unary_op":
|
|
315
|
+
# Check operand
|
|
316
|
+
return self._has_array_operators(node.get("operand", {}))
|
|
317
|
+
|
|
318
|
+
return False
|
|
319
|
+
|
|
320
|
+
def _has_transform_mutators_with_filtering(self, node: Any) -> bool:
|
|
321
|
+
"""Check if an AST node contains transform mutators with filtering operations.
|
|
322
|
+
|
|
323
|
+
Args:
|
|
324
|
+
node: AST node to check
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
True if node contains transform mutators with filtering operations
|
|
328
|
+
"""
|
|
329
|
+
if not isinstance(node, dict):
|
|
330
|
+
return False
|
|
331
|
+
|
|
332
|
+
node_type = node.get("type")
|
|
333
|
+
|
|
334
|
+
if node_type == "comparison":
|
|
335
|
+
# Check if this is a filtering operation
|
|
336
|
+
operator = node.get("operator", "")
|
|
337
|
+
is_filtering = operator in [
|
|
338
|
+
"eq",
|
|
339
|
+
"=",
|
|
340
|
+
"ne",
|
|
341
|
+
"!=",
|
|
342
|
+
"gt",
|
|
343
|
+
">",
|
|
344
|
+
"gte",
|
|
345
|
+
">=",
|
|
346
|
+
"lt",
|
|
347
|
+
"<",
|
|
348
|
+
"lte",
|
|
349
|
+
"<=",
|
|
350
|
+
"contains",
|
|
351
|
+
"not_contains",
|
|
352
|
+
"startswith",
|
|
353
|
+
"endswith",
|
|
354
|
+
"not_startswith",
|
|
355
|
+
"not_endswith",
|
|
356
|
+
"in",
|
|
357
|
+
"not_in",
|
|
358
|
+
]
|
|
359
|
+
|
|
360
|
+
if not is_filtering:
|
|
361
|
+
return False
|
|
362
|
+
|
|
363
|
+
# Check field mutators for transform mutators or type-changing mutators that need post-processing
|
|
364
|
+
field_mutators = node.get("field_mutators", [])
|
|
365
|
+
for mutator in field_mutators:
|
|
366
|
+
mutator_name = mutator.get("name", "").lower()
|
|
367
|
+
# Transform mutators that modify the value OR type-changing mutators
|
|
368
|
+
if mutator_name in [
|
|
369
|
+
"lowercase",
|
|
370
|
+
"uppercase",
|
|
371
|
+
"trim",
|
|
372
|
+
"replace",
|
|
373
|
+
"refang",
|
|
374
|
+
"defang",
|
|
375
|
+
"b64encode",
|
|
376
|
+
"b64decode",
|
|
377
|
+
"urldecode",
|
|
378
|
+
# Type-changing mutators that need post-processing
|
|
379
|
+
"length",
|
|
380
|
+
"is_private",
|
|
381
|
+
"is_global",
|
|
382
|
+
"split",
|
|
383
|
+
]:
|
|
384
|
+
return True
|
|
385
|
+
return False
|
|
386
|
+
elif node_type == "logical_op":
|
|
387
|
+
# Check both sides
|
|
388
|
+
return self._has_transform_mutators_with_filtering(
|
|
389
|
+
node.get("left", {})
|
|
390
|
+
) or self._has_transform_mutators_with_filtering(node.get("right", {}))
|
|
391
|
+
elif node_type == "unary_op":
|
|
392
|
+
# Check operand
|
|
393
|
+
return self._has_transform_mutators_with_filtering(node.get("operand", {}))
|
|
394
|
+
|
|
395
|
+
return False
|
|
396
|
+
|
|
182
397
|
def _analyze_node( # noqa: C901
|
|
183
398
|
self,
|
|
184
399
|
node: Dict[str, Any],
|
|
@@ -204,98 +419,191 @@ class MutatorAnalyzer:
|
|
|
204
419
|
elif node_type == "collection_op":
|
|
205
420
|
self._analyze_collection_node(node, post_processing_reqs, health_reasons, optimizations)
|
|
206
421
|
elif node_type == "logical_op":
|
|
207
|
-
|
|
422
|
+
operator = node.get("operator", "").lower()
|
|
423
|
+
|
|
424
|
+
# Check if this is an OR with array operators OR transform mutators with filtering
|
|
425
|
+
# BEFORE analyzing children (because analyzing children might modify the nodes)
|
|
426
|
+
needs_logical_expression = False
|
|
427
|
+
metadata_type = None
|
|
428
|
+
|
|
429
|
+
if operator == "or":
|
|
430
|
+
if self._has_array_operators(node):
|
|
431
|
+
needs_logical_expression = True
|
|
432
|
+
metadata_type = "or_with_array_operators"
|
|
433
|
+
elif self._has_transform_mutators_with_filtering(node):
|
|
434
|
+
needs_logical_expression = True
|
|
435
|
+
metadata_type = "or_with_transform_mutators"
|
|
436
|
+
|
|
437
|
+
if needs_logical_expression:
|
|
438
|
+
# We need to evaluate the entire OR in post-processing
|
|
439
|
+
# But we still want the base query to run (without array operators)
|
|
440
|
+
|
|
441
|
+
# Deep copy the original expression before it gets modified
|
|
442
|
+
original_expression = copy.deepcopy(node)
|
|
443
|
+
|
|
444
|
+
# Add a special requirement for the entire logical expression
|
|
445
|
+
post_processing_reqs.append(
|
|
446
|
+
PostProcessingRequirement(
|
|
447
|
+
field_name="_logical_expression",
|
|
448
|
+
mapped_field_name="_logical_expression",
|
|
449
|
+
mutators=[],
|
|
450
|
+
applies_to="logical_expression",
|
|
451
|
+
metadata={"expression": original_expression, "type": metadata_type},
|
|
452
|
+
)
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
# Always analyze both sides
|
|
208
456
|
self._analyze_node(node.get("left", {}), post_processing_reqs, health_reasons, optimizations)
|
|
209
457
|
self._analyze_node(node.get("right", {}), post_processing_reqs, health_reasons, optimizations)
|
|
210
458
|
elif node_type == "unary_op":
|
|
459
|
+
operator = node.get("operator", "").lower()
|
|
460
|
+
|
|
461
|
+
# Check if this is a NOT with transform mutators that need filtering
|
|
462
|
+
if operator == "not" and self._has_transform_mutators_with_filtering(node.get("operand", {})):
|
|
463
|
+
# We need to evaluate the entire NOT in post-processing
|
|
464
|
+
# Deep copy the original expression before it gets modified
|
|
465
|
+
original_expression = copy.deepcopy(node)
|
|
466
|
+
|
|
467
|
+
# Add a special requirement for the entire logical expression
|
|
468
|
+
post_processing_reqs.append(
|
|
469
|
+
PostProcessingRequirement(
|
|
470
|
+
field_name="_logical_expression",
|
|
471
|
+
mapped_field_name="_logical_expression",
|
|
472
|
+
mutators=[],
|
|
473
|
+
applies_to="logical_expression",
|
|
474
|
+
metadata={"expression": original_expression, "type": "not_with_transform_mutators"},
|
|
475
|
+
)
|
|
476
|
+
)
|
|
477
|
+
|
|
211
478
|
# Analyze the operand
|
|
212
479
|
self._analyze_node(node.get("operand", {}), post_processing_reqs, health_reasons, optimizations)
|
|
213
480
|
elif node_type == "geo_expr":
|
|
214
|
-
# Geo expressions always require post-processing since they involve geoip_lookup
|
|
215
481
|
field_name = node.get("field")
|
|
216
482
|
conditions = node.get("conditions")
|
|
217
483
|
geo_params = node.get("geo_params", {})
|
|
218
484
|
|
|
219
485
|
if field_name:
|
|
220
|
-
#
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
"conditions": conditions,
|
|
239
|
-
"node_type": "geo_expr",
|
|
240
|
-
"geo_params": geo_params, # Include geo parameters
|
|
241
|
-
},
|
|
242
|
-
)
|
|
243
|
-
post_processing_reqs.append(geo_requirement)
|
|
244
|
-
|
|
245
|
-
# Mark the node for post-processing
|
|
246
|
-
node["requires_post_processing"] = True
|
|
247
|
-
node["post_process_type"] = "geo_expr"
|
|
248
|
-
|
|
249
|
-
if conditions:
|
|
250
|
-
optimizations.append(
|
|
251
|
-
f"Geo expression on field '{field_name}' with conditions requires post-processing"
|
|
486
|
+
# For OpenSearch context, geo expressions require post-processing
|
|
487
|
+
if self.context == "opensearch":
|
|
488
|
+
# Create a post-processing requirement for the geo expression
|
|
489
|
+
# Build the geoip_lookup mutator
|
|
490
|
+
mutator_params = []
|
|
491
|
+
for param_name, param_value in geo_params.items():
|
|
492
|
+
mutator_params.append([param_name, param_value])
|
|
493
|
+
|
|
494
|
+
geo_mutator = {"name": "geoip_lookup"}
|
|
495
|
+
if mutator_params:
|
|
496
|
+
geo_mutator["params"] = mutator_params
|
|
497
|
+
|
|
498
|
+
# Create the requirement
|
|
499
|
+
req = PostProcessingRequirement(
|
|
500
|
+
field_name=field_name,
|
|
501
|
+
mapped_field_name=field_name, # Will be mapped during processing
|
|
502
|
+
mutators=[geo_mutator],
|
|
503
|
+
applies_to="geo_expr",
|
|
504
|
+
metadata={"conditions": conditions, "geo_params": geo_params},
|
|
252
505
|
)
|
|
506
|
+
post_processing_reqs.append(req)
|
|
507
|
+
|
|
508
|
+
if conditions:
|
|
509
|
+
optimizations.append(
|
|
510
|
+
f"Geo expression on field '{field_name}' with conditions requires post-processing"
|
|
511
|
+
)
|
|
512
|
+
else:
|
|
513
|
+
optimizations.append(
|
|
514
|
+
f"Geo expression on field '{field_name}' for enrichment requires post-processing"
|
|
515
|
+
)
|
|
253
516
|
else:
|
|
254
|
-
|
|
517
|
+
# For in-memory evaluation, handled during evaluation phase
|
|
518
|
+
if conditions:
|
|
519
|
+
optimizations.append(
|
|
520
|
+
f"Geo expression on field '{field_name}' with conditions handled during evaluation"
|
|
521
|
+
)
|
|
522
|
+
else:
|
|
523
|
+
optimizations.append(
|
|
524
|
+
f"Geo expression on field '{field_name}' for enrichment handled during evaluation"
|
|
525
|
+
)
|
|
255
526
|
|
|
256
527
|
# Don't analyze conditions recursively - they're part of the geo expression
|
|
257
528
|
elif node_type == "nslookup_expr":
|
|
258
|
-
# NSLookup expressions always require post-processing since they involve DNS lookups
|
|
259
529
|
field_name = node.get("field")
|
|
260
530
|
conditions = node.get("conditions")
|
|
261
531
|
nslookup_params = node.get("nslookup_params", {})
|
|
262
532
|
|
|
263
533
|
if field_name:
|
|
264
|
-
#
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
"
|
|
282
|
-
"nslookup_params": nslookup_params,
|
|
283
|
-
},
|
|
284
|
-
)
|
|
285
|
-
post_processing_reqs.append(nslookup_requirement)
|
|
286
|
-
|
|
287
|
-
# Mark the node for post-processing
|
|
288
|
-
node["requires_post_processing"] = True
|
|
289
|
-
node["post_process_type"] = "nslookup_expr"
|
|
290
|
-
|
|
291
|
-
if conditions:
|
|
292
|
-
optimizations.append(
|
|
293
|
-
f"NSLookup expression on field '{field_name}' with conditions requires post-processing"
|
|
534
|
+
# For OpenSearch context, nslookup expressions require post-processing
|
|
535
|
+
if self.context == "opensearch":
|
|
536
|
+
# Create a post-processing requirement for the nslookup expression
|
|
537
|
+
# Build the nslookup mutator
|
|
538
|
+
mutator_params = []
|
|
539
|
+
for param_name, param_value in nslookup_params.items():
|
|
540
|
+
mutator_params.append([param_name, param_value])
|
|
541
|
+
|
|
542
|
+
nslookup_mutator = {"name": "nslookup"}
|
|
543
|
+
if mutator_params:
|
|
544
|
+
nslookup_mutator["params"] = mutator_params
|
|
545
|
+
|
|
546
|
+
# Create the requirement
|
|
547
|
+
req = PostProcessingRequirement(
|
|
548
|
+
field_name=field_name,
|
|
549
|
+
mapped_field_name=field_name, # Will be mapped during processing
|
|
550
|
+
mutators=[nslookup_mutator],
|
|
551
|
+
applies_to="nslookup_expr",
|
|
552
|
+
metadata={"conditions": conditions, "nslookup_params": nslookup_params},
|
|
294
553
|
)
|
|
554
|
+
post_processing_reqs.append(req)
|
|
555
|
+
|
|
556
|
+
if conditions:
|
|
557
|
+
optimizations.append(
|
|
558
|
+
f"NSLookup expression on field '{field_name}' with conditions requires post-processing"
|
|
559
|
+
)
|
|
560
|
+
else:
|
|
561
|
+
optimizations.append(
|
|
562
|
+
f"NSLookup expression on field '{field_name}' for enrichment requires post-processing"
|
|
563
|
+
)
|
|
295
564
|
else:
|
|
296
|
-
|
|
565
|
+
# For in-memory evaluation, handled during evaluation phase
|
|
566
|
+
if conditions:
|
|
567
|
+
optimizations.append(
|
|
568
|
+
f"NSLookup expression on field '{field_name}' with conditions handled during evaluation"
|
|
569
|
+
)
|
|
570
|
+
else:
|
|
571
|
+
optimizations.append(
|
|
572
|
+
f"NSLookup expression on field '{field_name}' for enrichment handled during evaluation"
|
|
573
|
+
)
|
|
297
574
|
|
|
298
575
|
# Don't analyze conditions recursively - they're part of the nslookup expression
|
|
576
|
+
elif node_type == "query_with_stats":
|
|
577
|
+
# Handle query_with_stats node by analyzing the filter part
|
|
578
|
+
filter_node = node.get("filter")
|
|
579
|
+
if filter_node:
|
|
580
|
+
self._analyze_node(filter_node, post_processing_reqs, health_reasons, optimizations)
|
|
581
|
+
|
|
582
|
+
# Analyze the stats part if it contains mutators (though this is rare)
|
|
583
|
+
stats_node = node.get("stats")
|
|
584
|
+
if stats_node:
|
|
585
|
+
self._analyze_node(stats_node, post_processing_reqs, health_reasons, optimizations)
|
|
586
|
+
|
|
587
|
+
elif node_type == "stats_expr":
|
|
588
|
+
# Handle pure stats expressions - they typically don't have mutators
|
|
589
|
+
# but check aggregations and group_by fields for any field transformations
|
|
590
|
+
aggregations = node.get("aggregations", [])
|
|
591
|
+
for agg in aggregations:
|
|
592
|
+
# In case aggregations have field mutators in the future
|
|
593
|
+
if isinstance(agg, dict) and agg.get("field_mutators"):
|
|
594
|
+
# Analyze field mutators within aggregations if they exist
|
|
595
|
+
field_mutators = agg.get("field_mutators", [])
|
|
596
|
+
if field_mutators:
|
|
597
|
+
field_name = agg.get("field", "*")
|
|
598
|
+
# Add post-processing requirement for mutators in aggregations
|
|
599
|
+
post_processing_reqs.append(
|
|
600
|
+
PostProcessingRequirement(
|
|
601
|
+
field_name=field_name,
|
|
602
|
+
mapped_field_name=field_name,
|
|
603
|
+
mutators=field_mutators,
|
|
604
|
+
applies_to="field",
|
|
605
|
+
)
|
|
606
|
+
)
|
|
299
607
|
|
|
300
608
|
def _analyze_comparison_node( # noqa: C901
|
|
301
609
|
self,
|
|
@@ -315,37 +623,226 @@ class MutatorAnalyzer:
|
|
|
315
623
|
field_name = node.get("field")
|
|
316
624
|
operator = node.get("operator")
|
|
317
625
|
field_mutators = node.get("field_mutators", [])
|
|
318
|
-
value_mutators = node.get("value_mutators", [])
|
|
319
626
|
|
|
320
627
|
if not field_name or not operator:
|
|
321
628
|
return
|
|
322
629
|
|
|
323
630
|
# Analyze field mutators
|
|
324
631
|
if field_mutators:
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
if
|
|
329
|
-
|
|
330
|
-
|
|
632
|
+
# Special case: if the last mutator is any/all/none and we have a comparison operator,
|
|
633
|
+
# treat it as an array comparison operator, not a regular mutator
|
|
634
|
+
last_mutator = field_mutators[-1] if field_mutators else None
|
|
635
|
+
if (
|
|
636
|
+
last_mutator
|
|
637
|
+
and last_mutator.get("name", "").lower() in ["any", "all", "none"]
|
|
638
|
+
and operator
|
|
639
|
+
in [
|
|
640
|
+
"eq",
|
|
641
|
+
"=",
|
|
642
|
+
"ne",
|
|
643
|
+
"!=",
|
|
644
|
+
"gt",
|
|
645
|
+
">",
|
|
646
|
+
"lt",
|
|
647
|
+
"<",
|
|
648
|
+
"gte",
|
|
649
|
+
">=",
|
|
650
|
+
"lte",
|
|
651
|
+
"<=",
|
|
652
|
+
"contains",
|
|
653
|
+
"not_contains",
|
|
654
|
+
"startswith",
|
|
655
|
+
"endswith",
|
|
656
|
+
"not_startswith",
|
|
657
|
+
"not_endswith",
|
|
658
|
+
]
|
|
659
|
+
):
|
|
660
|
+
|
|
661
|
+
# Extract the array operator
|
|
662
|
+
array_operator = last_mutator["name"].lower()
|
|
663
|
+
|
|
664
|
+
# Process any mutators before the array operator
|
|
665
|
+
remaining_mutators = field_mutators[:-1]
|
|
666
|
+
if remaining_mutators:
|
|
667
|
+
result = self._analyze_field_mutators(field_name, remaining_mutators, operator)
|
|
668
|
+
|
|
669
|
+
# Update node with optimized mutators
|
|
670
|
+
if result.optimized_mutators != remaining_mutators:
|
|
671
|
+
if result.optimized_mutators:
|
|
672
|
+
node["field_mutators"] = result.optimized_mutators
|
|
673
|
+
else:
|
|
674
|
+
# Remove field_mutators if all were optimized away
|
|
675
|
+
node.pop("field_mutators", None)
|
|
676
|
+
optimizations.extend(result.optimizations)
|
|
677
|
+
|
|
678
|
+
# Add post-processing requirements for the remaining mutators
|
|
679
|
+
if result.post_processing_mutators:
|
|
680
|
+
post_processing_reqs.append(
|
|
681
|
+
PostProcessingRequirement(
|
|
682
|
+
field_name=field_name,
|
|
683
|
+
mapped_field_name=result.selected_field or field_name,
|
|
684
|
+
mutators=result.post_processing_mutators,
|
|
685
|
+
applies_to="field",
|
|
686
|
+
)
|
|
687
|
+
)
|
|
331
688
|
else:
|
|
332
|
-
#
|
|
689
|
+
# No other mutators, remove field_mutators from node
|
|
333
690
|
node.pop("field_mutators", None)
|
|
334
691
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
# Add post-processing requirements
|
|
338
|
-
if result.post_processing_mutators:
|
|
692
|
+
# Add post-processing requirement for the array comparison
|
|
339
693
|
post_processing_reqs.append(
|
|
340
694
|
PostProcessingRequirement(
|
|
341
695
|
field_name=field_name,
|
|
342
|
-
mapped_field_name=
|
|
343
|
-
mutators=
|
|
696
|
+
mapped_field_name=field_name,
|
|
697
|
+
mutators=[], # No mutators, just operator-based filtering
|
|
344
698
|
applies_to="field",
|
|
345
|
-
metadata={
|
|
699
|
+
metadata={
|
|
700
|
+
"operator": array_operator,
|
|
701
|
+
"comparison_operator": operator,
|
|
702
|
+
"value": node.get("value"),
|
|
703
|
+
},
|
|
346
704
|
)
|
|
347
705
|
)
|
|
348
706
|
|
|
707
|
+
# Array operators should not affect the OpenSearch query at all
|
|
708
|
+
# They are purely post-processing filters
|
|
709
|
+
# Store the original node info in the post-processing requirement
|
|
710
|
+
if post_processing_reqs and post_processing_reqs[-1].metadata is not None:
|
|
711
|
+
post_processing_reqs[-1].metadata["original_node"] = {
|
|
712
|
+
"type": "comparison",
|
|
713
|
+
"field": field_name,
|
|
714
|
+
"operator": operator,
|
|
715
|
+
"value": node.get("value"),
|
|
716
|
+
"field_mutators": [{"name": array_operator}],
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
# Array operators should be completely removed from the OpenSearch query
|
|
720
|
+
# Mark this node for removal
|
|
721
|
+
node["_remove_from_query"] = True
|
|
722
|
+
|
|
723
|
+
# Don't mark the node for post-processing - let the query be generated normally
|
|
724
|
+
# The array operator is applied as a post-processing filter on top of the results
|
|
725
|
+
|
|
726
|
+
optimizations.append(
|
|
727
|
+
f"Array operator '{array_operator}' with '{operator}' will be applied in post-processing"
|
|
728
|
+
)
|
|
729
|
+
|
|
730
|
+
# Skip the regular mutator processing that follows
|
|
731
|
+
return
|
|
732
|
+
|
|
733
|
+
else:
|
|
734
|
+
# Regular mutator processing
|
|
735
|
+
result = self._analyze_field_mutators(field_name, field_mutators, operator)
|
|
736
|
+
|
|
737
|
+
# For in-memory context, keep mutators in AST for evaluation
|
|
738
|
+
if self.context == "in_memory":
|
|
739
|
+
# Don't remove mutators from AST for in-memory queries
|
|
740
|
+
# They need to be applied during evaluation
|
|
741
|
+
pass
|
|
742
|
+
else:
|
|
743
|
+
# Update node with optimized mutators for OpenSearch context
|
|
744
|
+
if result.optimized_mutators != field_mutators:
|
|
745
|
+
if result.optimized_mutators:
|
|
746
|
+
node["field_mutators"] = result.optimized_mutators
|
|
747
|
+
else:
|
|
748
|
+
# Remove field_mutators if all were optimized away
|
|
749
|
+
node.pop("field_mutators", None)
|
|
750
|
+
|
|
751
|
+
optimizations.extend(result.optimizations)
|
|
752
|
+
|
|
753
|
+
# Add post-processing requirements
|
|
754
|
+
if result.post_processing_mutators:
|
|
755
|
+
# For in-memory context, we need special handling
|
|
756
|
+
if self.context == "in_memory":
|
|
757
|
+
# Check if any mutators are transform mutators that need to be applied to results
|
|
758
|
+
transform_mutators = []
|
|
759
|
+
for mutator in result.post_processing_mutators:
|
|
760
|
+
mutator_name = mutator.get("name", "").lower()
|
|
761
|
+
# Transform mutators that modify the result
|
|
762
|
+
if mutator_name in [
|
|
763
|
+
"split",
|
|
764
|
+
"lowercase",
|
|
765
|
+
"uppercase",
|
|
766
|
+
"trim",
|
|
767
|
+
"replace",
|
|
768
|
+
"refang",
|
|
769
|
+
"defang",
|
|
770
|
+
]:
|
|
771
|
+
transform_mutators.append(mutator)
|
|
772
|
+
|
|
773
|
+
# If we have transform mutators, add them as post-processing for result transformation
|
|
774
|
+
if transform_mutators:
|
|
775
|
+
post_processing_reqs.append(
|
|
776
|
+
PostProcessingRequirement(
|
|
777
|
+
field_name=field_name,
|
|
778
|
+
mapped_field_name=field_name,
|
|
779
|
+
mutators=transform_mutators,
|
|
780
|
+
applies_to="field",
|
|
781
|
+
metadata={"transform_only": True}, # Mark as transform-only
|
|
782
|
+
)
|
|
783
|
+
)
|
|
784
|
+
else:
|
|
785
|
+
# Always include operator and value in metadata for post-processing filtering
|
|
786
|
+
metadata = {"operator": operator, "value": node.get("value")}
|
|
787
|
+
# Include original comparison info if it exists
|
|
788
|
+
if node.get("_original_comparison"):
|
|
789
|
+
metadata["_original_comparison"] = node["_original_comparison"]
|
|
790
|
+
|
|
791
|
+
post_processing_reqs.append(
|
|
792
|
+
PostProcessingRequirement(
|
|
793
|
+
field_name=field_name,
|
|
794
|
+
mapped_field_name=result.selected_field or field_name,
|
|
795
|
+
mutators=result.post_processing_mutators,
|
|
796
|
+
applies_to="field",
|
|
797
|
+
metadata=metadata,
|
|
798
|
+
)
|
|
799
|
+
)
|
|
800
|
+
|
|
801
|
+
# Check if we have transform mutators with filtering operators
|
|
802
|
+
# These need special handling in query conversion
|
|
803
|
+
TRANSFORM_MUTATORS = {
|
|
804
|
+
"lowercase",
|
|
805
|
+
"uppercase",
|
|
806
|
+
"trim",
|
|
807
|
+
"replace",
|
|
808
|
+
"refang",
|
|
809
|
+
"defang",
|
|
810
|
+
"b64encode",
|
|
811
|
+
"b64decode",
|
|
812
|
+
"urldecode",
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
has_transform_with_filter = False
|
|
816
|
+
for mutator in result.post_processing_mutators:
|
|
817
|
+
if mutator.get("name", "").lower() in TRANSFORM_MUTATORS:
|
|
818
|
+
has_transform_with_filter = True
|
|
819
|
+
break
|
|
820
|
+
|
|
821
|
+
if has_transform_with_filter and operator in [
|
|
822
|
+
"eq",
|
|
823
|
+
"=",
|
|
824
|
+
"ne",
|
|
825
|
+
"!=",
|
|
826
|
+
"contains",
|
|
827
|
+
"not_contains",
|
|
828
|
+
"startswith",
|
|
829
|
+
"endswith",
|
|
830
|
+
"not_startswith",
|
|
831
|
+
"not_endswith",
|
|
832
|
+
">",
|
|
833
|
+
">=",
|
|
834
|
+
"<",
|
|
835
|
+
"<=",
|
|
836
|
+
"gt",
|
|
837
|
+
"gte",
|
|
838
|
+
"lt",
|
|
839
|
+
"lte",
|
|
840
|
+
"between",
|
|
841
|
+
"not_between",
|
|
842
|
+
]:
|
|
843
|
+
# Mark the node so query converter knows to use exists query
|
|
844
|
+
node["has_transform_mutators_with_filter"] = True
|
|
845
|
+
|
|
349
846
|
# Check if any mutators change the field type
|
|
350
847
|
has_type_changing_mutator = any(
|
|
351
848
|
mutator.get("name", "").lower()
|
|
@@ -374,6 +871,12 @@ class MutatorAnalyzer:
|
|
|
374
871
|
# Also mark if we have type-changing mutators
|
|
375
872
|
if has_type_changing_mutator:
|
|
376
873
|
node["has_type_changing_mutators"] = True
|
|
874
|
+
|
|
875
|
+
# For in-memory queries with type-changing mutators, DON'T convert to exists check
|
|
876
|
+
# The mutators should be applied during evaluation
|
|
877
|
+
if self.context == "in_memory":
|
|
878
|
+
# Keep the original comparison intact for in-memory evaluation
|
|
879
|
+
pass
|
|
377
880
|
elif has_type_changing_mutator:
|
|
378
881
|
# For type-changing mutators with numeric operators, mark for special handling
|
|
379
882
|
node["has_type_changing_mutators"] = True
|
|
@@ -385,53 +888,11 @@ class MutatorAnalyzer:
|
|
|
385
888
|
if result.selected_field and result.selected_field != field_name:
|
|
386
889
|
node["field"] = result.selected_field
|
|
387
890
|
|
|
388
|
-
#
|
|
389
|
-
if operator in ["all", "not_all"]:
|
|
390
|
-
# These operators need post-processing for array fields
|
|
391
|
-
post_processing_reqs.append(
|
|
392
|
-
PostProcessingRequirement(
|
|
393
|
-
field_name=field_name,
|
|
394
|
-
mapped_field_name=field_name,
|
|
395
|
-
mutators=[], # No mutators, just operator-based filtering
|
|
396
|
-
applies_to="field",
|
|
397
|
-
metadata={"operator": operator, "value": node.get("value")},
|
|
398
|
-
)
|
|
399
|
-
)
|
|
400
|
-
# Mark for special handling in OpenSearch
|
|
401
|
-
node["post_process_value"] = True
|
|
402
|
-
|
|
403
|
-
# Analyze value mutators (these are typically post-processing)
|
|
404
|
-
if value_mutators:
|
|
405
|
-
post_processing_value_mutators = []
|
|
406
|
-
|
|
407
|
-
for mutator in value_mutators:
|
|
408
|
-
mutator_name = mutator.get("name", "").lower()
|
|
409
|
-
classification = MUTATOR_CLASSIFICATIONS.get(mutator_name, MutatorType.POST_PROCESSABLE)
|
|
410
|
-
|
|
411
|
-
if classification in [MutatorType.POST_PROCESSABLE, MutatorType.CONDITIONAL]:
|
|
412
|
-
post_processing_value_mutators.append(mutator)
|
|
413
|
-
|
|
414
|
-
if post_processing_value_mutators:
|
|
415
|
-
post_processing_reqs.append(
|
|
416
|
-
PostProcessingRequirement(
|
|
417
|
-
field_name=field_name,
|
|
418
|
-
mapped_field_name=field_name, # Value mutators don't affect field mapping
|
|
419
|
-
mutators=post_processing_value_mutators,
|
|
420
|
-
applies_to="value",
|
|
421
|
-
)
|
|
422
|
-
)
|
|
891
|
+
# Note: ALL and NOT_ALL operators are handled during evaluation, not post-processing
|
|
423
892
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
# Mark the node to indicate it needs special handling in OpenSearch
|
|
428
|
-
node["post_process_value"] = True
|
|
429
|
-
# Keep the original value for reference
|
|
430
|
-
node["original_value"] = node.get("value")
|
|
431
|
-
|
|
432
|
-
# Remove value mutators from AST since they'll be post-processed
|
|
433
|
-
node.pop("value_mutators", None)
|
|
434
|
-
optimizations.append(f"Moved {len(post_processing_value_mutators)} value mutator(s) to post-processing")
|
|
893
|
+
# Value mutators are handled during evaluation, not post-processing
|
|
894
|
+
# The evaluator's _evaluate_comparison method applies value mutators before comparison
|
|
895
|
+
# So we don't need to treat them as post-processing requirements
|
|
435
896
|
|
|
436
897
|
def _analyze_collection_node(
|
|
437
898
|
self,
|
|
@@ -450,7 +911,6 @@ class MutatorAnalyzer:
|
|
|
450
911
|
"""
|
|
451
912
|
field_name = node.get("field")
|
|
452
913
|
field_mutators = node.get("field_mutators", [])
|
|
453
|
-
value_mutators = node.get("value_mutators", [])
|
|
454
914
|
|
|
455
915
|
if not field_name:
|
|
456
916
|
return
|
|
@@ -487,21 +947,14 @@ class MutatorAnalyzer:
|
|
|
487
947
|
f"post-processing for collection operation"
|
|
488
948
|
)
|
|
489
949
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
post_processing_reqs.append(
|
|
493
|
-
PostProcessingRequirement(
|
|
494
|
-
field_name=field_name, mapped_field_name=field_name, mutators=value_mutators, applies_to="value"
|
|
495
|
-
)
|
|
496
|
-
)
|
|
497
|
-
|
|
498
|
-
node.pop("value_mutators", None)
|
|
499
|
-
optimizations.append(
|
|
500
|
-
f"Moved {len(value_mutators)} value mutator(s) to post-processing for collection operation"
|
|
501
|
-
)
|
|
950
|
+
# Value mutators are handled during evaluation for collection operations too
|
|
951
|
+
# The evaluator applies them before comparison in _evaluate_collection_comparison
|
|
502
952
|
|
|
503
953
|
def _evaluate_health_for_context( # noqa: C901
|
|
504
|
-
self,
|
|
954
|
+
self,
|
|
955
|
+
post_processing_requirements: List[PostProcessingRequirement],
|
|
956
|
+
context: str,
|
|
957
|
+
ast: Optional[Dict[str, Any]] = None,
|
|
505
958
|
) -> Dict[str, Any]:
|
|
506
959
|
"""Evaluate health status based on context and mutator performance characteristics.
|
|
507
960
|
|
|
@@ -518,9 +971,10 @@ class MutatorAnalyzer:
|
|
|
518
971
|
slow_mutators = []
|
|
519
972
|
all_mutators = []
|
|
520
973
|
|
|
521
|
-
#
|
|
522
|
-
|
|
523
|
-
|
|
974
|
+
# Helper function to process mutators
|
|
975
|
+
def process_mutators(mutator_list):
|
|
976
|
+
nonlocal fast_count, moderate_count, slow_count
|
|
977
|
+
for mutator_spec in mutator_list:
|
|
524
978
|
mutator_name = mutator_spec.get("name", "")
|
|
525
979
|
all_mutators.append(mutator_name)
|
|
526
980
|
|
|
@@ -540,6 +994,28 @@ class MutatorAnalyzer:
|
|
|
540
994
|
# If we can't create the mutator, assume moderate performance
|
|
541
995
|
moderate_count += 1
|
|
542
996
|
|
|
997
|
+
# Collect all mutators from post-processing requirements
|
|
998
|
+
for req in post_processing_requirements:
|
|
999
|
+
process_mutators(req.mutators)
|
|
1000
|
+
|
|
1001
|
+
# For in_memory context with AST, also collect mutators from the AST
|
|
1002
|
+
if context == "in_memory" and ast:
|
|
1003
|
+
|
|
1004
|
+
def collect_ast_mutators(node):
|
|
1005
|
+
if isinstance(node, dict):
|
|
1006
|
+
# Check for field mutators
|
|
1007
|
+
if "field_mutators" in node:
|
|
1008
|
+
process_mutators(node["field_mutators"])
|
|
1009
|
+
# Check for value mutators
|
|
1010
|
+
if "value_mutators" in node:
|
|
1011
|
+
process_mutators(node["value_mutators"])
|
|
1012
|
+
# Recurse into child nodes
|
|
1013
|
+
for key, value in node.items():
|
|
1014
|
+
if key in ["left", "right", "operand"]:
|
|
1015
|
+
collect_ast_mutators(value)
|
|
1016
|
+
|
|
1017
|
+
collect_ast_mutators(ast)
|
|
1018
|
+
|
|
543
1019
|
# Determine health status based on context
|
|
544
1020
|
health_status = "green"
|
|
545
1021
|
health_reasons = []
|
|
@@ -692,85 +1168,19 @@ class FieldMutatorAnalyzer:
|
|
|
692
1168
|
self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
|
|
693
1169
|
) -> "MutatorOptimizationResult":
|
|
694
1170
|
"""Try to optimize a lowercase mutator using field mappings."""
|
|
695
|
-
#
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
selected_field=lowercase_field,
|
|
704
|
-
post_process_mutator=None,
|
|
705
|
-
optimization_description=f"Using field '{lowercase_field}' with lowercase analyzer instead of mutator",
|
|
706
|
-
)
|
|
707
|
-
elif standard_field:
|
|
708
|
-
# Standard analyzer might handle lowercase - use it but also post-process
|
|
709
|
-
return MutatorOptimizationResult(
|
|
710
|
-
can_optimize=False,
|
|
711
|
-
selected_field=standard_field,
|
|
712
|
-
post_process_mutator=mutator,
|
|
713
|
-
optimization_description=f"Using text field '{standard_field}' but post-processing lowercase mutator",
|
|
714
|
-
)
|
|
715
|
-
elif field_mapping.keyword_field:
|
|
716
|
-
# Only keyword field available - check operator compatibility
|
|
717
|
-
if operator in [
|
|
718
|
-
"eq",
|
|
719
|
-
"=",
|
|
720
|
-
"ne",
|
|
721
|
-
"!=",
|
|
722
|
-
"in",
|
|
723
|
-
"not_in",
|
|
724
|
-
"contains",
|
|
725
|
-
"not_contains",
|
|
726
|
-
"startswith",
|
|
727
|
-
"endswith",
|
|
728
|
-
"not_startswith",
|
|
729
|
-
"not_endswith",
|
|
730
|
-
]:
|
|
731
|
-
# These operators will work with post-processing
|
|
732
|
-
return MutatorOptimizationResult(
|
|
733
|
-
can_optimize=False,
|
|
734
|
-
selected_field=field_mapping.keyword_field,
|
|
735
|
-
post_process_mutator=mutator,
|
|
736
|
-
optimization_description=f"Using keyword field '{field_mapping.keyword_field}' "
|
|
737
|
-
f"with post-processing",
|
|
738
|
-
health_issue={
|
|
739
|
-
"status": "yellow",
|
|
740
|
-
"query_part": f"{field_mapping.base_field_name} | lowercase",
|
|
741
|
-
"reason": "Keyword field used with lowercase mutator requires post-processing",
|
|
742
|
-
},
|
|
743
|
-
)
|
|
744
|
-
else:
|
|
745
|
-
# Range operators don't make sense with lowercase
|
|
746
|
-
return MutatorOptimizationResult(
|
|
747
|
-
can_optimize=False,
|
|
748
|
-
selected_field=None,
|
|
749
|
-
post_process_mutator=None,
|
|
750
|
-
optimization_description="",
|
|
751
|
-
health_issue={
|
|
752
|
-
"status": "red",
|
|
753
|
-
"query_part": f"{field_mapping.base_field_name} | lowercase {operator}",
|
|
754
|
-
"reason": (
|
|
755
|
-
f"Field '{field_mapping.base_field_name}' does not support case-insensitive "
|
|
756
|
-
f"searching with operator '{operator}'. Available: {field_mapping.keyword_field} (keyword)"
|
|
757
|
-
),
|
|
758
|
-
},
|
|
759
|
-
)
|
|
760
|
-
else:
|
|
761
|
-
# No suitable fields
|
|
762
|
-
return MutatorOptimizationResult(
|
|
763
|
-
can_optimize=False,
|
|
764
|
-
selected_field=None,
|
|
765
|
-
post_process_mutator=mutator,
|
|
766
|
-
optimization_description="No suitable field mappings for lowercase optimization",
|
|
767
|
-
)
|
|
1171
|
+
# Per requirement: lowercase should always be post-processing
|
|
1172
|
+
# Even if we have a lowercase analyzer field, we don't optimize
|
|
1173
|
+
return MutatorOptimizationResult(
|
|
1174
|
+
can_optimize=False,
|
|
1175
|
+
selected_field=None,
|
|
1176
|
+
post_process_mutator=mutator,
|
|
1177
|
+
optimization_description="Lowercase mutator always requires post-processing",
|
|
1178
|
+
)
|
|
768
1179
|
|
|
769
1180
|
def _optimize_uppercase_mutator(
|
|
770
1181
|
self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
|
|
771
1182
|
) -> "MutatorOptimizationResult":
|
|
772
1183
|
"""Try to optimize an uppercase mutator using field mappings."""
|
|
773
|
-
# Check if we actually have an uppercase analyzer
|
|
774
1184
|
# We need to check the text_fields dict directly to ensure we have the specific analyzer
|
|
775
1185
|
if "uppercase" in field_mapping.text_fields:
|
|
776
1186
|
uppercase_field = field_mapping.text_fields["uppercase"]
|
|
@@ -793,26 +1203,14 @@ class FieldMutatorAnalyzer:
|
|
|
793
1203
|
self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
|
|
794
1204
|
) -> "MutatorOptimizationResult":
|
|
795
1205
|
"""Try to optimize a trim mutator using field mappings."""
|
|
796
|
-
#
|
|
797
|
-
#
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
selected_field=text_field,
|
|
805
|
-
post_process_mutator=None,
|
|
806
|
-
optimization_description=f"Assuming field '{text_field}' analyzer handles trimming",
|
|
807
|
-
)
|
|
808
|
-
else:
|
|
809
|
-
# No text field - requires post-processing
|
|
810
|
-
return MutatorOptimizationResult(
|
|
811
|
-
can_optimize=False,
|
|
812
|
-
selected_field=None,
|
|
813
|
-
post_process_mutator=mutator,
|
|
814
|
-
optimization_description="No text field available for trim optimization",
|
|
815
|
-
)
|
|
1206
|
+
# Trim should always require post-processing to ensure consistent behavior
|
|
1207
|
+
# We can't reliably know if an analyzer trims whitespace
|
|
1208
|
+
return MutatorOptimizationResult(
|
|
1209
|
+
can_optimize=False,
|
|
1210
|
+
selected_field=None,
|
|
1211
|
+
post_process_mutator=mutator,
|
|
1212
|
+
optimization_description="Trim mutator always requires post-processing",
|
|
1213
|
+
)
|
|
816
1214
|
|
|
817
1215
|
|
|
818
1216
|
@dataclass
|