tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,830 @@
1
+ """Mutator analysis for determining pre vs post-processing requirements.
2
+
3
+ This module analyzes TQL queries with mutators to determine which mutators can be
4
+ handled by OpenSearch field mappings/analyzers (pre-processing) and which must be
5
+ applied to results after they return from OpenSearch (post-processing).
6
+ """
7
+
8
+ import copy
9
+ from dataclasses import dataclass
10
+ from enum import Enum
11
+ from typing import Any, Dict, List, Literal, Optional, Union
12
+
13
+ # from .exceptions import TQLFieldError # Reserved for future use
14
+ from .mutators import create_mutator
15
+ from .mutators.base import PerformanceClass
16
+ from .opensearch import FieldMapping
17
+
18
+
19
class MutatorType(Enum):
    """Classification of mutator processing types.

    Indicates where in the pipeline a mutator must run: optimized away via
    OpenSearch field mappings/analyzers (pre), applied to results after the
    query returns (post), or decided per-field from available mappings
    (conditional).
    """

    PRE_PROCESSABLE = "pre"  # Can be handled by field mappings/analyzers
    POST_PROCESSABLE = "post"  # Must be applied to results
    CONDITIONAL = "conditional"  # Depends on field mapping availability


# Built-in mutators that always require post-processing: result
# transformations (case/trim/split), enrichments (DNS, GeoIP), encodings,
# and predicates whose output type differs from the field's type.
_POST_PROCESSED_MUTATOR_NAMES = (
    "lowercase",
    "uppercase",
    "trim",
    "split",
    "nslookup",
    "geoip_lookup",
    "geo",
    "length",
    "refang",
    "defang",
    "b64encode",
    "b64decode",
    "urldecode",
    "is_private",
    "is_global",
)

# Classification of built-in mutators.
MUTATOR_CLASSIFICATIONS: Dict[str, MutatorType] = {
    name: MutatorType.POST_PROCESSABLE for name in _POST_PROCESSED_MUTATOR_NAMES
}
45
+
46
+
47
@dataclass
class PostProcessingRequirement:
    """A mutator application that must happen after OpenSearch query execution.

    Records which field the mutators attach to (under both its original and
    mapped names), the mutator specifications to apply, and whether they act
    on the field, the comparison value, or one of the special geo/nslookup
    expression forms.
    """

    # Original field name as written in the TQL query.
    field_name: str
    # Field name actually sent to OpenSearch (after mapping/optimization).
    mapped_field_name: str
    # Mutator specifications to apply during post-processing.
    mutators: List[Dict[str, Any]]
    # What the mutators attach to: field, value, or a special expression type.
    applies_to: Literal["field", "value", "geo_expr", "nslookup_expr"]
    # Extra context (operator/value, conditions, geo/nslookup params), if any.
    metadata: Optional[Dict[str, Any]] = None
58
+
59
+
60
@dataclass
class MutatorAnalysisResult:
    """Outcome of analyzing the mutators in a TQL query."""

    # AST with pre-processable mutators stripped out.
    optimized_ast: Dict[str, Any]
    # Mutator work that must run against returned results.
    post_processing_requirements: List[PostProcessingRequirement]
    # Overall health after accounting for post-processing cost.
    health_status: Literal["green", "yellow", "red"]
    # Individual findings as {status, query_part, reason} entries.
    health_reasons: List[Dict[str, str]]
    # Human-readable descriptions of optimizations performed.
    optimizations_applied: List[str]
    # OpenSearch query DSL; populated later by the core TQL class.
    query_dsl: Optional[Dict[str, Any]] = None
    # True when any mutator asked for its enrichment to be persisted.
    save_enrichment_requested: bool = False
71
+
72
+
73
+ class MutatorAnalyzer:
74
+ """Analyzes TQL queries to determine mutator processing requirements."""
75
+
76
    def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None):
        """Initialize the analyzer.

        Args:
            field_mappings: Field mappings for intelligent analysis. Values may
                be OpenSearch-style property dicts (with "type"/"fields"/
                "analyzer" keys), traditional intelligent mappings with multiple
                field variants, or bare type-name strings (e.g. "keyword").
                Unrecognized string values are ignored.
        """
        self.field_mappings = field_mappings or {}
        # Parsed FieldMapping objects keyed by query field name; only entries
        # we can interpret end up here.
        self.intelligent_mappings = {}

        # Parse field mappings into FieldMapping objects
        for field_name, mapping in self.field_mappings.items():
            if isinstance(mapping, dict):
                # Check if this is an OpenSearch-style mapping
                # (i.e. it has "type" and only "type"/"fields"/"analyzer" keys)
                if "type" in mapping and not any(k for k in mapping.keys() if k not in ["type", "fields", "analyzer"]):
                    # OpenSearch-style mapping for a single field
                    field_mapping = FieldMapping(mapping)
                    field_mapping.set_base_field_name(field_name)
                    self.intelligent_mappings[field_name] = field_mapping
                else:
                    # Traditional intelligent mapping with multiple field variants
                    field_mapping = FieldMapping(mapping)
                    if not field_mapping.base_field_name:
                        field_mapping.base_field_name = field_name
                    self.intelligent_mappings[field_name] = field_mapping
            elif isinstance(mapping, str):
                # Check if this looks like a type specification
                if mapping in [
                    "keyword",
                    "text",
                    "long",
                    "integer",
                    "short",
                    "byte",
                    "double",
                    "float",
                    "boolean",
                    "date",
                    "ip",
                ]:
                    # Type specification, create intelligent mapping
                    self.intelligent_mappings[field_name] = FieldMapping({field_name: mapping})
117
+
118
    def analyze_ast(self, ast: Dict[str, Any], context: str = "opensearch") -> MutatorAnalysisResult:  # noqa: C901
        """Analyze an AST to determine mutator processing requirements.

        Args:
            ast: The parsed TQL query AST
            context: Execution context ("opensearch" or "in_memory")

        Returns:
            Analysis result with optimized AST and post-processing requirements
        """
        # Deep copy AST to avoid modifying original
        optimized_ast = copy.deepcopy(ast)
        post_processing_requirements: List[PostProcessingRequirement] = []
        health_reasons: List[Dict[str, str]] = []
        optimizations_applied: List[str] = []

        # Track if enrichment saving is requested
        save_enrichment_requested = False

        # Analyze the AST recursively (mutates optimized_ast and the lists)
        self._analyze_node(optimized_ast, post_processing_requirements, health_reasons, optimizations_applied)

        # Check if any mutator requested enrichment saving
        for req in post_processing_requirements:
            for mutator in req.mutators:
                if mutator.get("params"):
                    # Params are [name, value] pairs; look for a truthy "save".
                    for param in mutator["params"]:
                        if isinstance(param, list) and len(param) == 2 and param[0] == "save" and param[1]:
                            save_enrichment_requested = True
                            break
            # Also check geo_params in metadata
            if req.metadata and "geo_params" in req.metadata:
                geo_params = req.metadata["geo_params"]
                if geo_params.get("save"):
                    save_enrichment_requested = True
            # Also check nslookup_params in metadata
            if req.metadata and "nslookup_params" in req.metadata:
                nslookup_params = req.metadata["nslookup_params"]
                if nslookup_params.get("save"):
                    save_enrichment_requested = True

        # Determine overall health status based on context
        health_status: Literal["green", "yellow", "red"] = "green"
        if post_processing_requirements:
            # Evaluate health based on context
            health_eval = self._evaluate_health_for_context(post_processing_requirements, context)
            health_status = health_eval["health_status"]  # type: ignore[assignment]
            health_reasons.extend(health_eval["health_reasons"])

        # Check for red health conditions (errors): any single red reason
        # makes the overall status red.
        for reason in health_reasons:
            if reason["status"] == "red":
                health_status = "red"
                break

        return MutatorAnalysisResult(
            optimized_ast=optimized_ast,
            post_processing_requirements=post_processing_requirements,
            health_status=health_status,
            health_reasons=health_reasons,
            optimizations_applied=optimizations_applied,
            save_enrichment_requested=save_enrichment_requested,
        )
181
+
182
    def _analyze_node(  # noqa: C901
        self,
        node: Dict[str, Any],
        post_processing_reqs: List[PostProcessingRequirement],
        health_reasons: List[Dict[str, str]],
        optimizations: List[str],
    ) -> None:
        """Recursively analyze an AST node for mutator processing.

        Dispatches on the node's "type": comparison and collection_op nodes
        are analyzed for mutators, logical/unary nodes recurse into their
        children, and geo/nslookup expressions always become post-processing
        requirements (they involve external enrichment lookups).

        Args:
            node: Current AST node
            post_processing_reqs: List to append post-processing requirements
            health_reasons: List to append health issues
            optimizations: List to append optimization descriptions
        """
        if not isinstance(node, dict):
            return

        node_type = node.get("type")

        if node_type == "comparison":
            self._analyze_comparison_node(node, post_processing_reqs, health_reasons, optimizations)
        elif node_type == "collection_op":
            self._analyze_collection_node(node, post_processing_reqs, health_reasons, optimizations)
        elif node_type == "logical_op":
            # Recursively analyze both sides
            self._analyze_node(node.get("left", {}), post_processing_reqs, health_reasons, optimizations)
            self._analyze_node(node.get("right", {}), post_processing_reqs, health_reasons, optimizations)
        elif node_type == "unary_op":
            # Analyze the operand
            self._analyze_node(node.get("operand", {}), post_processing_reqs, health_reasons, optimizations)
        elif node_type == "geo_expr":
            # Geo expressions always require post-processing since they involve geoip_lookup
            field_name = node.get("field")
            conditions = node.get("conditions")
            geo_params = node.get("geo_params", {})

            if field_name:
                # Create a special post-processing requirement for geo expressions
                # that includes both the enrichment and the filtering

                # Build mutator params list from geo_params
                mutator_params = []
                for param_name, param_value in geo_params.items():
                    mutator_params.append([param_name, param_value])

                geo_requirement = PostProcessingRequirement(
                    field_name=field_name,
                    mapped_field_name=field_name,
                    mutators=(
                        [{"name": "geoip_lookup", "params": mutator_params}]
                        if mutator_params
                        else [{"name": "geoip_lookup"}]
                    ),
                    applies_to="geo_expr",  # Special type for geo expressions
                    metadata={
                        "conditions": conditions,  # Include the conditions for filtering
                        "node_type": "geo_expr",
                        "geo_params": geo_params,  # Include geo parameters
                    },
                )
                post_processing_reqs.append(geo_requirement)

                # Mark the node for post-processing
                node["requires_post_processing"] = True
                node["post_process_type"] = "geo_expr"

                if conditions:
                    optimizations.append(
                        f"Geo expression on field '{field_name}' with conditions requires post-processing"
                    )
                else:
                    optimizations.append(f"Geo expression on field '{field_name}' for enrichment only")

            # Don't analyze conditions recursively - they're part of the geo expression
        elif node_type == "nslookup_expr":
            # NSLookup expressions always require post-processing since they involve DNS lookups
            field_name = node.get("field")
            conditions = node.get("conditions")
            nslookup_params = node.get("nslookup_params", {})

            if field_name:
                # Create a special post-processing requirement for nslookup expressions
                # that includes both the enrichment and the filtering

                # Build mutator params list from nslookup_params
                mutator_params = []
                for param_name, param_value in nslookup_params.items():
                    mutator_params.append([param_name, param_value])

                nslookup_requirement = PostProcessingRequirement(
                    field_name=field_name,
                    mapped_field_name=field_name,
                    mutators=(
                        [{"name": "nslookup", "params": mutator_params}] if mutator_params else [{"name": "nslookup"}]
                    ),
                    applies_to="nslookup_expr",  # Special type for nslookup expressions
                    metadata={
                        "conditions": conditions,  # Include the conditions for filtering
                        "node_type": "nslookup_expr",
                        "nslookup_params": nslookup_params,  # Include nslookup parameters
                    },
                )
                post_processing_reqs.append(nslookup_requirement)

                # Mark the node for post-processing
                node["requires_post_processing"] = True
                node["post_process_type"] = "nslookup_expr"

                if conditions:
                    optimizations.append(
                        f"NSLookup expression on field '{field_name}' with conditions requires post-processing"
                    )
                else:
                    optimizations.append(f"NSLookup expression on field '{field_name}' for enrichment only")

            # Don't analyze conditions recursively - they're part of the nslookup expression
299
+
300
    def _analyze_comparison_node(  # noqa: C901
        self,
        node: Dict[str, Any],
        post_processing_reqs: List[PostProcessingRequirement],
        health_reasons: List[Dict[str, str]],
        optimizations: List[str],
    ) -> None:
        """Analyze a comparison node for mutator processing.

        Field mutators may be optimized away via intelligent field mappings;
        whatever cannot be optimized becomes a post-processing requirement.
        Value mutators are always deferred to post-processing. The node is
        mutated in place with marker keys ("post_process_value",
        "original_value", "has_type_changing_mutators") that downstream
        OpenSearch conversion uses to relax the query.

        Args:
            node: Comparison AST node
            post_processing_reqs: List to append post-processing requirements
            health_reasons: List to append health issues
            optimizations: List to append optimization descriptions
        """
        field_name = node.get("field")
        operator = node.get("operator")
        field_mutators = node.get("field_mutators", [])
        value_mutators = node.get("value_mutators", [])

        if not field_name or not operator:
            return

        # Analyze field mutators
        if field_mutators:
            result = self._analyze_field_mutators(field_name, field_mutators, operator)

            # Update node with optimized mutators
            if result.optimized_mutators != field_mutators:
                if result.optimized_mutators:
                    node["field_mutators"] = result.optimized_mutators
                else:
                    # Remove field_mutators if all were optimized away
                    node.pop("field_mutators", None)

            optimizations.extend(result.optimizations)

            # Add post-processing requirements
            if result.post_processing_mutators:
                post_processing_reqs.append(
                    PostProcessingRequirement(
                        field_name=field_name,
                        mapped_field_name=result.selected_field or field_name,
                        mutators=result.post_processing_mutators,
                        applies_to="field",
                        metadata={"operator": operator, "value": node.get("value")},
                    )
                )

                # Check if any mutators change the field type
                # (e.g. length returns an int, is_private returns a bool)
                has_type_changing_mutator = any(
                    mutator.get("name", "").lower()
                    in ["length", "avg", "average", "sum", "max", "min", "any", "all", "is_private", "is_global"]
                    for mutator in result.post_processing_mutators
                )

                # For field mutators on certain operations, we need to make the query less restrictive
                # This allows post-processing to correctly filter results
                if operator in [
                    "eq",
                    "=",
                    "ne",
                    "!=",
                    "contains",
                    "not_contains",
                    "startswith",
                    "endswith",
                    "not_startswith",
                    "not_endswith",
                ]:
                    # Mark the node to indicate it needs special handling in OpenSearch
                    node["post_process_value"] = True
                    # Keep the original value for reference
                    node["original_value"] = node.get("value")
                    # Also mark if we have type-changing mutators
                    if has_type_changing_mutator:
                        node["has_type_changing_mutators"] = True
                elif has_type_changing_mutator:
                    # For type-changing mutators with numeric operators, mark for special handling
                    node["has_type_changing_mutators"] = True

            # Add health reasons
            health_reasons.extend(result.health_reasons)

            # Update field name if optimized
            if result.selected_field and result.selected_field != field_name:
                node["field"] = result.selected_field

        # Check if operator requires post-processing (e.g., ALL operator on arrays)
        if operator in ["all", "not_all"]:
            # These operators need post-processing for array fields
            post_processing_reqs.append(
                PostProcessingRequirement(
                    field_name=field_name,
                    mapped_field_name=field_name,
                    mutators=[],  # No mutators, just operator-based filtering
                    applies_to="field",
                    metadata={"operator": operator, "value": node.get("value")},
                )
            )
            # Mark for special handling in OpenSearch
            node["post_process_value"] = True

        # Analyze value mutators (these are typically post-processing)
        if value_mutators:
            post_processing_value_mutators = []

            for mutator in value_mutators:
                mutator_name = mutator.get("name", "").lower()
                classification = MUTATOR_CLASSIFICATIONS.get(mutator_name, MutatorType.POST_PROCESSABLE)

                if classification in [MutatorType.POST_PROCESSABLE, MutatorType.CONDITIONAL]:
                    post_processing_value_mutators.append(mutator)

            if post_processing_value_mutators:
                post_processing_reqs.append(
                    PostProcessingRequirement(
                        field_name=field_name,
                        mapped_field_name=field_name,  # Value mutators don't affect field mapping
                        mutators=post_processing_value_mutators,
                        applies_to="value",
                    )
                )

                # For value mutators on equality operations, we need to make the query less restrictive
                # This allows post-processing to correctly filter results
                if operator in ["eq", "=", "ne", "!="]:
                    # Mark the node to indicate it needs special handling in OpenSearch
                    node["post_process_value"] = True
                    # Keep the original value for reference
                    node["original_value"] = node.get("value")

            # Remove value mutators from AST since they'll be post-processed
            node.pop("value_mutators", None)
            optimizations.append(f"Moved {len(post_processing_value_mutators)} value mutator(s) to post-processing")
435
+
436
    def _analyze_collection_node(
        self,
        node: Dict[str, Any],
        post_processing_reqs: List[PostProcessingRequirement],
        health_reasons: List[Dict[str, str]],
        optimizations: List[str],
    ) -> None:
        """Analyze a collection operation node for mutator processing.

        Collection operations are handled more conservatively than plain
        comparisons: field mutators are deferred to post-processing unless
        explicitly classified PRE_PROCESSABLE, and value mutators are always
        deferred. Mutators are removed from the node in place.

        Args:
            node: Collection operation AST node
            post_processing_reqs: List to append post-processing requirements
            health_reasons: List to append health issues
            optimizations: List to append optimization descriptions
        """
        field_name = node.get("field")
        field_mutators = node.get("field_mutators", [])
        value_mutators = node.get("value_mutators", [])

        if not field_name:
            return

        # For collection operations, handle mutators similar to comparison nodes
        # but be more conservative about optimizations

        if field_mutators:
            # For collection ops, we're more conservative - most field mutators go to post-processing
            post_processing_field_mutators = []

            for mutator in field_mutators:
                mutator_name = mutator.get("name", "").lower()
                # Unknown mutators default to POST_PROCESSABLE.
                classification = MUTATOR_CLASSIFICATIONS.get(mutator_name, MutatorType.POST_PROCESSABLE)

                # For collection operations, be conservative and post-process most mutators
                if classification != MutatorType.PRE_PROCESSABLE:
                    post_processing_field_mutators.append(mutator)

            if post_processing_field_mutators:
                post_processing_reqs.append(
                    PostProcessingRequirement(
                        field_name=field_name,
                        mapped_field_name=field_name,
                        mutators=post_processing_field_mutators,
                        applies_to="field",
                    )
                )

            # Remove field mutators from AST
            node.pop("field_mutators", None)
            optimizations.append(
                f"Moved {len(post_processing_field_mutators)} field mutator(s) to "
                f"post-processing for collection operation"
            )

        if value_mutators:
            # Value mutators always go to post-processing for collection operations
            post_processing_reqs.append(
                PostProcessingRequirement(
                    field_name=field_name, mapped_field_name=field_name, mutators=value_mutators, applies_to="value"
                )
            )

            node.pop("value_mutators", None)
            optimizations.append(
                f"Moved {len(value_mutators)} value mutator(s) to post-processing for collection operation"
            )
502
+
503
    def _evaluate_health_for_context(  # noqa: C901
        self, post_processing_requirements: List[PostProcessingRequirement], context: str
    ) -> Dict[str, Any]:
        """Evaluate health status based on context and mutator performance characteristics.

        Counts mutators by their performance class (fast/moderate/slow) and
        derives a status: in-memory execution only degrades for slow (or many
        moderate) mutators, while any post-processing at all degrades an
        OpenSearch query.

        Args:
            post_processing_requirements: List of post-processing requirements
            context: Execution context ("opensearch" or "in_memory")

        Returns:
            Dictionary with health_status and health_reasons
        """
        fast_count = 0
        moderate_count = 0
        slow_count = 0
        slow_mutators: List[str] = []
        all_mutators: List[str] = []

        # Collect all mutators and their performance classes
        for req in post_processing_requirements:
            for mutator_spec in req.mutators:
                mutator_name = mutator_spec.get("name", "")
                all_mutators.append(mutator_name)

                try:
                    # Create mutator instance to get its performance characteristics
                    mutator = create_mutator(mutator_name, mutator_spec.get("params"))
                    perf_class = mutator.get_performance_class(context)

                    if perf_class == PerformanceClass.FAST:
                        fast_count += 1
                    elif perf_class == PerformanceClass.MODERATE:
                        moderate_count += 1
                    elif perf_class == PerformanceClass.SLOW:
                        slow_count += 1
                        slow_mutators.append(mutator_name)
                except Exception:
                    # If we can't create the mutator, assume moderate performance
                    moderate_count += 1

        # Determine health status based on context
        health_status = "green"
        health_reasons: List[Dict[str, str]] = []

        if context == "in_memory":
            # In-memory context: only slow mutators significantly impact health
            if slow_count > 0:
                health_status = "yellow"
                if slow_count > 2:
                    health_status = "red"
                health_reasons.append(
                    {
                        "status": health_status,
                        "query_part": f"mutators: {', '.join(slow_mutators)}",
                        "reason": f"{slow_count} slow mutator(s) ({', '.join(slow_mutators)}) may impact performance",
                    }
                )
            elif moderate_count > 5:
                # Many moderate mutators can also impact performance
                health_status = "yellow"
                health_reasons.append(
                    {
                        "status": "yellow",
                        "query_part": "multiple mutators",
                        "reason": f"{moderate_count} moderate-performance mutators may impact "
                        f"performance when combined",
                    }
                )
            # Fast mutators don't impact health in memory context

        elif context == "opensearch":
            # OpenSearch context: post-processing always impacts performance
            if slow_count > 0 or moderate_count > 0 or fast_count > 0:
                health_status = "yellow"
                if slow_count > 0:
                    # More than one slow mutator escalates to red.
                    health_status = "red" if slow_count > 1 else "yellow"

                reason_parts = []
                if fast_count > 0:
                    reason_parts.append(f"{fast_count} mutator(s)")
                if moderate_count > 0:
                    reason_parts.append(f"{moderate_count} moderate mutator(s)")
                if slow_count > 0:
                    reason_parts.append(f"{slow_count} slow mutator(s) [{', '.join(slow_mutators)}]")

                health_reasons.append(
                    {
                        "status": health_status,
                        "query_part": "post-processing required",
                        "reason": (
                            f"Post-processing required for {' + '.join(reason_parts)}, "
                            "which impacts performance with large result sets"
                        ),
                    }
                )

        return {"health_status": health_status, "health_reasons": health_reasons}
600
+
601
+ def _analyze_field_mutators(
602
+ self, field_name: str, mutators: List[Dict[str, Any]], operator: str
603
+ ) -> "FieldMutatorAnalysisResult":
604
+ """Analyze field mutators for a specific field."""
605
+ analyzer = FieldMutatorAnalyzer(self.intelligent_mappings)
606
+ return analyzer.analyze(field_name, mutators, operator)
607
+
608
+
609
@dataclass
class FieldMutatorAnalysisResult:
    """Result of analyzing field mutators for a specific field."""

    # Mutators that stay in the AST (not optimized away).
    optimized_mutators: List[Dict[str, Any]]
    # Mutators deferred to post-processing.
    post_processing_mutators: List[Dict[str, Any]]
    # Field name to substitute into the OpenSearch query, if any.
    selected_field: Optional[str]
    # Human-readable descriptions of optimizations applied.
    optimizations: List[str]
    # Health findings as {status, query_part, reason} entries.
    health_reasons: List[Dict[str, str]]
618
+
619
+
620
+ class FieldMutatorAnalyzer:
621
+ """Analyzes field mutators for a specific field."""
622
+
623
    def __init__(self, field_mappings: Dict[str, FieldMapping]):
        """Initialize with intelligent field mappings.

        Args:
            field_mappings: Parsed FieldMapping objects keyed by field name.
        """
        # Stored by reference; not copied.
        self.field_mappings = field_mappings
626
+
627
    def analyze(self, field_name: str, mutators: List[Dict[str, Any]], operator: str) -> FieldMutatorAnalysisResult:
        """Analyze field mutators for optimization opportunities.

        Only lowercase/uppercase/trim have dedicated optimizers; every other
        mutator goes straight to post-processing. Without an intelligent
        mapping for the field, all mutators are post-processed.

        Args:
            field_name: Name of the field
            mutators: List of mutator specifications
            operator: The operator being used in the comparison

        Returns:
            Analysis result with optimization recommendations
        """
        optimized_mutators: List[Dict[str, Any]] = []
        post_processing_mutators = []
        selected_field = None
        optimizations = []
        health_reasons = []

        # Check if we have intelligent mapping for this field
        if field_name in self.field_mappings:
            field_mapping = self.field_mappings[field_name]

            # Try to optimize mutators using field mapping
            for mutator in mutators:
                mutator_name = mutator.get("name", "").lower()

                if mutator_name == "lowercase":
                    optimization_result = self._optimize_lowercase_mutator(field_mapping, operator, mutator)
                elif mutator_name == "uppercase":
                    optimization_result = self._optimize_uppercase_mutator(field_mapping, operator, mutator)
                elif mutator_name == "trim":
                    optimization_result = self._optimize_trim_mutator(field_mapping, operator, mutator)
                else:
                    # Unknown or non-optimizable mutator - goes to post-processing
                    optimization_result = MutatorOptimizationResult(
                        can_optimize=False,
                        selected_field=None,
                        post_process_mutator=mutator,
                        optimization_description=f"Mutator '{mutator_name}' requires post-processing",
                    )

                # Apply optimization result
                if optimization_result.can_optimize:
                    if optimization_result.selected_field:
                        selected_field = optimization_result.selected_field
                    optimizations.append(optimization_result.optimization_description)
                    # Don't add to optimized_mutators if fully optimized
                else:
                    if optimization_result.post_process_mutator:
                        post_processing_mutators.append(optimization_result.post_process_mutator)
                    if optimization_result.health_issue:
                        health_reasons.append(optimization_result.health_issue)
        else:
            # No intelligent mapping - all mutators go to post-processing
            post_processing_mutators = mutators
            optimizations.append(f"No field mapping for '{field_name}' - all mutators require post-processing")

        return FieldMutatorAnalysisResult(
            optimized_mutators=optimized_mutators,
            post_processing_mutators=post_processing_mutators,
            selected_field=selected_field,
            optimizations=optimizations,
            health_reasons=health_reasons,
        )
690
+
691
    def _optimize_lowercase_mutator(
        self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
    ) -> "MutatorOptimizationResult":
        """Try to optimize a lowercase mutator using field mappings.

        Preference order: a lowercase-analyzed text field (fully optimized),
        a standard-analyzed text field (used, but still post-processed),
        then a keyword field (post-processed, and only for operators where
        that makes sense). A keyword-only mapping with a range-style operator
        is reported as a red health issue and the mutator is dropped.
        """
        # Check if we have a text field with lowercase analyzer
        lowercase_field = field_mapping.text_fields.get("lowercase")
        standard_field = field_mapping.text_fields.get("standard")

        if lowercase_field:
            # Perfect match - we have a lowercase analyzer
            return MutatorOptimizationResult(
                can_optimize=True,
                selected_field=lowercase_field,
                post_process_mutator=None,
                optimization_description=f"Using field '{lowercase_field}' with lowercase analyzer instead of mutator",
            )
        elif standard_field:
            # Standard analyzer might handle lowercase - use it but also post-process
            return MutatorOptimizationResult(
                can_optimize=False,
                selected_field=standard_field,
                post_process_mutator=mutator,
                optimization_description=f"Using text field '{standard_field}' but post-processing lowercase mutator",
            )
        elif field_mapping.keyword_field:
            # Only keyword field available - check operator compatibility
            if operator in [
                "eq",
                "=",
                "ne",
                "!=",
                "in",
                "not_in",
                "contains",
                "not_contains",
                "startswith",
                "endswith",
                "not_startswith",
                "not_endswith",
            ]:
                # These operators will work with post-processing
                return MutatorOptimizationResult(
                    can_optimize=False,
                    selected_field=field_mapping.keyword_field,
                    post_process_mutator=mutator,
                    optimization_description=f"Using keyword field '{field_mapping.keyword_field}' "
                    f"with post-processing",
                    health_issue={
                        "status": "yellow",
                        "query_part": f"{field_mapping.base_field_name} | lowercase",
                        "reason": "Keyword field used with lowercase mutator requires post-processing",
                    },
                )
            else:
                # Range operators don't make sense with lowercase
                return MutatorOptimizationResult(
                    can_optimize=False,
                    selected_field=None,
                    post_process_mutator=None,
                    optimization_description="",
                    health_issue={
                        "status": "red",
                        "query_part": f"{field_mapping.base_field_name} | lowercase {operator}",
                        "reason": (
                            f"Field '{field_mapping.base_field_name}' does not support case-insensitive "
                            f"searching with operator '{operator}'. Available: {field_mapping.keyword_field} (keyword)"
                        ),
                    },
                )
        else:
            # No suitable fields
            return MutatorOptimizationResult(
                can_optimize=False,
                selected_field=None,
                post_process_mutator=mutator,
                optimization_description="No suitable field mappings for lowercase optimization",
            )
768
+
769
+ def _optimize_uppercase_mutator(
770
+ self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
771
+ ) -> "MutatorOptimizationResult":
772
+ """Try to optimize an uppercase mutator using field mappings."""
773
+ # Check if we actually have an uppercase analyzer
774
+ # We need to check the text_fields dict directly to ensure we have the specific analyzer
775
+ if "uppercase" in field_mapping.text_fields:
776
+ uppercase_field = field_mapping.text_fields["uppercase"]
777
+ return MutatorOptimizationResult(
778
+ can_optimize=True,
779
+ selected_field=uppercase_field,
780
+ post_process_mutator=None,
781
+ optimization_description=f"Using field '{uppercase_field}' with uppercase analyzer instead of mutator",
782
+ )
783
+ else:
784
+ # No uppercase analyzer - requires post-processing
785
+ return MutatorOptimizationResult(
786
+ can_optimize=False,
787
+ selected_field=None,
788
+ post_process_mutator=mutator,
789
+ optimization_description="No uppercase analyzer available - requires post-processing",
790
+ )
791
+
792
+ def _optimize_trim_mutator(
793
+ self, field_mapping: FieldMapping, operator: str, mutator: Dict[str, Any]
794
+ ) -> "MutatorOptimizationResult":
795
+ """Try to optimize a trim mutator using field mappings."""
796
+ # Check if any text field might handle trimming
797
+ # Most analyzers include trimming by default, but we can't be sure
798
+ text_field = field_mapping.text_fields.get("standard")
799
+
800
+ if text_field:
801
+ # Assume standard analyzer handles trimming (common case)
802
+ return MutatorOptimizationResult(
803
+ can_optimize=True,
804
+ selected_field=text_field,
805
+ post_process_mutator=None,
806
+ optimization_description=f"Assuming field '{text_field}' analyzer handles trimming",
807
+ )
808
+ else:
809
+ # No text field - requires post-processing
810
+ return MutatorOptimizationResult(
811
+ can_optimize=False,
812
+ selected_field=None,
813
+ post_process_mutator=mutator,
814
+ optimization_description="No text field available for trim optimization",
815
+ )
816
+
817
+
818
@dataclass
class MutatorOptimizationResult:
    """Result of attempting to optimize a single mutator."""

    # True when the mutator was fully replaced by a field-mapping choice.
    can_optimize: bool
    # Field to substitute into the OpenSearch query, if any.
    selected_field: Optional[str]
    # Mutator to defer to post-processing, if still needed.
    post_process_mutator: Optional[Dict[str, Any]]
    # Human-readable description of the decision taken.
    optimization_description: str
    # Health finding raised by this decision, if any.
    health_issue: Optional[Dict[str, str]] = None
827
+
828
+
829
+ # NOTE: _analyze_field_mutators was previously monkey-patched onto
829
+ # MutatorAnalyzer; it now lives directly in the class above.