tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,775 @@
1
+ """Query conversion logic for OpenSearch backend.
2
+
3
+ This module handles the conversion of TQL AST nodes to OpenSearch Query DSL.
4
+ """
5
+
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from ..exceptions import TQLUnsupportedOperationError, TQLValidationError
9
+ from .field_mapping import FieldMapping
10
+
11
+
12
+ class QueryConverter:
13
+ """Handles conversion of TQL AST to OpenSearch Query DSL."""
14
+
15
+ def __init__(self, field_mappings: Dict[str, FieldMapping], simple_mappings: Dict[str, str]):
16
+ """Initialize query converter.
17
+
18
+ Args:
19
+ field_mappings: Intelligent field mappings
20
+ simple_mappings: Simple field name mappings
21
+ """
22
+ self.intelligent_mappings = field_mappings
23
+ self.simple_mappings = simple_mappings
24
+
25
+ def convert_node(self, node: Any) -> Dict[str, Any]:
26
+ """Convert a single AST node to OpenSearch query fragment."""
27
+ if isinstance(node, dict):
28
+ node_type = node.get("type")
29
+
30
+ if node_type == "comparison":
31
+ return self._convert_comparison(node)
32
+ elif node_type == "logical_op":
33
+ return self._convert_logical_op(node)
34
+ elif node_type == "unary_op":
35
+ return self._convert_unary_op(node)
36
+ elif node_type == "collection_op":
37
+ return self._convert_collection_op(node)
38
+ elif node_type == "geo_expr":
39
+ return self._convert_geo_expr(node)
40
+ elif node_type == "nslookup_expr":
41
+ return self._convert_nslookup_expr(node)
42
+
43
+ raise TQLValidationError(f"Unknown node type: {node}")
44
+
45
+ def _get_effective_field_type(self, field_name: str, mutators: List[Dict[str, Any]]) -> Optional[str]:
46
+ """Determine the effective field type after applying mutators.
47
+
48
+ Args:
49
+ field_name: Original field name
50
+ mutators: List of mutators applied to the field
51
+
52
+ Returns:
53
+ The effective field type after mutator transformations, or None if unchanged
54
+ """
55
+ if not mutators:
56
+ return None
57
+
58
+ # Define mutators that change field types
59
+ type_changing_mutators = {
60
+ "length": "integer", # Returns integer count
61
+ "avg": "float", # Returns float average
62
+ "average": "float", # Alias for avg
63
+ "sum": "float", # Returns numeric sum
64
+ "max": "float", # Returns maximum value
65
+ "min": "float", # Returns minimum value
66
+ "any": "boolean", # Returns boolean
67
+ "all": "boolean", # Returns boolean
68
+ "is_private": "boolean", # Returns boolean
69
+ "is_global": "boolean", # Returns boolean
70
+ }
71
+
72
+ # Walk mutators left to right; the first type-changing mutator encountered determines the effective type
73
+ for mutator in mutators:
74
+ mutator_name = mutator.get("name", "").lower()
75
+ if mutator_name in type_changing_mutators:
76
+ return type_changing_mutators[mutator_name]
77
+ elif mutator_name == "split":
78
+ # Split converts to array, but we need to know what comes after
79
+ continue
80
+
81
+ return None
82
+
83
+ def _convert_comparison(self, node: Dict[str, Any]) -> Dict[str, Any]: # noqa: C901
84
+ """Convert a comparison operation to OpenSearch query."""
85
+ field_name = node["field"]
86
+ operator = node["operator"]
87
+ value = node["value"]
88
+ field_mutators = node.get("field_mutators", [])
89
+
90
+ # Check if mutators change the field type
91
+ effective_field_type = self._get_effective_field_type(field_name, field_mutators)
92
+
93
+ # Check if field has mutators that will be post-processed
94
+ has_post_process_mutators = bool(field_mutators)
95
+
96
+ # Check if node has type-changing mutators (marked by mutator analyzer)
97
+ has_type_changing_mutators = node.get("has_type_changing_mutators", False)
98
+
99
+ # Check for intelligent mappings and validate type compatibility
100
+ if (
101
+ field_name in self.intelligent_mappings
102
+ and effective_field_type is None
103
+ and not has_post_process_mutators
104
+ and not has_type_changing_mutators
105
+ ):
106
+ # Only validate original field type if no type-changing mutators and no post-processing
107
+ mapping = self.intelligent_mappings[field_name]
108
+ # This will raise TQLTypeError if incompatible
109
+ mapping.validate_operator_for_field_type(operator)
110
+
111
+ # Get the actual field name to use (could be enhanced to extract analyzer from query context)
112
+ # For type-changing mutators, bypass field resolution since the field type doesn't matter
113
+ if has_type_changing_mutators:
114
+ # Just use the field name as-is since it will be post-processed
115
+ opensearch_field = field_name
116
+ use_wildcard = False
117
+ else:
118
+ opensearch_field, use_wildcard = self._resolve_field_name(field_name, operator)
119
+
120
+ # Convert value types for OpenSearch
121
+ value = self._convert_value(value)
122
+
123
+ # Check if this comparison requires post-processing due to value mutators or type-changing mutators
124
+ # Note: ALL and NOT_ALL operators are handled with script queries and don't need post-processing
125
+ requires_post_processing = node.get("post_process_value", False) or has_type_changing_mutators
126
+
127
+ if requires_post_processing:
128
+ # For operations that require post-processing, we need to query more broadly
129
+ # to ensure we get all potentially matching documents
130
+ if operator in [
131
+ "eq",
132
+ "=",
133
+ "ne",
134
+ "!=",
135
+ "contains",
136
+ "not_contains",
137
+ "startswith",
138
+ "endswith",
139
+ "not_startswith",
140
+ "not_endswith",
141
+ ">",
142
+ ">=",
143
+ "<",
144
+ "<=",
145
+ "gt",
146
+ "gte",
147
+ "lt",
148
+ "lte",
149
+ "between",
150
+ "not_between",
151
+ ]:
152
+ # For these operators, use exists query to get all docs with the field
153
+ # The actual filtering will happen in post-processing
154
+ return {"exists": {"field": opensearch_field}}
155
+
156
+ # Handle special wildcard conversion for keyword fields
157
+ if use_wildcard and operator == "contains":
158
+ return {"wildcard": {opensearch_field: f"*{value}*"}}
159
+
160
+ # Convert operator to OpenSearch query
161
+ if operator in ["eq", "="]:
162
+ # Check if we're using a text field
163
+ is_text_field = self._is_text_field(field_name, opensearch_field)
164
+
165
+ # Use match query for text fields, term for others
166
+ if is_text_field:
167
+ return {"match": {opensearch_field: value}}
168
+ else:
169
+ return {"term": {opensearch_field: value}}
170
+ elif operator in ["ne", "!="]:
171
+ # Check if we're using a text field
172
+ is_text_field = self._is_text_field(field_name, opensearch_field)
173
+
174
+ # Use match query for text fields, term for others
175
+ if is_text_field:
176
+ return {"bool": {"must_not": {"match": {opensearch_field: value}}}}
177
+ else:
178
+ return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
179
+ elif operator in ["gt", ">"]:
180
+ return {"range": {opensearch_field: {"gt": value}}}
181
+ elif operator in ["gte", ">="]:
182
+ return {"range": {opensearch_field: {"gte": value}}}
183
+ elif operator in ["lt", "<"]:
184
+ return {"range": {opensearch_field: {"lt": value}}}
185
+ elif operator in ["lte", "<="]:
186
+ return {"range": {opensearch_field: {"lte": value}}}
187
+ elif operator == "contains":
188
+ # Unwrap single-element lists for string operators
189
+ if isinstance(value, list) and len(value) == 1:
190
+ value = value[0]
191
+ if use_wildcard:
192
+ # Keyword field needs wildcard conversion
193
+ return {"wildcard": {opensearch_field: f"*{value}*"}}
194
+ else:
195
+ # For unmapped fields or when we have a text field, decide based on context
196
+ # If we have intelligent mapping and selected a text field, use match
197
+ # Otherwise default to wildcard for broader compatibility
198
+ if field_name in self.intelligent_mappings:
199
+ # Use match query for text fields in intelligent mappings
200
+ return {"match": {opensearch_field: value}}
201
+ else:
202
+ # Default to wildcard for unmapped fields
203
+ return {"wildcard": {opensearch_field: f"*{value}*"}}
204
+ elif operator == "startswith":
205
+ # Unwrap single-element lists for string operators
206
+ if isinstance(value, list) and len(value) == 1:
207
+ value = value[0]
208
+ # For text fields, use wildcard query as prefix doesn't work well with analyzed text
209
+ if field_name in self.intelligent_mappings:
210
+ field_mapping = self.intelligent_mappings[field_name]
211
+ if isinstance(field_mapping, FieldMapping):
212
+ # Check if we're using a text field
213
+ selected_field = field_mapping.get_field_for_operator(operator)
214
+ if selected_field in field_mapping.text_fields.values():
215
+ # Use wildcard for analyzed text fields with lowercase value
216
+ # Text analyzers typically lowercase the text
217
+ return {"wildcard": {opensearch_field: f"{value.lower()}*"}}
218
+ return {"prefix": {opensearch_field: value}}
219
+ elif operator == "endswith":
220
+ # Unwrap single-element lists for string operators
221
+ if isinstance(value, list) and len(value) == 1:
222
+ value = value[0]
223
+ # For text fields, lowercase the value as text analyzers typically lowercase
224
+ if field_name in self.intelligent_mappings:
225
+ field_mapping = self.intelligent_mappings[field_name]
226
+ if isinstance(field_mapping, FieldMapping):
227
+ selected_field = field_mapping.get_field_for_operator(operator)
228
+ if selected_field in field_mapping.text_fields.values():
229
+ return {"wildcard": {opensearch_field: f"*{value.lower()}"}}
230
+ return {"wildcard": {opensearch_field: f"*{value}"}}
231
+ elif operator == "in":
232
+ if isinstance(value, list):
233
+ return {"terms": {opensearch_field: value}}
234
+ else:
235
+ return {"term": {opensearch_field: value}}
236
+ elif operator == "regexp":
237
+ # Unwrap single-element lists for string operators
238
+ if isinstance(value, list) and len(value) == 1:
239
+ value = value[0]
240
+ return {"regexp": {opensearch_field: value}}
241
+ elif operator == "exists":
242
+ return {"exists": {"field": opensearch_field}}
243
+ elif operator == "is":
244
+ if value is None:
245
+ return {"bool": {"must_not": {"exists": {"field": opensearch_field}}}}
246
+ else:
247
+ return {"term": {opensearch_field: value}}
248
+ elif operator == "between":
249
+ if isinstance(value, list) and len(value) == 2:
250
+ # Convert values to appropriate types
251
+ val1 = self._convert_value(value[0])
252
+ val2 = self._convert_value(value[1])
253
+
254
+ # Allow values in any order
255
+ lower = (
256
+ min(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[0]
257
+ )
258
+ upper = (
259
+ max(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[1]
260
+ )
261
+
262
+ # For non-numeric values (like dates), we use the original order if we can't determine min/max
263
+ if not isinstance(val1, (int, float)) or not isinstance(val2, (int, float)):
264
+ try:
265
+ # If values can be compared (like strings), try to determine order
266
+ if val1 > val2:
267
+ lower, upper = val2, val1
268
+ else:
269
+ lower, upper = val1, val2
270
+ except TypeError:
271
+ # If comparison fails, use the original order
272
+ lower, upper = value[0], value[1]
273
+
274
+ return {"range": {opensearch_field: {"gte": lower, "lte": upper}}}
275
+ else:
276
+ raise TQLValidationError(f"Between operator requires a list with two values, got: {value}")
277
+ elif operator == "cidr":
278
+ # Unwrap single-element lists for CIDR
279
+ if isinstance(value, list) and len(value) == 1:
280
+ value = value[0]
281
+ # OpenSearch ip fields accept CIDR notation directly as a term value
282
+ # e.g. {"term": {field: "192.168.0.0/24"}}
283
+ return {"term": {opensearch_field: value}}
284
+ # Negated operators
285
+ elif operator == "not_in":
286
+ if isinstance(value, list):
287
+ return {"bool": {"must_not": {"terms": {opensearch_field: value}}}}
288
+ else:
289
+ return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
290
+ elif operator == "not_contains":
291
+ # Unwrap single-element lists for string operators
292
+ if isinstance(value, list) and len(value) == 1:
293
+ value = value[0]
294
+ if use_wildcard:
295
+ return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}*"}}}}
296
+ else:
297
+ if field_name in self.intelligent_mappings:
298
+ return {"bool": {"must_not": {"match": {opensearch_field: value}}}}
299
+ else:
300
+ return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}*"}}}}
301
+ elif operator == "not_startswith":
302
+ # Unwrap single-element lists for string operators
303
+ if isinstance(value, list) and len(value) == 1:
304
+ value = value[0]
305
+ return {"bool": {"must_not": {"prefix": {opensearch_field: value}}}}
306
+ elif operator == "not_endswith":
307
+ # Unwrap single-element lists for string operators
308
+ if isinstance(value, list) and len(value) == 1:
309
+ value = value[0]
310
+ return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}"}}}}
311
+ elif operator == "not_regexp":
312
+ # Unwrap single-element lists for string operators
313
+ if isinstance(value, list) and len(value) == 1:
314
+ value = value[0]
315
+ return {"bool": {"must_not": {"regexp": {opensearch_field: value}}}}
316
+ elif operator == "not_exists":
317
+ return {"bool": {"must_not": {"exists": {"field": opensearch_field}}}}
318
+ elif operator == "not_between":
319
+ if isinstance(value, list) and len(value) == 2:
320
+ val1 = self._convert_value(value[0])
321
+ val2 = self._convert_value(value[1])
322
+ lower = (
323
+ min(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[0]
324
+ )
325
+ upper = (
326
+ max(val1, val2) if isinstance(val1, (int, float)) and isinstance(val2, (int, float)) else value[1]
327
+ )
328
+ if not isinstance(val1, (int, float)) or not isinstance(val2, (int, float)):
329
+ try:
330
+ if val1 > val2:
331
+ lower, upper = val2, val1
332
+ else:
333
+ lower, upper = val1, val2
334
+ except TypeError:
335
+ lower, upper = value[0], value[1]
336
+ return {"bool": {"must_not": {"range": {opensearch_field: {"gte": lower, "lte": upper}}}}}
337
+ else:
338
+ raise TQLValidationError(f"Not between operator requires a list with two values, got: {value}")
339
+ elif operator == "not_cidr":
340
+ # Unwrap single-element lists for CIDR
341
+ if isinstance(value, list) and len(value) == 1:
342
+ value = value[0]
343
+ # Negated CIDR query
344
+ return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
345
+ elif operator == "is_not":
346
+ if value is None:
347
+ return {"exists": {"field": opensearch_field}}
348
+ else:
349
+ return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
350
+ elif operator == "any":
351
+ # ANY operator - matches if any element equals the value (default OpenSearch behavior)
352
+ # Works for both single values and arrays
353
+ # Handle case where value might be wrapped in a list from parsing
354
+ if isinstance(value, list) and len(value) == 1:
355
+ value = value[0]
356
+ return {"term": {opensearch_field: value}}
357
+ elif operator == "all":
358
+ # ALL operator - for arrays, all elements must match
359
+ # OpenSearch doesn't have a native "all elements must equal X" query
360
+ # We can use a script query to check this
361
+ return {
362
+ "script": {
363
+ "script": {
364
+ "source": """
365
+ if (!doc.containsKey(params.field) || doc[params.field].size() == 0) {
366
+ return false;
367
+ }
368
+ for (value in doc[params.field]) {
369
+ if (value != params.value) {
370
+ return false;
371
+ }
372
+ }
373
+ return true;
374
+ """,
375
+ "params": {"field": opensearch_field, "value": value},
376
+ }
377
+ }
378
+ }
379
+ elif operator == "not_any":
380
+ # NOT ANY - no element should match
381
+ # Handle case where value might be wrapped in a list from parsing
382
+ if isinstance(value, list) and len(value) == 1:
383
+ value = value[0]
384
+ return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
385
+ elif operator == "not_all":
386
+ # NOT ALL - not all elements equal the value
387
+ # This means: field doesn't exist OR array is empty OR at least one element is different
388
+ # Handle case where value might be wrapped in a list from parsing
389
+ if isinstance(value, list) and len(value) == 1:
390
+ value = value[0]
391
+ return {
392
+ "script": {
393
+ "script": {
394
+ "source": """
395
+ // Check if field exists in the document mapping
396
+ if (!doc.containsKey(params.field)) {
397
+ // Field doesn't exist, so NOT ALL is true
398
+ return true;
399
+ }
400
+
401
+ // Get field values
402
+ def values = doc[params.field];
403
+
404
+ // An empty array has no element equal to the value, so treat NOT ALL as satisfied
405
+ if (values.size() == 0) {
406
+ return true;
407
+ }
408
+
409
+ // Check if all elements match
410
+ for (value in values) {
411
+ if (value != params.value) {
412
+ // Found an element that doesn't match
413
+ return true;
414
+ }
415
+ }
416
+
417
+ // All elements match, so NOT all is false
418
+ return false;
419
+ """,
420
+ "params": {"field": opensearch_field, "value": value},
421
+ }
422
+ }
423
+ }
424
+ else:
425
+ raise TQLUnsupportedOperationError(f"Operator '{operator}' not supported for OpenSearch")
426
+
427
+ def _convert_logical_op(self, node: Dict[str, Any]) -> Dict[str, Any]:
428
+ """Convert a logical operation to OpenSearch query."""
429
+ operator = node["operator"]
430
+ left_query = self.convert_node(node["left"])
431
+ right_query = self.convert_node(node["right"])
432
+
433
+ if operator == "and":
434
+ return {"bool": {"must": [left_query, right_query]}}
435
+ elif operator == "or":
436
+ return {"bool": {"should": [left_query, right_query], "minimum_should_match": 1}}
437
+ else:
438
+ raise TQLUnsupportedOperationError(f"Logical operator '{operator}' not supported for OpenSearch")
439
+
440
+ def _convert_unary_op(self, node: Dict[str, Any]) -> Dict[str, Any]:
441
+ """Convert a unary operation to OpenSearch query."""
442
+ operator = node["operator"]
443
+
444
+ if operator == "not":
445
+ operand = node["operand"]
446
+
447
+ # Optimize double negation: NOT (NOT X) -> X
448
+ if isinstance(operand, dict) and operand.get("type") == "unary_op" and operand.get("operator") == "not":
449
+ return self.convert_node(operand["operand"])
450
+
451
+ # Optimize negated operators: NOT (field not_in [values]) -> field in [values]
452
+ if isinstance(operand, dict) and operand.get("type") == "comparison":
453
+ op = operand.get("operator")
454
+ if op == "not_in":
455
+ # Convert NOT (field not_in values) to (field in values)
456
+ field = operand["field"]
457
+ value = operand["value"]
458
+ opensearch_field, _ = self._resolve_field_name(field, "in")
459
+ if isinstance(value, list):
460
+ return {"terms": {opensearch_field: value}}
461
+ else:
462
+ return {"term": {opensearch_field: value}}
463
+ elif op == "not_contains":
464
+ # Convert NOT (field not_contains value) to (field contains value)
465
+ field = operand["field"]
466
+ value = operand["value"]
467
+ # Unwrap single-element lists for string operators
468
+ if isinstance(value, list) and len(value) == 1:
469
+ value = value[0]
470
+ opensearch_field, use_wildcard = self._resolve_field_name(field, "contains")
471
+ if use_wildcard:
472
+ return {"wildcard": {opensearch_field: f"*{value}*"}}
473
+ else:
474
+ if field in self.intelligent_mappings:
475
+ return {"match": {opensearch_field: value}}
476
+ else:
477
+ return {"wildcard": {opensearch_field: f"*{value}*"}}
478
+ # Add more optimizations for other negated operators as needed
479
+
480
+ operand_query = self.convert_node(operand)
481
+ return {"bool": {"must_not": operand_query}}
482
+ else:
483
+ raise TQLUnsupportedOperationError(f"Unary operator '{operator}' not supported for OpenSearch")
484
+
485
+ def _convert_collection_op(self, node: Dict[str, Any]) -> Dict[str, Any]: # noqa: C901
486
+ """Convert a collection operation (ANY/ALL) to OpenSearch query."""
487
+ operator = node["operator"]
488
+ field_name = node["field"]
489
+ comparison_operator = node["comparison_operator"]
490
+ value = node["value"]
491
+
492
+ # Get the mapped field name
493
+ opensearch_field, _ = self._resolve_field_name(field_name, comparison_operator)
494
+
495
+ # For OpenSearch, we're essentially doing a nested query or terms lookup
496
+ # This would ideally use the nested query type, but we'll create a simplified version
497
+ # that works for basic array fields
498
+
499
+ if operator == "any":
500
+ # ANY operator is like checking if any array element matches
501
+ # For basic equality/comparison, we can use a term/terms query directly
502
+ if comparison_operator in ["eq", "="]:
503
+ return {"term": {opensearch_field: value}}
504
+ elif comparison_operator in ["ne", "!="]:
505
+ return {"bool": {"must_not": {"term": {opensearch_field: value}}}}
506
+ elif comparison_operator in ["in"]:
507
+ if isinstance(value, list):
508
+ return {"terms": {opensearch_field: value}}
509
+ else:
510
+ return {"term": {opensearch_field: value}}
511
+ # For other comparisons, we create a range query
512
+ elif comparison_operator in ["gt", ">"]:
513
+ return {"range": {opensearch_field: {"gt": value}}}
514
+ elif comparison_operator in ["gte", ">="]:
515
+ return {"range": {opensearch_field: {"gte": value}}}
516
+ elif comparison_operator in ["lt", "<"]:
517
+ return {"range": {opensearch_field: {"lt": value}}}
518
+ elif comparison_operator in ["lte", "<="]:
519
+ return {"range": {opensearch_field: {"lte": value}}}
520
+ # For string operations, we use the appropriate query type
521
+ elif comparison_operator == "contains":
522
+ return {"wildcard": {opensearch_field: f"*{value}*"}}
523
+ elif comparison_operator == "startswith":
524
+ return {"prefix": {opensearch_field: value}}
525
+ elif comparison_operator == "endswith":
526
+ return {"wildcard": {opensearch_field: f"*{value}"}}
527
+ elif comparison_operator == "regexp":
528
+ return {"regexp": {opensearch_field: value}}
529
+ else:
530
+ raise TQLUnsupportedOperationError(
531
+ f"Operator '{comparison_operator}' not supported for ANY collection operator in OpenSearch"
532
+ )
533
+ elif operator == "all":
534
+ # ALL operator is more complex as we need to ensure all elements match
535
+ # We'll use a must_not exists approach with a filter for elements that don't match
536
+
537
+ # Create the negated condition
538
+ if comparison_operator in ["eq", "="]:
539
+ negated_condition = {"bool": {"must_not": {"term": {opensearch_field: value}}}}
540
+ elif comparison_operator in ["ne", "!="]:
541
+ negated_condition = {"term": {opensearch_field: value}}
542
+ elif comparison_operator in ["in"]:
543
+ if isinstance(value, list):
544
+ negated_condition = {"bool": {"must_not": {"terms": {opensearch_field: value}}}}
545
+ else:
546
+ negated_condition = {"bool": {"must_not": {"term": {opensearch_field: value}}}}
547
+ elif comparison_operator in ["gt", ">"]:
548
+ negated_condition = {"range": {opensearch_field: {"lte": value}}}
549
+ elif comparison_operator in ["gte", ">="]:
550
+ negated_condition = {"range": {opensearch_field: {"lt": value}}}
551
+ elif comparison_operator in ["lt", "<"]:
552
+ negated_condition = {"range": {opensearch_field: {"gte": value}}}
553
+ elif comparison_operator in ["lte", "<="]:
554
+ negated_condition = {"range": {opensearch_field: {"gt": value}}}
555
+ elif comparison_operator == "contains":
556
+ negated_condition = {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}*"}}}}
557
+ elif comparison_operator == "startswith":
558
+ negated_condition = {"bool": {"must_not": {"prefix": {opensearch_field: value}}}}
559
+ elif comparison_operator == "endswith":
560
+ negated_condition = {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}"}}}}
561
+ elif comparison_operator == "regexp":
562
+ negated_condition = {"bool": {"must_not": {"regexp": {opensearch_field: value}}}}
563
+ else:
564
+ raise TQLUnsupportedOperationError(
565
+ f"Operator '{comparison_operator}' not supported for ALL collection operator in OpenSearch"
566
+ )
567
+
568
+ # For ALL to be true, there must not be any elements that don't match the condition
569
+ return {"bool": {"must_not": negated_condition}}
570
+ else:
571
+ raise TQLUnsupportedOperationError(f"Collection operator '{operator}' not supported for OpenSearch")
572
+
573
+ def _has_filtering_conditions(self, node: Any) -> bool:
574
+ """Check if an AST node contains actual filtering conditions.
575
+
576
+ Args:
577
+ node: AST node to check
578
+
579
+ Returns:
580
+ True if the node contains filtering conditions, False otherwise
581
+ """
582
+ if not isinstance(node, dict):
583
+ return False
584
+
585
+ node_type = node.get("type")
586
+
587
+ if node_type == "comparison":
588
+ # All comparisons are filtering conditions
589
+ return True
590
+ elif node_type == "logical_op":
591
+ # Check both sides of logical operation
592
+ left_has = self._has_filtering_conditions(node.get("left"))
593
+ right_has = self._has_filtering_conditions(node.get("right"))
594
+ return left_has or right_has
595
+ elif node_type == "unary_op":
596
+ # Check the operand
597
+ return self._has_filtering_conditions(node.get("operand"))
598
+ elif node_type == "collection_op":
599
+ # Collection operations are filtering conditions
600
+ return True
601
+ elif node_type == "geo_expr":
602
+ # Check nested geo conditions
603
+ return self._has_filtering_conditions(node.get("conditions"))
604
+ elif node_type == "nslookup_expr":
605
+ # Check nested nslookup conditions
606
+ return self._has_filtering_conditions(node.get("conditions"))
607
+
608
+ return False
609
+
610
+ def _convert_geo_expr(self, node: Dict[str, Any]) -> Dict[str, Any]:
611
+ """Convert a geo expression to OpenSearch query.
612
+
613
+ Note: Geo expressions require post-processing since the geoip_lookup
614
+ must be applied to results after they return from OpenSearch.
615
+
616
+ The OpenSearch query depends on whether there are geo conditions:
617
+ - If there are geo conditions, we need an exists query on the IP field
618
+ (since we can only apply geo filters to IPs that exist)
619
+ - If there are no conditions (just enrichment), we return match_all
620
+
621
+ Args:
622
+ node: Geo expression AST node
623
+
624
+ Returns:
625
+ OpenSearch query
626
+ """
627
+ field_name = node["field"]
628
+ conditions = node.get("conditions")
629
+
630
+ # Check if there are actual filtering conditions
631
+ if conditions and self._has_filtering_conditions(conditions):
632
+ # We have geo conditions that will filter results, so we need exists query
633
+ # Try to resolve the field name, but if it fails, use the original
634
+ try:
635
+ opensearch_field, _ = self._resolve_field_name(field_name, "exists")
636
+ except TQLUnsupportedOperationError:
637
+ # Field might not have mappings or exists might not be supported
638
+ # Use the original field name
639
+ opensearch_field = field_name
640
+
641
+ return {"exists": {"field": opensearch_field}}
642
+ else:
643
+ # No filtering conditions, just enrichment - match all documents
644
+ return {"match_all": {}}
645
+
646
+ def _convert_nslookup_expr(self, node: Dict[str, Any]) -> Dict[str, Any]:
647
+ """Convert an nslookup expression to OpenSearch query.
648
+
649
+ Note: NSLookup expressions require post-processing since the DNS lookup
650
+ must be applied to results after they return from OpenSearch.
651
+
652
+ The OpenSearch query depends on whether there are DNS conditions:
653
+ - If there are DNS conditions, we need an exists query on the field
654
+ (since we can only apply DNS filters to fields that exist)
655
+ - If there are no conditions (just enrichment), we return match_all
656
+
657
+ Args:
658
+ node: NSLookup expression AST node
659
+
660
+ Returns:
661
+ OpenSearch query
662
+ """
663
+ field_name = node["field"]
664
+ conditions = node.get("conditions")
665
+
666
+ # Check if there are actual filtering conditions
667
+ if conditions and self._has_filtering_conditions(conditions):
668
+ # We have DNS conditions that will filter results, so we need exists query
669
+ # Try to resolve the field name, but if it fails, use the original
670
+ try:
671
+ opensearch_field, _ = self._resolve_field_name(field_name, "exists")
672
+ except TQLUnsupportedOperationError:
673
+ # Field might not have mappings or exists might not be supported
674
+ # Use the original field name
675
+ opensearch_field = field_name
676
+
677
+ return {"exists": {"field": opensearch_field}}
678
+ else:
679
+ # No filtering conditions, just enrichment - match all documents
680
+ return {"match_all": {}}
681
+
682
+ def _resolve_field_name(
683
+ self, field_name: str, operator: str, preferred_analyzer: Optional[str] = None
684
+ ) -> tuple[str, bool]:
685
+ """Resolve field name based on mappings and operator.
686
+
687
+ Args:
688
+ field_name: The TQL field name
689
+ operator: The operator being used
690
+ preferred_analyzer: Preferred analyzer for text operations
691
+
692
+ Returns:
693
+ Tuple of (resolved_field_name, use_wildcard_conversion)
694
+ """
695
+ # Check intelligent mappings first
696
+ if field_name in self.intelligent_mappings:
697
+ field_mapping = self.intelligent_mappings[field_name]
698
+ resolved_field = field_mapping.get_field_for_operator(operator, preferred_analyzer)
699
+ use_wildcard = field_mapping.needs_wildcard_conversion(operator, preferred_analyzer)
700
+ # If resolved field is empty, use the original field name
701
+ if not resolved_field:
702
+ resolved_field = field_name
703
+ return resolved_field, use_wildcard
704
+
705
+ # Check simple mappings
706
+ elif field_name in self.simple_mappings:
707
+ return self.simple_mappings[field_name], False
708
+
709
+ # No mapping, use field name as-is
710
+ else:
711
+ return field_name, False
712
+
713
+ def _convert_value(self, value: Any) -> Any:
714
+ """Convert value types for OpenSearch compatibility.
715
+
716
+ Args:
717
+ value: Value to convert
718
+
719
+ Returns:
720
+ Converted value (bool, None, or original)
721
+ """
722
+ if isinstance(value, str):
723
+ if value.lower() == "true":
724
+ return True
725
+ elif value.lower() == "false":
726
+ return False
727
+ elif value.lower() == "null":
728
+ return None
729
+ return value
730
+
731
+ def _is_text_field(self, field_name: str, opensearch_field: str) -> bool:
732
+ """Check if the resolved field is a text field.
733
+
734
+ Args:
735
+ field_name: Original field name
736
+ opensearch_field: Resolved OpenSearch field name
737
+
738
+ Returns:
739
+ True if it's a text field, False otherwise
740
+ """
741
+ # Method 1: Check if field is in intelligent mappings
742
+ if field_name in self.intelligent_mappings:
743
+ mapping = self.intelligent_mappings[field_name]
744
+ # Check if the selected field is a text field
745
+ field_type = mapping.field_types.get(opensearch_field, "keyword")
746
+ if field_type == "text":
747
+ return True
748
+
749
+ # Method 2: Check if the opensearch_field is a variant of a mapped field
750
+ # Extract base field name (e.g., "winlog.computer_name" from "winlog.computer_name.text")
751
+ base_field = opensearch_field
752
+ field_suffix = ""
753
+
754
+ if "." in opensearch_field:
755
+ parts = opensearch_field.rsplit(".", 1)
756
+ possible_base = parts[0]
757
+ possible_suffix = parts[1]
758
+
759
+ # Check if this looks like a field variant
760
+ if possible_suffix in ["text", "keyword", "lowercase", "english", "standard"]:
761
+ base_field = possible_base
762
+ field_suffix = possible_suffix
763
+
764
+ # Check if base field is in mappings
765
+ if base_field in self.intelligent_mappings:
766
+ mapping = self.intelligent_mappings[base_field]
767
+ # Check the field type of the specific variant
768
+ variant_type = mapping.field_types.get(opensearch_field, None)
769
+ if variant_type == "text":
770
+ return True
771
+ elif field_suffix == "text" and variant_type is None:
772
+ # If suffix is "text" and we don't have explicit type info, assume it's a text field
773
+ return True
774
+
775
+ return False
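A minimal usage sketch for the converter above, assuming QueryConverter is importable from tql.opensearch_components.query_converter as added in this diff. The AST dictionary is hand-built for illustration and the field names are arbitrary; in normal use the node would come from the TQL parser.

    from tql.opensearch_components.query_converter import QueryConverter

    # No intelligent mappings; one simple field rename (src_ip -> source.ip).
    converter = QueryConverter(field_mappings={}, simple_mappings={"src_ip": "source.ip"})

    node = {
        "type": "logical_op",
        "operator": "and",
        "left": {"type": "comparison", "field": "src_ip", "operator": "eq", "value": "10.0.0.5"},
        "right": {"type": "comparison", "field": "destination.port", "operator": "gte", "value": 443},
    }

    print(converter.convert_node(node))
    # With no mappings for these fields, the conversion rules above yield:
    # {"bool": {"must": [{"term": {"source.ip": "10.0.0.5"}},
    #                    {"range": {"destination.port": {"gte": 443}}}]}}

Note that when a comparison carries value mutators or type-changing field mutators, convert_node intentionally issues only a broad exists query for most operators, so the precise filtering happens in post-processing after OpenSearch returns the documents.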