tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
tql/parser.py ADDED
@@ -0,0 +1,1363 @@
1
+ """Parser module for Tellaro Query Language (TQL).
2
+
3
+ This module provides the main TQLParser class that orchestrates parsing
4
+ using the modular parser components.
5
+ """
6
+
7
+ from typing import Any, Dict, List
8
+
9
+ from pyparsing import ParseException, ParserElement
10
+
11
+ from .exceptions import TQLOperatorError, TQLParseError, TQLSyntaxError, TQLValueError
12
+ from .parser_components.ast_builder import ASTBuilder
13
+ from .parser_components.error_analyzer import ErrorAnalyzer
14
+ from .parser_components.field_extractor import FieldExtractor
15
+ from .parser_components.grammar import TQLGrammar
16
+
17
# Enable pyparsing's packrat mode. The call is made on the ParserElement class itself at import time,
# so it acts as a global switch rather than a per-grammar setting.
+ ParserElement.enablePackrat()
18
+
19
+
20
+ class TQLParser:
21
+ """TQL query parser.
22
+
23
+ Parses TQL query strings into an Abstract Syntax Tree (AST) that can be
24
+ evaluated against data or converted to backend-specific query formats.
25
+ """
26
+
27
def __init__(self):
    """Create the collaborator objects the parser delegates to.

    No query is parsed here; the constructor only instantiates the
    components used by the other methods.
    """
    # Grammar object; ``parse`` runs its ``tql_expr`` element against the query text.
    self.grammar = TQLGrammar()
    # Pulls field/value details out of raw parse results while the AST is built.
    self.ast_builder = ASTBuilder()
    # Turns a pyparsing failure into an error message plus suggestions.
    self.error_analyzer = ErrorAnalyzer()
    # Walks a finished AST to collect the fields it references.
    self.field_extractor = FieldExtractor()
33
+
34
def parse(self, query: str) -> Dict[str, Any]:
    """Parse a TQL query string into an AST.

    Args:
        query: The TQL query string to parse

    Returns:
        Dictionary representing the parsed query AST

    Raises:
        TQLSyntaxError: If the grammar rejects the query, including the case
            where a string literal is left unterminated
        TQLOperatorError: Re-raised unchanged, with ``query`` attached
        TQLValueError: If a ``ValueError`` is raised while parsing or building the AST
        TQLParseError: For any other unexpected exception
    """
    try:
        parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)

        # The grammar wraps the whole expression in one outer list; unwrap it.
        return self._build_ast(parsed_result.asList()[0])

    except ParseException as e:
        # An unterminated string literal is the most actionable diagnosis, so
        # look for it before falling back to the generic analysis.
        open_quote_pos = self._find_unterminated_quote(query)
        if open_quote_pos >= 0:
            raise TQLSyntaxError(
                f"Unterminated string literal starting at position {open_quote_pos}",
                position=open_quote_pos,
                query=query,
                suggestions=[],
            ) from e

        # ``loc`` is the absolute offset into the query. ``col`` is relative to
        # the current line, so it would point at the wrong character for
        # multi-line queries and disagree with the absolute offsets used above.
        position = e.loc

        # Analyze the error to provide better feedback
        error_msg, suggestions = self.error_analyzer.analyze_parse_error(query, position, str(e))

        raise TQLSyntaxError(error_msg, position=position, query=query, suggestions=suggestions) from e
    except TQLOperatorError as e:
        # Attach the query for context and re-raise with the original traceback.
        e.query = query
        raise
    except ValueError as e:
        # Handle value errors from our own validation
        raise TQLValueError(str(e), query=query) from e
    except Exception as e:
        # Generic parse error for unexpected exceptions
        raise TQLParseError(f"Invalid TQL syntax: {str(e)}", query=query) from e

@staticmethod
def _find_unterminated_quote(query: str) -> int:
    """Return the offset of the quote that opens an unterminated string literal.

    The query is scanned left to right while tracking which quote character,
    if any, is currently open. A quote of the other kind inside an open
    literal is treated as plain text, so ``"O'Brien"`` is balanced.

    Args:
        query: The raw query text

    Returns:
        Offset of the opening quote of the literal still open at the end of
        the query, or -1 when every literal is closed (mirrors ``str.find``).
    """
    open_char = ""
    open_pos = -1
    skip_next = False
    for index, char in enumerate(query):
        if skip_next:
            # Character following a backslash inside a literal: never a delimiter.
            skip_next = False
        elif open_char:
            # NOTE(review): assumes backslash escapes the next character inside
            # quoted strings - confirm against the string rules in TQLGrammar.
            if char == "\\":
                skip_next = True
            elif char == open_char:
                open_char = ""
                open_pos = -1
        elif char in "\"'":
            open_char = char
            open_pos = index
    return open_pos
90
+
91
def extract_fields(self, query: str) -> List[str]:
    """Return the field names referenced by a TQL query.

    The query is parsed into an AST first, and that AST is then handed to the
    field extractor. Field mappings are not applied.

    Args:
        query: TQL query text to inspect

    Returns:
        Sorted list of unique field names referenced in the query

    Raises:
        Any error raised by ``parse`` for an invalid query propagates unchanged.
    """
    # Step 1: turn the query text into an AST (may raise on invalid input).
    tree = self.parse(query)

    # Step 2: let the dedicated component collect the referenced fields.
    extractor = self.field_extractor
    return extractor.extract_fields(tree)
111
+
112
+ def _build_ast(self, parsed: Any) -> Dict[str, Any]: # noqa: C901
113
+ """Build AST from parsed pyparsing result.
114
+
115
+ Args:
116
+ parsed: The parsed result from pyparsing
117
+
118
+ Returns:
119
+ Dictionary representing the AST node
120
+ """
121
+ if isinstance(parsed, list):
122
+ if len(parsed) == 1:
123
+ # Single item, check if it's a field with is_private/is_global mutator
124
+ item = parsed[0]
125
+ if isinstance(item, list):
126
+ # Could be a typed_field
127
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(item)
128
+ if field_mutators:
129
+ # Check if the last mutator is is_private or is_global
130
+ last_mutator = field_mutators[-1] if field_mutators else None
131
+ if last_mutator and last_mutator.get("name", "").lower() in ["is_private", "is_global"]:
132
+ # This is field | is_private or field | is_global without operator
133
+ # Default to eq true
134
+ result = {
135
+ "type": "comparison",
136
+ "field": field_name,
137
+ "type_hint": type_hint,
138
+ "operator": "eq",
139
+ "value": "true",
140
+ }
141
+ if field_mutators:
142
+ result["field_mutators"] = field_mutators
143
+ return result
144
+ # Single item, unwrap it
145
+ return self._build_ast(parsed[0])
146
+ elif len(parsed) >= 2 and isinstance(parsed[0], str) and parsed[0].lower() == "stats":
147
+ # This is a stats expression without filter (applies to all records)
148
+ return self._build_stats_ast(parsed)
149
+ elif len(parsed) == 2:
150
+ # Could be unary logical operator (NOT), unary comparison (field exists), stats expression, or empty geo expression
151
+ first, second = parsed
152
+
153
+ # Check for stats expression: | stats ...
154
+ if isinstance(first, str) and first == "|" and isinstance(second, list) and len(second) > 0:
155
+ # Check if this is a stats expression
156
+ if isinstance(second[0], str) and second[0].lower() == "stats":
157
+ # This is | stats expression
158
+ return self._build_stats_ast(second)
159
+
160
+ # Check for empty geo expression: field | geo
161
+ if isinstance(second, str) and second.lower() in ["geo", "geoip_lookup"]:
162
+ # This is an empty geo expression: field | geo()
163
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
164
+
165
+ result = {
166
+ "type": "geo_expr",
167
+ "field": field_name,
168
+ "type_hint": type_hint,
169
+ "field_mutators": field_mutators,
170
+ "conditions": None, # No conditions for enrichment-only
171
+ }
172
+
173
+ return result
174
+
175
+ # Check for empty nslookup expression: field | nslookup
176
+ elif isinstance(second, str) and second.lower() == "nslookup":
177
+ # This is an empty nslookup expression: field | nslookup()
178
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
179
+
180
+ result = {
181
+ "type": "nslookup_expr",
182
+ "field": field_name,
183
+ "type_hint": type_hint,
184
+ "field_mutators": field_mutators,
185
+ "conditions": None, # No conditions for enrichment-only
186
+ }
187
+
188
+ return result
189
+
190
+ # Check for is_private/is_global without operator (defaults to eq true)
191
+ # This happens when we have a field with is_private/is_global as the last mutator
192
+ elif isinstance(first, str) and isinstance(second, list) and len(second) == 1:
193
+ # This could be field | mutator structure
194
+ mutator_name = second[0] if isinstance(second[0], str) else None
195
+ if mutator_name and mutator_name.lower() in ["is_private", "is_global"]:
196
+ # Build a typed_field from these components
197
+ typed_field = [first, second]
198
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(typed_field)
199
+ # This is field | is_private or field | is_global without operator
200
+ # Default to eq true
201
+ result = {
202
+ "type": "comparison",
203
+ "field": field_name,
204
+ "type_hint": type_hint,
205
+ "operator": "eq",
206
+ "value": "true",
207
+ }
208
+ if field_mutators:
209
+ result["field_mutators"] = field_mutators
210
+ return result
211
+
212
+ elif isinstance(first, str) and (first.lower() == "not" or first == "!"):
213
+ # Unary logical operator (NOT or !)
214
+ return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second)}
215
+ elif isinstance(second, str) and (second.lower() == "exists" or second.lower() == "!exists"):
216
+ # Unary comparison operation (field exists or !exists)
217
+ field_name, type_hint, mutators = self.ast_builder.extract_field_info(first)
218
+ operator = "not_exists" if second.lower() == "!exists" else "exists"
219
+ result = {
220
+ "type": "comparison",
221
+ "field": field_name,
222
+ "type_hint": type_hint,
223
+ "operator": operator,
224
+ "value": None, # No value for unary operators
225
+ }
226
+ if mutators:
227
+ result["field_mutators"] = mutators
228
+ return result
229
+ elif isinstance(first, list) and isinstance(second, list):
230
+ # This could be filter + stats
231
+ # Check if second element starts with 'stats'
232
+ if len(second) >= 2 and isinstance(second[0], str) and second[0].lower() == "stats":
233
+ # This is filter | stats
234
+ return {
235
+ "type": "query_with_stats",
236
+ "filter": self._build_ast(first),
237
+ "stats": self._build_stats_ast(second),
238
+ }
239
+ else:
240
+ # Fallback to treating as unary logical operator
241
+ return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second)}
242
+ elif len(parsed) >= 3:
243
+ # Check if this is a field with multiple mutators ending in is_private/is_global
244
+ if isinstance(parsed[0], str) and all(isinstance(item, list) and len(item) == 1 for item in parsed[1:]):
245
+ # This looks like field | mutator1 | mutator2 | ...
246
+ last_mutator_list = parsed[-1]
247
+ if (
248
+ len(last_mutator_list) == 1
249
+ and isinstance(last_mutator_list[0], str)
250
+ and last_mutator_list[0].lower() in ["is_private", "is_global"]
251
+ ):
252
+ # This is a field with mutators ending in is_private/is_global
253
+ # Build the typed_field structure and default to eq true
254
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed)
255
+ result = {
256
+ "type": "comparison",
257
+ "field": field_name,
258
+ "type_hint": type_hint,
259
+ "operator": "eq",
260
+ "value": "true",
261
+ }
262
+ if field_mutators:
263
+ result["field_mutators"] = field_mutators
264
+ return result
265
+
266
+ if len(parsed) == 4:
267
+ # Check for ANY/ALL operators: ANY field op value
268
+ first, field, operator, value = parsed
269
+
270
+ if isinstance(first, str) and first.lower() in ["any", "all"]:
271
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
272
+ value_extracted, value_mutators = self.ast_builder.extract_value_info(value)
273
+ result = {
274
+ "type": "collection_op",
275
+ "operator": first.lower(),
276
+ "field": field_name,
277
+ "type_hint": type_hint,
278
+ "comparison_operator": operator.lower(),
279
+ "value": value_extracted,
280
+ }
281
+ if field_mutators:
282
+ result["field_mutators"] = field_mutators
283
+ if value_mutators:
284
+ result["value_mutators"] = value_mutators
285
+ return result
286
+ else:
287
+ # Handle other 4-element cases like "field is not value", "field not in value", or geo expressions
288
+ first, second, third, fourth = parsed
289
+
290
+ # Check for negated operators like "field not none value"
291
+ if (
292
+ isinstance(second, str)
293
+ and (second.lower() == "not" or second == "!")
294
+ and isinstance(third, str)
295
+ ):
296
+ # This is a negated operator
297
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
298
+ # Handle 'not none' -> 'any' (double negative)
299
+ if third.lower() == "none":
300
+ normalized_operator = "any"
301
+ else:
302
+ normalized_operator = f"not_{third.lower()}"
303
+ result = {
304
+ "type": "comparison",
305
+ "field": field_name,
306
+ "type_hint": type_hint,
307
+ "operator": normalized_operator,
308
+ "value": fourth,
309
+ }
310
+ if field_mutators:
311
+ result["field_mutators"] = field_mutators
312
+ return result
313
+
314
+ # Check for geo() expression with parameters: field geo params...
315
+ if isinstance(second, str) and second.lower() in ["geo", "geoip_lookup"]:
316
+ # This is a geo expression: field | geo(params...)
317
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
318
+
319
+ # All remaining elements are parameters (could be conditions or actual params)
320
+ conditions = None
321
+ geo_params = {}
322
+
323
+ # Process all parameters starting from third element
324
+ param_elements = parsed[2:] # Everything after field and 'geo'
325
+
326
+ for element in param_elements:
327
+ if isinstance(element, list):
328
+ if len(element) == 2:
329
+ # Check if this is a parameter or a condition
330
+ if isinstance(element[0], str):
331
+ # This is a proper parameter: ['param_name', 'value']
332
+ param_name, param_value = element
333
+ # Convert string boolean values to actual booleans
334
+ if isinstance(param_value, str):
335
+ if param_value.lower() == "true":
336
+ param_value = True
337
+ elif param_value.lower() == "false":
338
+ param_value = False
339
+ geo_params[param_name] = param_value
340
+ else:
341
+ # This is a condition like [['country_iso_code'], '=', ['US']]
342
+ conditions = element
343
+ elif len(element) == 3 and element[1] == "=":
344
+ # This is a parameter parsed as comparison: [['param'], '=', ['value']]
345
+ if (
346
+ isinstance(element[0], list)
347
+ and len(element[0]) == 1
348
+ and isinstance(element[0][0], str)
349
+ and element[0][0] in ["force", "cache", "cache_ttl", "db_path", "save", "field"]
350
+ ):
351
+ param_name = element[0][0]
352
+ param_value = (
353
+ element[2]
354
+ if not isinstance(element[2], list)
355
+ else element[2][0] if element[2] else None
356
+ )
357
+ # Convert string boolean values to actual booleans
358
+ if isinstance(param_value, str):
359
+ if param_value.lower() == "true":
360
+ param_value = True
361
+ elif param_value.lower() == "false":
362
+ param_value = False
363
+ geo_params[param_name] = param_value
364
+ else:
365
+ # This is actual conditions, not a parameter
366
+ conditions = element
367
+ else:
368
+ # This might be conditions
369
+ conditions = element
370
+
371
+ result = {
372
+ "type": "geo_expr",
373
+ "field": field_name,
374
+ "type_hint": type_hint,
375
+ "field_mutators": field_mutators,
376
+ "conditions": self._build_ast(conditions) if conditions else None,
377
+ }
378
+
379
+ # Add geo parameters if any
380
+ if geo_params:
381
+ result["geo_params"] = geo_params
382
+
383
+ return result
384
+
385
+ # Check for nslookup() expression with parameters: field nslookup params...
386
+ elif isinstance(second, str) and second.lower() == "nslookup":
387
+ # This is a nslookup expression: field | nslookup(params...)
388
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
389
+
390
+ # All remaining elements are parameters (could be conditions or actual params)
391
+ conditions = None
392
+ nslookup_params = {}
393
+
394
+ # Process all parameters starting from third element
395
+ param_elements = parsed[2:] # Everything after field and 'nslookup'
396
+
397
+ for element in param_elements:
398
+ if isinstance(element, list):
399
+ if len(element) == 2:
400
+ # Check if this is a parameter or a condition
401
+ if isinstance(element[0], str):
402
+ # This is a proper parameter: ['param_name', 'value']
403
+ param_name, param_value = element
404
+ # Convert string boolean values to actual booleans
405
+ if isinstance(param_value, str):
406
+ if param_value.lower() == "true":
407
+ param_value = True
408
+ elif param_value.lower() == "false":
409
+ param_value = False
410
+ nslookup_params[param_name] = param_value
411
+ else:
412
+ # This is a condition like [['resolved_ip'], 'exists']
413
+ conditions = element
414
+ elif len(element) == 3 and element[1] == "=":
415
+ # This is a parameter parsed as comparison: [['param'], '=', ['value']]
416
+ if (
417
+ isinstance(element[0], list)
418
+ and len(element[0]) == 1
419
+ and isinstance(element[0][0], str)
420
+ and element[0][0]
421
+ in ["force", "servers", "append_field", "save", "types", "field"]
422
+ ):
423
+ param_name = element[0][0]
424
+ param_value = (
425
+ element[2]
426
+ if not isinstance(element[2], list)
427
+ else element[2][0] if element[2] else None
428
+ )
429
+ # Handle types parameter which should be a list
430
+ if param_name == "types" and isinstance(element[2], list):
431
+ param_value = element[2]
432
+ # Unwrap if double-wrapped
433
+ if len(param_value) == 1 and isinstance(param_value[0], list):
434
+ param_value = param_value[0]
435
+ # Convert string boolean values to actual booleans
436
+ elif isinstance(param_value, str):
437
+ if param_value.lower() == "true":
438
+ param_value = True
439
+ elif param_value.lower() == "false":
440
+ param_value = False
441
+ nslookup_params[param_name] = param_value
442
+ else:
443
+ # This is actual conditions, not a parameter
444
+ conditions = element
445
+ else:
446
+ # This might be conditions
447
+ conditions = element
448
+
449
+ result = {
450
+ "type": "nslookup_expr",
451
+ "field": field_name,
452
+ "type_hint": type_hint,
453
+ "field_mutators": field_mutators,
454
+ "conditions": self._build_ast(conditions) if conditions else None,
455
+ }
456
+
457
+ # Add nslookup parameters if any
458
+ if nslookup_params:
459
+ result["nslookup_params"] = nslookup_params
460
+
461
+ return result
462
+
463
+ # Handle "field is not value" or "field ! is value"
464
+ if (
465
+ isinstance(second, str)
466
+ and second.lower() == "is"
467
+ and isinstance(third, str)
468
+ and third.lower() == "not"
469
+ ):
470
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
471
+ result = {
472
+ "type": "comparison",
473
+ "field": field_name,
474
+ "type_hint": type_hint,
475
+ "operator": "is_not",
476
+ "value": fourth,
477
+ }
478
+ if field_mutators:
479
+ result["field_mutators"] = field_mutators
480
+ return result
481
+ elif isinstance(second, str) and second == "!" and isinstance(third, str) and third.lower() == "is":
482
+ # Handle "field ! is value"
483
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
484
+ result = {
485
+ "type": "comparison",
486
+ "field": field_name,
487
+ "type_hint": type_hint,
488
+ "operator": "is_not",
489
+ "value": fourth,
490
+ }
491
+ if field_mutators:
492
+ result["field_mutators"] = field_mutators
493
+ return result
494
+
495
+ # Handle "field not operator value" (e.g., "field not in value") or "field ! operator value"
496
+ if isinstance(second, str) and (second.lower() == "not" or second == "!"):
497
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
498
+ value, value_mutators = self.ast_builder.extract_value_info(fourth)
499
+ result = {
500
+ "type": "comparison",
501
+ "field": field_name,
502
+ "type_hint": type_hint,
503
+ "operator": f"not_{third.lower()}",
504
+ "value": value,
505
+ }
506
+ if field_mutators:
507
+ result["field_mutators"] = field_mutators
508
+ if value_mutators:
509
+ result["value_mutators"] = value_mutators
510
+ return result
511
+ elif len(parsed) == 5:
512
+ # Check for natural between syntax: field between value1 and value2
513
+ # Only process as between if the second element is "between"
514
+ if (
515
+ isinstance(parsed[1], str)
516
+ and parsed[1].lower() == "between"
517
+ and isinstance(parsed[3], str)
518
+ and parsed[3].lower() == "and"
519
+ ):
520
+ field, between_op, value1, and_op, value2 = parsed
521
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
522
+ result = {
523
+ "type": "comparison",
524
+ "field": field_name,
525
+ "type_hint": type_hint,
526
+ "operator": "between",
527
+ "value": [value1, value2],
528
+ }
529
+ if field_mutators:
530
+ result["field_mutators"] = field_mutators
531
+ return result
532
+ else:
533
+ # Check if this is a geo expression with multiple parameters
534
+ if isinstance(parsed[1], str) and parsed[1].lower() in ["geo", "geoip_lookup"]:
535
+ # This is a geo expression with multiple parameters
536
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed[0])
537
+
538
+ # All remaining elements are parameters (could be conditions or actual params)
539
+ conditions = None
540
+ geo_params = {}
541
+
542
+ # Process all parameters starting from third element
543
+ param_elements = parsed[2:] # Everything after field and 'geo'
544
+
545
+ for element in param_elements:
546
+ if isinstance(element, list):
547
+ if len(element) == 2:
548
+ # Check if this is a parameter or a condition
549
+ if isinstance(element[0], str):
550
+ # This is a proper parameter: ['param_name', 'value']
551
+ param_name, param_value = element
552
+ # Convert string boolean values to actual booleans
553
+ if isinstance(param_value, str):
554
+ if param_value.lower() == "true":
555
+ param_value = True
556
+ elif param_value.lower() == "false":
557
+ param_value = False
558
+ geo_params[param_name] = param_value
559
+ else:
560
+ # This is a condition like [['country_iso_code'], '=', ['US']]
561
+ conditions = element
562
+ elif len(element) == 3 and element[1] == "=":
563
+ # This is a parameter parsed as comparison: [['param'], '=', ['value']]
564
+ if (
565
+ isinstance(element[0], list)
566
+ and len(element[0]) == 1
567
+ and isinstance(element[0][0], str)
568
+ and element[0][0] in ["force", "cache", "cache_ttl", "db_path", "save", "field"]
569
+ ):
570
+ param_name = element[0][0]
571
+ param_value = (
572
+ element[2]
573
+ if not isinstance(element[2], list)
574
+ else element[2][0] if element[2] else None
575
+ )
576
+ # Convert string boolean values to actual booleans
577
+ if isinstance(param_value, str):
578
+ if param_value.lower() == "true":
579
+ param_value = True
580
+ elif param_value.lower() == "false":
581
+ param_value = False
582
+ geo_params[param_name] = param_value
583
+ else:
584
+ # This is actual conditions, not a parameter
585
+ conditions = element
586
+ else:
587
+ # This might be conditions
588
+ conditions = element
589
+
590
+ result = {
591
+ "type": "geo_expr",
592
+ "field": field_name,
593
+ "type_hint": type_hint,
594
+ "field_mutators": field_mutators,
595
+ "conditions": self._build_ast(conditions) if conditions else None,
596
+ }
597
+
598
+ # Add geo parameters if any
599
+ if geo_params:
600
+ result["geo_params"] = geo_params
601
+
602
+ return result
603
+ # Check if this is a nslookup expression with multiple parameters
604
+ elif isinstance(parsed[1], str) and parsed[1].lower() == "nslookup":
605
+ # This is a nslookup expression with multiple parameters
606
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed[0])
607
+
608
+ # All remaining elements are parameters (could be conditions or actual params)
609
+ conditions = None
610
+ nslookup_params = {}
611
+
612
+ # Process all parameters starting from third element
613
+ param_elements = parsed[2:] # Everything after field and 'nslookup'
614
+
615
+ for element in param_elements:
616
+ if isinstance(element, list):
617
+ if len(element) == 2:
618
+ # Check if this is a parameter or a condition
619
+ if isinstance(element[0], str):
620
+ # This is a proper parameter: ['param_name', 'value']
621
+ param_name, param_value = element
622
+ # Convert string boolean values to actual booleans
623
+ if isinstance(param_value, str):
624
+ if param_value.lower() == "true":
625
+ param_value = True
626
+ elif param_value.lower() == "false":
627
+ param_value = False
628
+ nslookup_params[param_name] = param_value
629
+ else:
630
+ # This is a condition like [['resolved_ip'], 'exists']
631
+ conditions = element
632
+ elif len(element) == 3 and element[1] == "=":
633
+ # This is a parameter parsed as comparison: [['param'], '=', ['value']]
634
+ if (
635
+ isinstance(element[0], list)
636
+ and len(element[0]) == 1
637
+ and isinstance(element[0][0], str)
638
+ and element[0][0]
639
+ in ["force", "servers", "append_field", "save", "types", "field"]
640
+ ):
641
+ param_name = element[0][0]
642
+ param_value = (
643
+ element[2]
644
+ if not isinstance(element[2], list)
645
+ else element[2][0] if element[2] else None
646
+ )
647
+ # Handle types parameter which should be a list
648
+ if param_name == "types" and isinstance(element[2], list):
649
+ param_value = element[2]
650
+ # Unwrap if double-wrapped
651
+ if len(param_value) == 1 and isinstance(param_value[0], list):
652
+ param_value = param_value[0]
653
+ # Convert string boolean values to actual booleans
654
+ elif isinstance(param_value, str):
655
+ if param_value.lower() == "true":
656
+ param_value = True
657
+ elif param_value.lower() == "false":
658
+ param_value = False
659
+ nslookup_params[param_name] = param_value
660
+ else:
661
+ # This is actual conditions, not a parameter
662
+ conditions = element
663
+ else:
664
+ # This might be conditions
665
+ conditions = element
666
+
667
+ result = {
668
+ "type": "nslookup_expr",
669
+ "field": field_name,
670
+ "type_hint": type_hint,
671
+ "field_mutators": field_mutators,
672
+ "conditions": self._build_ast(conditions) if conditions else None,
673
+ }
674
+
675
+ # Add nslookup parameters if any
676
+ if nslookup_params:
677
+ result["nslookup_params"] = nslookup_params
678
+
679
+ return result
680
+ else:
681
+ # This is a chained operation, not a between operation
682
+ return self._build_chained_ast(parsed)
683
+
684
+ elif len(parsed) == 6:
685
+ # Check for "field not between value1 and value2" or "field ! between value1 and value2"
686
+ # Only process as not_between if it matches the pattern
687
+ if (
688
+ len(parsed) >= 6
689
+ and isinstance(parsed[1], str)
690
+ and (parsed[1].lower() == "not" or parsed[1] == "!")
691
+ and isinstance(parsed[2], str)
692
+ and parsed[2].lower() == "between"
693
+ and isinstance(parsed[4], str)
694
+ and parsed[4].lower() == "and"
695
+ ):
696
+ field, not_word, between_op, value1, and_op, value2 = parsed
697
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
698
+ result = {
699
+ "type": "comparison",
700
+ "field": field_name,
701
+ "type_hint": type_hint,
702
+ "operator": "not_between",
703
+ "value": [value1, value2],
704
+ }
705
+ if field_mutators:
706
+ result["field_mutators"] = field_mutators
707
+ return result
708
+ else:
709
+ # Check if this is a geo expression with multiple parameters
710
+ if isinstance(parsed[1], str) and parsed[1].lower() in ["geo", "geoip_lookup"]:
711
+ # This is a geo expression with multiple parameters (6+ elements)
712
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed[0])
713
+
714
+ # All remaining elements are parameters (could be conditions or actual params)
715
+ conditions = None
716
+ geo_params = {}
717
+
718
+ # Process all parameters starting from third element
719
+ param_elements = parsed[2:] # Everything after field and 'geo'
720
+
721
+ for element in param_elements:
722
+ if isinstance(element, list):
723
+ if len(element) == 2:
724
+ # Check if this is a parameter or a condition
725
+ if isinstance(element[0], str):
726
+ # This is a proper parameter: ['param_name', 'value']
727
+ param_name, param_value = element
728
+ # Convert string boolean values to actual booleans
729
+ if isinstance(param_value, str):
730
+ if param_value.lower() == "true":
731
+ param_value = True
732
+ elif param_value.lower() == "false":
733
+ param_value = False
734
+ geo_params[param_name] = param_value
735
+ else:
736
+ # This is a condition like [['country_iso_code'], '=', ['US']]
737
+ conditions = element
738
+ elif len(element) == 3 and element[1] == "=":
739
+ # This is a parameter parsed as comparison: [['param'], '=', ['value']]
740
+ if (
741
+ isinstance(element[0], list)
742
+ and len(element[0]) == 1
743
+ and isinstance(element[0][0], str)
744
+ and element[0][0] in ["force", "cache", "cache_ttl", "db_path", "save", "field"]
745
+ ):
746
+ param_name = element[0][0]
747
+ param_value = (
748
+ element[2]
749
+ if not isinstance(element[2], list)
750
+ else element[2][0] if element[2] else None
751
+ )
752
+ # Convert string boolean values to actual booleans
753
+ if isinstance(param_value, str):
754
+ if param_value.lower() == "true":
755
+ param_value = True
756
+ elif param_value.lower() == "false":
757
+ param_value = False
758
+ geo_params[param_name] = param_value
759
+ else:
760
+ # This is actual conditions, not a parameter
761
+ conditions = element
762
+ else:
763
+ # This might be conditions
764
+ conditions = element
765
+
766
+ result = {
767
+ "type": "geo_expr",
768
+ "field": field_name,
769
+ "type_hint": type_hint,
770
+ "field_mutators": field_mutators,
771
+ "conditions": self._build_ast(conditions) if conditions else None,
772
+ }
773
+
774
+ # Add geo parameters if any
775
+ if geo_params:
776
+ result["geo_params"] = geo_params
777
+
778
+ return result
779
+ else:
780
+ # This is a chained operation, not a not_between operation
781
+ return self._build_chained_ast(parsed)
782
+
783
+ elif len(parsed) == 3:
784
+ # Binary operation or comparison (including negated unary operators like "field not exists")
785
+ left, operator, right = parsed
786
+
787
+ # Check for geo() expression first
788
+ if isinstance(operator, str) and operator.lower() in ["geo", "geoip_lookup"]:
789
+ # This is a geo expression: field | geo(conditions OR params)
790
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
791
+
792
+ conditions = None
793
+ geo_params = {}
794
+
795
+ # Check if this is actually a parameter masquerading as a condition
796
+ # Look for comparison operations where the field is a known parameter name
797
+ if (
798
+ isinstance(right, list)
799
+ and len(right) == 3
800
+ and isinstance(right[1], str)
801
+ and right[1] == "="
802
+ and isinstance(right[0], list)
803
+ and len(right[0]) == 1
804
+ and isinstance(right[0][0], str)
805
+ and right[0][0] in ["force", "cache", "cache_ttl", "db_path", "save"]
806
+ ):
807
+ # This is a parameter parsed as a comparison: force = true
808
+ param_name = right[0][0]
809
+ param_value = right[2] if not isinstance(right[2], list) else right[2][0] if right[2] else None
810
+ # Convert string boolean values to actual booleans
811
+ if isinstance(param_value, str):
812
+ if param_value.lower() == "true":
813
+ param_value = True
814
+ elif param_value.lower() == "false":
815
+ param_value = False
816
+ geo_params[param_name] = param_value
817
+ else:
818
+ # This is actual conditions: geo(country_iso_code eq 'US')
819
+ conditions = right
820
+
821
+ result = {
822
+ "type": "geo_expr",
823
+ "field": field_name,
824
+ "type_hint": type_hint,
825
+ "field_mutators": field_mutators,
826
+ "conditions": self._build_ast(conditions) if conditions else None,
827
+ }
828
+
829
+ # Add geo parameters if any
830
+ if geo_params:
831
+ result["geo_params"] = geo_params
832
+
833
+ return result
834
+
835
+ # Check for nslookup() expression
836
+ elif isinstance(operator, str) and operator.lower() == "nslookup":
837
+ # This is a nslookup expression: field | nslookup(conditions OR params)
838
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
839
+
840
+ conditions = None
841
+ nslookup_params = {}
842
+
843
+ # Check if this is actually a parameter masquerading as a condition
844
+ # Look for comparison operations where the field is a known parameter name
845
+ if (
846
+ isinstance(right, list)
847
+ and len(right) == 3
848
+ and isinstance(right[1], str)
849
+ and right[1] == "="
850
+ and isinstance(right[0], list)
851
+ and len(right[0]) == 1
852
+ and isinstance(right[0][0], str)
853
+ and right[0][0] in ["force", "servers", "append_field", "save", "types"]
854
+ ):
855
+ # This is a parameter parsed as a comparison: force = true
856
+ param_name = right[0][0]
857
+ param_value = right[2] if not isinstance(right[2], list) else right[2][0] if right[2] else None
858
+ # Handle types parameter which should be a list
859
+ if param_name == "types" and isinstance(right[2], list):
860
+ param_value = right[2]
861
+ # Unwrap if double-wrapped
862
+ if len(param_value) == 1 and isinstance(param_value[0], list):
863
+ param_value = param_value[0]
864
+ # Convert string boolean values to actual booleans
865
+ elif isinstance(param_value, str):
866
+ if param_value.lower() == "true":
867
+ param_value = True
868
+ elif param_value.lower() == "false":
869
+ param_value = False
870
+ nslookup_params[param_name] = param_value
871
+ else:
872
+ # This is actual conditions: nslookup(data contains 'example.com')
873
+ conditions = right
874
+
875
+ result = {
876
+ "type": "nslookup_expr",
877
+ "field": field_name,
878
+ "type_hint": type_hint,
879
+ "field_mutators": field_mutators,
880
+ "conditions": self._build_ast(conditions) if conditions else None,
881
+ }
882
+
883
+ # Add nslookup parameters if any
884
+ if nslookup_params:
885
+ result["nslookup_params"] = nslookup_params
886
+
887
+ return result
888
+
889
+ if operator.lower() in ["and", "or"]:
890
+ # Logical operation
891
+ return {
892
+ "type": "logical_op",
893
+ "operator": operator.lower(),
894
+ "left": self._build_ast(left),
895
+ "right": self._build_ast(right),
896
+ }
897
+ elif (
898
+ isinstance(operator, str)
899
+ and (operator.lower() == "not" or operator == "!")
900
+ and isinstance(right, str)
901
+ and right.lower() == "exists"
902
+ ):
903
+ # Handle "field not exists" or "field ! exists" (negated unary operator)
904
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
905
+ result = {
906
+ "type": "comparison",
907
+ "field": field_name,
908
+ "type_hint": type_hint,
909
+ "operator": "not_exists",
910
+ "value": None,
911
+ }
912
+ if field_mutators:
913
+ result["field_mutators"] = field_mutators
914
+ return result
915
+ elif (
916
+ isinstance(operator, str)
917
+ and operator.lower() == "is"
918
+ and isinstance(right, str)
919
+ and right.lower() == "not"
920
+ ):
921
+ # This will be handled in the 4-element case for "field is not value"
922
+ # Return unknown for now - should not normally reach here
923
+ return {"type": "unknown", "value": parsed}
924
+ elif isinstance(operator, str) and operator == "!" and isinstance(right, str) and right.lower() == "is":
925
+ # Handle "field ! is value" - need to look ahead
926
+ # This is incomplete and will be handled in the 4-element case
927
+ # Return unknown for now - should not normally reach here
928
+ return {"type": "unknown", "value": parsed}
929
+ else:
930
+ # Comparison operation
931
+ # Handle 'in' operator - always value in field(s)
932
+ if isinstance(operator, str) and operator.lower() == "in":
933
+ # Check for old syntax: [field1, field2] in value
934
+ # The parser wraps list literals, so check for wrapped lists too
935
+ check_list = left
936
+ if isinstance(left, list) and len(left) == 1 and isinstance(left[0], list):
937
+ # Unwrap if it's [[field1, field2]]
938
+ check_list = left[0]
939
+
940
+ if isinstance(check_list, list) and len(check_list) > 1:
941
+ # Check if this is a list of identifiers (field names)
942
+ is_field_list = True
943
+ field_names = []
944
+ for item in check_list:
945
+ if isinstance(item, str):
946
+ field_names.append(item)
947
+ else:
948
+ is_field_list = False
949
+ break
950
+
951
+ if is_field_list:
952
+ # Extract value for suggestion
953
+ value_str = right
954
+ if isinstance(right, list) and len(right) > 0:
955
+ value_str = right[0]
956
+
957
+ raise TQLSyntaxError(
958
+ "Field list on left side of 'in' operator is no longer supported",
959
+ suggestions=[
960
+ f'"{value_str}" in [{", ".join(field_names)}]',
961
+ f"'{value_str}' in [{', '.join(field_names)}]",
962
+ ],
963
+ position=0,
964
+ )
965
+
966
+ # For 'in' operator, left is always the value, right is field(s)
967
+ # Extract the value from left
968
+ value_extracted, value_mutators = self.ast_builder.extract_value_info(left)
969
+
970
+ # Check if right is a list of fields
971
+ if isinstance(right, list) and len(right) > 0:
972
+ # Check if all elements are fields
973
+ all_fields = True
974
+ for item in right:
975
+ if isinstance(item, list):
976
+ # This is a typed_field group
977
+ if not (len(item) >= 1 and isinstance(item[0], str)):
978
+ all_fields = False
979
+ break
980
+ elif not isinstance(item, str):
981
+ all_fields = False
982
+ break
983
+
984
+ if all_fields:
985
+ # This is "value in [field1, field2, ...]" format
986
+ # Create an OR expression for all fields
987
+ field_comparisons = []
988
+ for field in right:
989
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
990
+ comparison = {
991
+ "type": "comparison",
992
+ "field": field_name,
993
+ "type_hint": type_hint,
994
+ "operator": "in",
995
+ "value": (
996
+ [value_extracted]
997
+ if not isinstance(value_extracted, list)
998
+ else value_extracted
999
+ ),
1000
+ }
1001
+ if field_mutators:
1002
+ comparison["field_mutators"] = field_mutators
1003
+ if value_mutators:
1004
+ comparison["value_mutators"] = value_mutators
1005
+ field_comparisons.append(comparison)
1006
+
1007
+ # Build OR expression
1008
+ result = field_comparisons[0]
1009
+ for i in range(1, len(field_comparisons)):
1010
+ result = {
1011
+ "type": "logical_op",
1012
+ "operator": "or",
1013
+ "left": result,
1014
+ "right": field_comparisons[i],
1015
+ }
1016
+ return result
1017
+
1018
+ # Otherwise, treat as standard "value in field" (single field)
1019
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(right)
1020
+ result = {
1021
+ "type": "comparison",
1022
+ "field": field_name,
1023
+ "type_hint": type_hint,
1024
+ "operator": "in",
1025
+ "value": [value_extracted] if not isinstance(value_extracted, list) else value_extracted,
1026
+ }
1027
+ if field_mutators:
1028
+ result["field_mutators"] = field_mutators
1029
+ if value_mutators:
1030
+ result["value_mutators"] = value_mutators
1031
+ return result
1032
+
1033
+ if operator.lower() == "between":
1034
+ # Between operator with list of values
1035
+ if isinstance(right, list) and len(right) == 2:
1036
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
1037
+ result = {
1038
+ "type": "comparison",
1039
+ "field": field_name,
1040
+ "type_hint": type_hint,
1041
+ "operator": "between",
1042
+ "value": right,
1043
+ }
1044
+ if field_mutators:
1045
+ result["field_mutators"] = field_mutators
1046
+ return result
1047
+ else:
1048
+ # Extract field name for error message
1049
+ field_display = (
1050
+ self.ast_builder.extract_field_info(left)[0] if isinstance(left, list) else left
1051
+ )
1052
+ raise TQLOperatorError(
1053
+ f"'between' operator requires exactly 2 values, got {len(right) if isinstance(right, list) else 1}",
1054
+ suggestions=[f"{field_display} between [18, 65]"],
1055
+ )
1056
+
1057
+ # Check for negated operators (space-separated like "not in")
1058
+ if isinstance(operator, list) and len(operator) == 2:
1059
+ neg_word, base_op = operator
1060
+ if neg_word.lower() == "not" or neg_word == "!":
1061
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
1062
+ value, value_mutators = self.ast_builder.extract_value_info(right)
1063
+ # Handle 'not none' -> 'any' (double negative)
1064
+ if base_op.lower() == "none":
1065
+ normalized_operator = "any"
1066
+ else:
1067
+ normalized_operator = f"not_{base_op.lower()}"
1068
+ result = {
1069
+ "type": "comparison",
1070
+ "field": field_name,
1071
+ "type_hint": type_hint,
1072
+ "operator": normalized_operator,
1073
+ "value": value,
1074
+ }
1075
+ if field_mutators:
1076
+ result["field_mutators"] = field_mutators
1077
+ if value_mutators:
1078
+ result["value_mutators"] = value_mutators
1079
+ return result
1080
+
1081
+ # Check for bang operators (like !contains, !in, etc.)
1082
+ if isinstance(operator, str) and operator.startswith("!") and operator != "!=":
1083
+ # Bang operator - convert to not_operator (but not !=)
1084
+ base_op = operator[1:].lower()
1085
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
1086
+ value, value_mutators = self.ast_builder.extract_value_info(right)
1087
+ # Handle '!none' -> 'any' (double negative)
1088
+ if base_op == "none":
1089
+ normalized_operator = "any"
1090
+ else:
1091
+ normalized_operator = f"not_{base_op}"
1092
+ result = {
1093
+ "type": "comparison",
1094
+ "field": field_name,
1095
+ "type_hint": type_hint,
1096
+ "operator": normalized_operator,
1097
+ "value": value,
1098
+ }
1099
+ if field_mutators:
1100
+ result["field_mutators"] = field_mutators
1101
+ if value_mutators:
1102
+ result["value_mutators"] = value_mutators
1103
+ return result
1104
+
1105
+ # Standard "field op value" format
1106
+ field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
1107
+ value, value_mutators = self.ast_builder.extract_value_info(right)
1108
+ # Normalize operator: convert 'none' to 'not_any'
1109
+ normalized_operator = operator.lower()
1110
+ if normalized_operator == "none":
1111
+ normalized_operator = "not_any"
1112
+
1113
+ # Additional check for old 'in' syntax that got parsed differently
1114
+ # If operator is 'in' and value is a list of identifiers, this might be the old syntax
1115
+ if normalized_operator == "in" and isinstance(value, list) and len(value) > 1:
1116
+ # Check if all items look like field names
1117
+ all_identifiers = all(
1118
+ isinstance(v, str) and v.replace(".", "").replace("_", "").isalnum() for v in value
1119
+ )
1120
+ if all_identifiers:
1121
+ raise TQLSyntaxError(
1122
+ "Field list in value syntax is no longer supported. Use value in [fields] instead",
1123
+ suggestions=[
1124
+ f'"{field_name}" in [{", ".join(value)}]',
1125
+ f"'{field_name}' in [{', '.join(value)}]",
1126
+ ],
1127
+ position=0,
1128
+ )
1129
+
1130
+ result = {
1131
+ "type": "comparison",
1132
+ "field": field_name,
1133
+ "type_hint": type_hint,
1134
+ "operator": normalized_operator,
1135
+ "value": value,
1136
+ }
1137
+ if field_mutators:
1138
+ result["field_mutators"] = field_mutators
1139
+ if value_mutators:
1140
+ result["value_mutators"] = value_mutators
1141
+ return result
1142
+ else:
1143
+ # Handle longer lists (chained operations)
1144
+ # This happens with infixNotation for multiple AND/OR operations
1145
+ # The structure will be flattened, so we need to reconstruct the tree
1146
+ return self._build_chained_ast(parsed)
1147
+ else:
1148
+ # Single value - should already be a proper AST node
1149
+ if isinstance(parsed, dict):
1150
+ return parsed
1151
+ else:
1152
+ # This shouldn't happen, but handle gracefully
1153
+ raise TQLParseError(f"Unexpected parsed value type: {type(parsed)}")
1154
+
1155
+ # This should be unreachable, but helps mypy understand all paths return
1156
+ raise AssertionError("Unreachable code in _build_ast")
1157
+
1158
+ def _build_chained_ast(self, parsed_list: List[Any]) -> Dict[str, Any]:
1159
+ """Build AST from chained operations (e.g., A AND B AND C).
1160
+
1161
+ Args:
1162
+ parsed_list: List of alternating operands and operators
1163
+
1164
+ Returns:
1165
+ Dictionary representing the AST node
1166
+ """
1167
+ if len(parsed_list) < 3:
1168
+ # Not enough elements for a chained operation
1169
+ return {"type": "unknown", "value": parsed_list}
1170
+
1171
+ # Start with the first operand
1172
+ result = self._build_ast(parsed_list[0])
1173
+
1174
+ # Process pairs of (operator, operand)
1175
+ i = 1
1176
+ while i < len(parsed_list) - 1:
1177
+ operator = parsed_list[i]
1178
+ operand = parsed_list[i + 1]
1179
+
1180
+ if operator.lower() in ["and", "or"]:
1181
+ result = {
1182
+ "type": "logical_op",
1183
+ "operator": operator.lower(),
1184
+ "left": result,
1185
+ "right": self._build_ast(operand),
1186
+ }
1187
+ else:
1188
+ # This shouldn't happen in a well-formed chained expression
1189
+ return {"type": "unknown", "value": parsed_list}
1190
+
1191
+ i += 2
1192
+
1193
+ return result
1194
+
1195
+ def _build_stats_ast(self, parsed: List[Any]) -> Dict[str, Any]: # noqa: C901
1196
+ """Build AST for stats expression.
1197
+
1198
+ Args:
1199
+ parsed: Parsed stats expression [stats, aggregations, [by, fields]]
1200
+
1201
+ Returns:
1202
+ Dictionary representing the stats AST
1203
+ """
1204
+ result: Dict[str, Any] = {"type": "stats_expr", "aggregations": [], "group_by": []}
1205
+
1206
+ # Skip the 'stats' keyword
1207
+ i = 1
1208
+
1209
+ # Process aggregations until we hit 'by' or end
1210
+ while i < len(parsed):
1211
+ if isinstance(parsed[i], str) and parsed[i].lower() == "by":
1212
+ # Start of group by clause
1213
+ i += 1
1214
+ break
1215
+
1216
+ # Process aggregation
1217
+ if isinstance(parsed[i], str) and parsed[i].lower() == "count":
1218
+ # Special case for count(*)
1219
+ result["aggregations"].append({"function": "count", "field": "*", "alias": None})
1220
+ i += 1
1221
+ elif isinstance(parsed[i], list):
1222
+ # This is a list of aggregations
1223
+ for item in parsed[i]:
1224
+ agg_dict: Dict[str, Any] = {}
1225
+
1226
+ if isinstance(item, str) and item.lower() == "count":
1227
+ # count(*) case
1228
+ agg_dict["function"] = "count"
1229
+ agg_dict["field"] = "*"
1230
+ agg_dict["alias"] = None
1231
+ elif isinstance(item, list):
1232
+ # Regular aggregation: [func, field, ...] or [[func, field], 'as', 'alias']
1233
+ if len(item) >= 2 and isinstance(item[0], list):
1234
+ # Aggregation with alias: [[func, field, ...], 'as', 'alias']
1235
+ func_spec = item[0]
1236
+ # Normalize function aliases
1237
+ func = func_spec[0].lower()
1238
+ if func == "avg":
1239
+ func = "average"
1240
+ elif func == "med":
1241
+ func = "median"
1242
+ elif func == "standard_deviation":
1243
+ func = "std"
1244
+ elif func in ["p", "pct", "percentiles"]:
1245
+ func = "percentile"
1246
+ elif func in ["pct_rank", "pct_ranks", "percentile_ranks"]:
1247
+ func = "percentile_rank"
1248
+ agg_dict["function"] = func
1249
+ agg_dict["field"] = func_spec[1] if len(func_spec) > 1 else "*"
1250
+
1251
+ # Check for modifiers (top/bottom) or percentile values
1252
+ if len(func_spec) >= 3:
1253
+ # Check if it's a percentile function with values
1254
+ func_name = agg_dict["function"]
1255
+ if func_name in ["percentile", "percentiles", "p", "pct"]:
1256
+ # Handle percentile values - they come as separate elements
1257
+ percentile_values = []
1258
+ for j in range(2, len(func_spec)):
1259
+ if isinstance(func_spec[j], str) and func_spec[j].replace(".", "").isdigit():
1260
+ percentile_values.append(float(func_spec[j]))
1261
+ else:
1262
+ break # Stop if we hit a non-numeric value
1263
+ agg_dict["percentile_values"] = percentile_values
1264
+ elif func_name in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
1265
+ # Handle percentile rank values - they come as separate elements
1266
+ rank_values = []
1267
+ for j in range(2, len(func_spec)):
1268
+ if (
1269
+ isinstance(func_spec[j], str)
1270
+ and func_spec[j].replace(".", "").replace("-", "").isdigit()
1271
+ ):
1272
+ rank_values.append(float(func_spec[j]))
1273
+ else:
1274
+ break # Stop if we hit a non-numeric value
1275
+ agg_dict["rank_values"] = rank_values
1276
+ elif len(func_spec) >= 4 and func_spec[2].lower() in ["top", "bottom"]:
1277
+ agg_dict["modifier"] = func_spec[2].lower()
1278
+ agg_dict["limit"] = int(func_spec[3])
1279
+
1280
+ # Check for alias
1281
+ if len(item) >= 3 and item[1].lower() == "as":
1282
+ agg_dict["alias"] = item[2]
1283
+ else:
1284
+ agg_dict["alias"] = None
1285
+ else:
1286
+ # Simple aggregation: [func, field]
1287
+ # Normalize function aliases
1288
+ func = item[0].lower() if len(item) > 0 else "count"
1289
+ if func == "avg":
1290
+ func = "average"
1291
+ elif func == "med":
1292
+ func = "median"
1293
+ elif func == "standard_deviation":
1294
+ func = "std"
1295
+ elif func in ["p", "pct", "percentiles"]:
1296
+ func = "percentile"
1297
+ elif func in ["pct_rank", "pct_ranks", "percentile_ranks"]:
1298
+ func = "percentile_rank"
1299
+ agg_dict["function"] = func
1300
+ agg_dict["field"] = item[1] if len(item) > 1 else "*"
1301
+ agg_dict["alias"] = None
1302
+
1303
+ # Check for modifiers or percentile values
1304
+ if len(item) >= 3:
1305
+ func_name = agg_dict["function"]
1306
+ if func_name in ["percentile", "percentiles", "p", "pct"]:
1307
+ # Handle percentile values - they come as separate elements
1308
+ percentile_values = []
1309
+ for j in range(2, len(item)):
1310
+ if isinstance(item[j], str) and item[j].replace(".", "").isdigit():
1311
+ percentile_values.append(float(item[j]))
1312
+ else:
1313
+ break # Stop if we hit a non-numeric value
1314
+ agg_dict["percentile_values"] = percentile_values
1315
+ elif func_name in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
1316
+ # Handle percentile rank values - they come as separate elements
1317
+ rank_values = []
1318
+ for j in range(2, len(item)):
1319
+ if (
1320
+ isinstance(item[j], str)
1321
+ and item[j].replace(".", "").replace("-", "").isdigit()
1322
+ ):
1323
+ rank_values.append(float(item[j]))
1324
+ else:
1325
+ break # Stop if we hit a non-numeric value
1326
+ agg_dict["rank_values"] = rank_values
1327
+ elif len(item) >= 4 and item[2].lower() in ["top", "bottom"]:
1328
+ agg_dict["modifier"] = item[2].lower()
1329
+ agg_dict["limit"] = int(item[3])
1330
+
1331
+ if "function" in agg_dict:
1332
+ result["aggregations"].append(agg_dict)
1333
+
1334
+ i += 1
1335
+ else:
1336
+ i += 1
1337
+
1338
+ # Process group by fields
1339
+ while i < len(parsed):
1340
+ if isinstance(parsed[i], str) and parsed[i] not in ["by", ","]:
1341
+ result["group_by"].append(parsed[i])
1342
+ i += 1
1343
+
1344
+ return result
1345
+
1346
+
1347
+ # Legacy function for backward compatibility
1348
def parse_query(query: str):
    """Parse a TQL query string and return the raw pyparsing result.

    Retained so that existing callers keep working; new code should use
    the TQLParser class directly.

    Args:
        query: The TQL query string.

    Returns:
        The pyparsing ParseResults produced by the ``tql_expr`` grammar
        rule, with the whole input required to match (``parseAll=True``).
    """
    # Legacy callers expect the unprocessed pyparsing output, so the grammar
    # rule is invoked directly on a fresh parser instance.
    return TQLParser().grammar.tql_expr.parseString(query, parseAll=True)