tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
tql/parser.py
ADDED
|
@@ -0,0 +1,1363 @@
|
|
|
1
|
+
"""Parser module for Tellaro Query Language (TQL).
|
|
2
|
+
|
|
3
|
+
This module provides the main TQLParser class that orchestrates parsing
|
|
4
|
+
using the modular parser components.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List
|
|
8
|
+
|
|
9
|
+
from pyparsing import ParseException, ParserElement
|
|
10
|
+
|
|
11
|
+
from .exceptions import TQLOperatorError, TQLParseError, TQLSyntaxError, TQLValueError
|
|
12
|
+
from .parser_components.ast_builder import ASTBuilder
|
|
13
|
+
from .parser_components.error_analyzer import ErrorAnalyzer
|
|
14
|
+
from .parser_components.field_extractor import FieldExtractor
|
|
15
|
+
from .parser_components.grammar import TQLGrammar
|
|
16
|
+
|
|
17
|
+
# Turn on pyparsing's packrat (memoizing) parsing. This is a class-level
# switch on ParserElement invoked at import time, so it is in effect for the
# whole process as soon as this module is imported -- not only for TQL's
# grammar.
ParserElement.enablePackrat()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TQLParser:
|
|
21
|
+
"""TQL query parser.
|
|
22
|
+
|
|
23
|
+
Parses TQL query strings into an Abstract Syntax Tree (AST) that can be
|
|
24
|
+
evaluated against data or converted to backend-specific query formats.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def __init__(self) -> None:
    """Create the parser together with the components it delegates to.

    One instance of each collaborator is stored on the parser:

    * ``grammar`` -- a ``TQLGrammar``; its ``tql_expr`` element is what
      ``parse`` runs the query through.
    * ``ast_builder`` -- an ``ASTBuilder`` used while turning pyparsing
      output into AST nodes.
    * ``error_analyzer`` -- an ``ErrorAnalyzer`` consulted by ``parse``
      when pyparsing rejects a query.
    * ``field_extractor`` -- a ``FieldExtractor`` used by
      ``extract_fields``.
    """
    self.grammar = TQLGrammar()
    self.ast_builder = ASTBuilder()
    self.error_analyzer = ErrorAnalyzer()
    self.field_extractor = FieldExtractor()
|
|
33
|
+
|
|
34
|
+
def parse(self, query: str) -> Dict[str, Any]:
    """Parse a TQL query string into an AST.

    The query is run through ``self.grammar.tql_expr`` with
    ``parseAll=True`` and the first element of the resulting list is handed
    to ``self._build_ast``.

    Args:
        query: The TQL query string to parse.

    Returns:
        Dictionary representing the parsed query AST.

    Raises:
        TQLSyntaxError: If pyparsing rejects the query. When a string
            literal is left open the error points at the quote that opened
            it; otherwise the message and suggestions come from
            ``self.error_analyzer.analyze_parse_error``.
        TQLOperatorError: Re-raised unchanged after ``query`` has been
            attached to it.
        TQLValueError: If a ``ValueError`` escapes parsing or AST building.
        TQLParseError: For any other unexpected exception.
    """
    try:
        parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)

        # Convert pyparsing's nested-list output to our AST format.
        return self._build_ast(parsed_result.asList()[0])

    except ParseException as e:
        # NOTE(review): ``col`` is relative to the failing line, so for a
        # multi-line query this is not an absolute offset into ``query`` --
        # confirm which of the two ErrorAnalyzer expects.
        position = e.col - 1 if hasattr(e, "col") else e.loc

        # Look for a string literal that is never closed. A single
        # left-to-right scan tracks which quote character (if any) is
        # currently open, so a quote of the *other* kind inside a literal
        # (e.g. "it's") is not mistaken for an opening quote, which a plain
        # parity count of each quote character would do.
        open_quote = None  # quote character of the literal currently open
        open_at = -1  # index at which that literal started
        escaped = False
        for index, char in enumerate(query):
            if open_quote is None:
                if char in ('"', "'"):
                    open_quote, open_at = char, index
            elif escaped:
                escaped = False
            elif char == "\\":
                # Assumes the grammar accepts backslash escapes inside
                # quoted strings -- TODO confirm. If it does not, the only
                # effect is falling through to the generic analysis below.
                escaped = True
            elif char == open_quote:
                open_quote = None

        if open_quote is not None:
            raise TQLSyntaxError(
                f"Unterminated string literal starting at position {open_at}",
                position=open_at,
                query=query,
                suggestions=[],
            ) from e

        # Otherwise let the analyzer turn pyparsing's message into
        # friendlier feedback.
        error_msg, suggestions = self.error_analyzer.analyze_parse_error(query, position, str(e))

        raise TQLSyntaxError(error_msg, position=position, query=query, suggestions=suggestions) from e
    except TQLOperatorError as e:
        # Attach the query for context and re-raise the same exception;
        # a bare ``raise`` keeps the original traceback intact.
        e.query = query
        raise
    except ValueError as e:
        # Value errors coming from our own validation.
        raise TQLValueError(str(e), query=query) from e
    except Exception as e:
        # Generic parse error for anything unexpected.
        raise TQLParseError(f"Invalid TQL syntax: {e!s}", query=query) from e
|
|
90
|
+
|
|
91
|
+
def extract_fields(self, query: str) -> List[str]:
    """Return the field names referenced by a TQL query.

    The query is first run through ``parse``; the resulting AST is then
    handed to ``self.field_extractor.extract_fields`` and that call's
    result is returned unchanged. No field mappings are applied here.

    Args:
        query: The TQL query string to inspect.

    Returns:
        The list produced by the field extractor (described by the
        original author as the sorted, unique field names in the query).

    Raises:
        TQLParseError: If the query has invalid syntax (raised by
            ``parse``, along with anything else it raises).
    """
    return self.field_extractor.extract_fields(self.parse(query))
|
|
111
|
+
|
|
112
|
+
def _build_ast(self, parsed: Any) -> Dict[str, Any]: # noqa: C901
|
|
113
|
+
"""Build AST from parsed pyparsing result.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
parsed: The parsed result from pyparsing
|
|
117
|
+
|
|
118
|
+
Returns:
|
|
119
|
+
Dictionary representing the AST node
|
|
120
|
+
"""
|
|
121
|
+
if isinstance(parsed, list):
|
|
122
|
+
if len(parsed) == 1:
|
|
123
|
+
# Single item, check if it's a field with is_private/is_global mutator
|
|
124
|
+
item = parsed[0]
|
|
125
|
+
if isinstance(item, list):
|
|
126
|
+
# Could be a typed_field
|
|
127
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(item)
|
|
128
|
+
if field_mutators:
|
|
129
|
+
# Check if the last mutator is is_private or is_global
|
|
130
|
+
last_mutator = field_mutators[-1] if field_mutators else None
|
|
131
|
+
if last_mutator and last_mutator.get("name", "").lower() in ["is_private", "is_global"]:
|
|
132
|
+
# This is field | is_private or field | is_global without operator
|
|
133
|
+
# Default to eq true
|
|
134
|
+
result = {
|
|
135
|
+
"type": "comparison",
|
|
136
|
+
"field": field_name,
|
|
137
|
+
"type_hint": type_hint,
|
|
138
|
+
"operator": "eq",
|
|
139
|
+
"value": "true",
|
|
140
|
+
}
|
|
141
|
+
if field_mutators:
|
|
142
|
+
result["field_mutators"] = field_mutators
|
|
143
|
+
return result
|
|
144
|
+
# Single item, unwrap it
|
|
145
|
+
return self._build_ast(parsed[0])
|
|
146
|
+
elif len(parsed) >= 2 and isinstance(parsed[0], str) and parsed[0].lower() == "stats":
|
|
147
|
+
# This is a stats expression without filter (applies to all records)
|
|
148
|
+
return self._build_stats_ast(parsed)
|
|
149
|
+
elif len(parsed) == 2:
|
|
150
|
+
# Could be unary logical operator (NOT), unary comparison (field exists), stats expression, or empty geo expression
|
|
151
|
+
first, second = parsed
|
|
152
|
+
|
|
153
|
+
# Check for stats expression: | stats ...
|
|
154
|
+
if isinstance(first, str) and first == "|" and isinstance(second, list) and len(second) > 0:
|
|
155
|
+
# Check if this is a stats expression
|
|
156
|
+
if isinstance(second[0], str) and second[0].lower() == "stats":
|
|
157
|
+
# This is | stats expression
|
|
158
|
+
return self._build_stats_ast(second)
|
|
159
|
+
|
|
160
|
+
# Check for empty geo expression: field | geo
|
|
161
|
+
if isinstance(second, str) and second.lower() in ["geo", "geoip_lookup"]:
|
|
162
|
+
# This is an empty geo expression: field | geo()
|
|
163
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
164
|
+
|
|
165
|
+
result = {
|
|
166
|
+
"type": "geo_expr",
|
|
167
|
+
"field": field_name,
|
|
168
|
+
"type_hint": type_hint,
|
|
169
|
+
"field_mutators": field_mutators,
|
|
170
|
+
"conditions": None, # No conditions for enrichment-only
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
return result
|
|
174
|
+
|
|
175
|
+
# Check for empty nslookup expression: field | nslookup
|
|
176
|
+
elif isinstance(second, str) and second.lower() == "nslookup":
|
|
177
|
+
# This is an empty nslookup expression: field | nslookup()
|
|
178
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
179
|
+
|
|
180
|
+
result = {
|
|
181
|
+
"type": "nslookup_expr",
|
|
182
|
+
"field": field_name,
|
|
183
|
+
"type_hint": type_hint,
|
|
184
|
+
"field_mutators": field_mutators,
|
|
185
|
+
"conditions": None, # No conditions for enrichment-only
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
return result
|
|
189
|
+
|
|
190
|
+
# Check for is_private/is_global without operator (defaults to eq true)
|
|
191
|
+
# This happens when we have a field with is_private/is_global as the last mutator
|
|
192
|
+
elif isinstance(first, str) and isinstance(second, list) and len(second) == 1:
|
|
193
|
+
# This could be field | mutator structure
|
|
194
|
+
mutator_name = second[0] if isinstance(second[0], str) else None
|
|
195
|
+
if mutator_name and mutator_name.lower() in ["is_private", "is_global"]:
|
|
196
|
+
# Build a typed_field from these components
|
|
197
|
+
typed_field = [first, second]
|
|
198
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(typed_field)
|
|
199
|
+
# This is field | is_private or field | is_global without operator
|
|
200
|
+
# Default to eq true
|
|
201
|
+
result = {
|
|
202
|
+
"type": "comparison",
|
|
203
|
+
"field": field_name,
|
|
204
|
+
"type_hint": type_hint,
|
|
205
|
+
"operator": "eq",
|
|
206
|
+
"value": "true",
|
|
207
|
+
}
|
|
208
|
+
if field_mutators:
|
|
209
|
+
result["field_mutators"] = field_mutators
|
|
210
|
+
return result
|
|
211
|
+
|
|
212
|
+
elif isinstance(first, str) and (first.lower() == "not" or first == "!"):
|
|
213
|
+
# Unary logical operator (NOT or !)
|
|
214
|
+
return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second)}
|
|
215
|
+
elif isinstance(second, str) and (second.lower() == "exists" or second.lower() == "!exists"):
|
|
216
|
+
# Unary comparison operation (field exists or !exists)
|
|
217
|
+
field_name, type_hint, mutators = self.ast_builder.extract_field_info(first)
|
|
218
|
+
operator = "not_exists" if second.lower() == "!exists" else "exists"
|
|
219
|
+
result = {
|
|
220
|
+
"type": "comparison",
|
|
221
|
+
"field": field_name,
|
|
222
|
+
"type_hint": type_hint,
|
|
223
|
+
"operator": operator,
|
|
224
|
+
"value": None, # No value for unary operators
|
|
225
|
+
}
|
|
226
|
+
if mutators:
|
|
227
|
+
result["field_mutators"] = mutators
|
|
228
|
+
return result
|
|
229
|
+
elif isinstance(first, list) and isinstance(second, list):
|
|
230
|
+
# This could be filter + stats
|
|
231
|
+
# Check if second element starts with 'stats'
|
|
232
|
+
if len(second) >= 2 and isinstance(second[0], str) and second[0].lower() == "stats":
|
|
233
|
+
# This is filter | stats
|
|
234
|
+
return {
|
|
235
|
+
"type": "query_with_stats",
|
|
236
|
+
"filter": self._build_ast(first),
|
|
237
|
+
"stats": self._build_stats_ast(second),
|
|
238
|
+
}
|
|
239
|
+
else:
|
|
240
|
+
# Fallback to treating as unary logical operator
|
|
241
|
+
return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second)}
|
|
242
|
+
elif len(parsed) >= 3:
|
|
243
|
+
# Check if this is a field with multiple mutators ending in is_private/is_global
|
|
244
|
+
if isinstance(parsed[0], str) and all(isinstance(item, list) and len(item) == 1 for item in parsed[1:]):
|
|
245
|
+
# This looks like field | mutator1 | mutator2 | ...
|
|
246
|
+
last_mutator_list = parsed[-1]
|
|
247
|
+
if (
|
|
248
|
+
len(last_mutator_list) == 1
|
|
249
|
+
and isinstance(last_mutator_list[0], str)
|
|
250
|
+
and last_mutator_list[0].lower() in ["is_private", "is_global"]
|
|
251
|
+
):
|
|
252
|
+
# This is a field with mutators ending in is_private/is_global
|
|
253
|
+
# Build the typed_field structure and default to eq true
|
|
254
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed)
|
|
255
|
+
result = {
|
|
256
|
+
"type": "comparison",
|
|
257
|
+
"field": field_name,
|
|
258
|
+
"type_hint": type_hint,
|
|
259
|
+
"operator": "eq",
|
|
260
|
+
"value": "true",
|
|
261
|
+
}
|
|
262
|
+
if field_mutators:
|
|
263
|
+
result["field_mutators"] = field_mutators
|
|
264
|
+
return result
|
|
265
|
+
|
|
266
|
+
if len(parsed) == 4:
|
|
267
|
+
# Check for ANY/ALL operators: ANY field op value
|
|
268
|
+
first, field, operator, value = parsed
|
|
269
|
+
|
|
270
|
+
if isinstance(first, str) and first.lower() in ["any", "all"]:
|
|
271
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
|
|
272
|
+
value_extracted, value_mutators = self.ast_builder.extract_value_info(value)
|
|
273
|
+
result = {
|
|
274
|
+
"type": "collection_op",
|
|
275
|
+
"operator": first.lower(),
|
|
276
|
+
"field": field_name,
|
|
277
|
+
"type_hint": type_hint,
|
|
278
|
+
"comparison_operator": operator.lower(),
|
|
279
|
+
"value": value_extracted,
|
|
280
|
+
}
|
|
281
|
+
if field_mutators:
|
|
282
|
+
result["field_mutators"] = field_mutators
|
|
283
|
+
if value_mutators:
|
|
284
|
+
result["value_mutators"] = value_mutators
|
|
285
|
+
return result
|
|
286
|
+
else:
|
|
287
|
+
# Handle other 4-element cases like "field is not value", "field not in value", or geo expressions
|
|
288
|
+
first, second, third, fourth = parsed
|
|
289
|
+
|
|
290
|
+
# Check for negated operators like "field not none value"
|
|
291
|
+
if (
|
|
292
|
+
isinstance(second, str)
|
|
293
|
+
and (second.lower() == "not" or second == "!")
|
|
294
|
+
and isinstance(third, str)
|
|
295
|
+
):
|
|
296
|
+
# This is a negated operator
|
|
297
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
298
|
+
# Handle 'not none' -> 'any' (double negative)
|
|
299
|
+
if third.lower() == "none":
|
|
300
|
+
normalized_operator = "any"
|
|
301
|
+
else:
|
|
302
|
+
normalized_operator = f"not_{third.lower()}"
|
|
303
|
+
result = {
|
|
304
|
+
"type": "comparison",
|
|
305
|
+
"field": field_name,
|
|
306
|
+
"type_hint": type_hint,
|
|
307
|
+
"operator": normalized_operator,
|
|
308
|
+
"value": fourth,
|
|
309
|
+
}
|
|
310
|
+
if field_mutators:
|
|
311
|
+
result["field_mutators"] = field_mutators
|
|
312
|
+
return result
|
|
313
|
+
|
|
314
|
+
# Check for geo() expression with parameters: field geo params...
|
|
315
|
+
if isinstance(second, str) and second.lower() in ["geo", "geoip_lookup"]:
|
|
316
|
+
# This is a geo expression: field | geo(params...)
|
|
317
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
318
|
+
|
|
319
|
+
# All remaining elements are parameters (could be conditions or actual params)
|
|
320
|
+
conditions = None
|
|
321
|
+
geo_params = {}
|
|
322
|
+
|
|
323
|
+
# Process all parameters starting from third element
|
|
324
|
+
param_elements = parsed[2:] # Everything after field and 'geo'
|
|
325
|
+
|
|
326
|
+
for element in param_elements:
|
|
327
|
+
if isinstance(element, list):
|
|
328
|
+
if len(element) == 2:
|
|
329
|
+
# Check if this is a parameter or a condition
|
|
330
|
+
if isinstance(element[0], str):
|
|
331
|
+
# This is a proper parameter: ['param_name', 'value']
|
|
332
|
+
param_name, param_value = element
|
|
333
|
+
# Convert string boolean values to actual booleans
|
|
334
|
+
if isinstance(param_value, str):
|
|
335
|
+
if param_value.lower() == "true":
|
|
336
|
+
param_value = True
|
|
337
|
+
elif param_value.lower() == "false":
|
|
338
|
+
param_value = False
|
|
339
|
+
geo_params[param_name] = param_value
|
|
340
|
+
else:
|
|
341
|
+
# This is a condition like [['country_iso_code'], '=', ['US']]
|
|
342
|
+
conditions = element
|
|
343
|
+
elif len(element) == 3 and element[1] == "=":
|
|
344
|
+
# This is a parameter parsed as comparison: [['param'], '=', ['value']]
|
|
345
|
+
if (
|
|
346
|
+
isinstance(element[0], list)
|
|
347
|
+
and len(element[0]) == 1
|
|
348
|
+
and isinstance(element[0][0], str)
|
|
349
|
+
and element[0][0] in ["force", "cache", "cache_ttl", "db_path", "save", "field"]
|
|
350
|
+
):
|
|
351
|
+
param_name = element[0][0]
|
|
352
|
+
param_value = (
|
|
353
|
+
element[2]
|
|
354
|
+
if not isinstance(element[2], list)
|
|
355
|
+
else element[2][0] if element[2] else None
|
|
356
|
+
)
|
|
357
|
+
# Convert string boolean values to actual booleans
|
|
358
|
+
if isinstance(param_value, str):
|
|
359
|
+
if param_value.lower() == "true":
|
|
360
|
+
param_value = True
|
|
361
|
+
elif param_value.lower() == "false":
|
|
362
|
+
param_value = False
|
|
363
|
+
geo_params[param_name] = param_value
|
|
364
|
+
else:
|
|
365
|
+
# This is actual conditions, not a parameter
|
|
366
|
+
conditions = element
|
|
367
|
+
else:
|
|
368
|
+
# This might be conditions
|
|
369
|
+
conditions = element
|
|
370
|
+
|
|
371
|
+
result = {
|
|
372
|
+
"type": "geo_expr",
|
|
373
|
+
"field": field_name,
|
|
374
|
+
"type_hint": type_hint,
|
|
375
|
+
"field_mutators": field_mutators,
|
|
376
|
+
"conditions": self._build_ast(conditions) if conditions else None,
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
# Add geo parameters if any
|
|
380
|
+
if geo_params:
|
|
381
|
+
result["geo_params"] = geo_params
|
|
382
|
+
|
|
383
|
+
return result
|
|
384
|
+
|
|
385
|
+
# Check for nslookup() expression with parameters: field nslookup params...
|
|
386
|
+
elif isinstance(second, str) and second.lower() == "nslookup":
|
|
387
|
+
# This is a nslookup expression: field | nslookup(params...)
|
|
388
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
389
|
+
|
|
390
|
+
# All remaining elements are parameters (could be conditions or actual params)
|
|
391
|
+
conditions = None
|
|
392
|
+
nslookup_params = {}
|
|
393
|
+
|
|
394
|
+
# Process all parameters starting from third element
|
|
395
|
+
param_elements = parsed[2:] # Everything after field and 'nslookup'
|
|
396
|
+
|
|
397
|
+
for element in param_elements:
|
|
398
|
+
if isinstance(element, list):
|
|
399
|
+
if len(element) == 2:
|
|
400
|
+
# Check if this is a parameter or a condition
|
|
401
|
+
if isinstance(element[0], str):
|
|
402
|
+
# This is a proper parameter: ['param_name', 'value']
|
|
403
|
+
param_name, param_value = element
|
|
404
|
+
# Convert string boolean values to actual booleans
|
|
405
|
+
if isinstance(param_value, str):
|
|
406
|
+
if param_value.lower() == "true":
|
|
407
|
+
param_value = True
|
|
408
|
+
elif param_value.lower() == "false":
|
|
409
|
+
param_value = False
|
|
410
|
+
nslookup_params[param_name] = param_value
|
|
411
|
+
else:
|
|
412
|
+
# This is a condition like [['resolved_ip'], 'exists']
|
|
413
|
+
conditions = element
|
|
414
|
+
elif len(element) == 3 and element[1] == "=":
|
|
415
|
+
# This is a parameter parsed as comparison: [['param'], '=', ['value']]
|
|
416
|
+
if (
|
|
417
|
+
isinstance(element[0], list)
|
|
418
|
+
and len(element[0]) == 1
|
|
419
|
+
and isinstance(element[0][0], str)
|
|
420
|
+
and element[0][0]
|
|
421
|
+
in ["force", "servers", "append_field", "save", "types", "field"]
|
|
422
|
+
):
|
|
423
|
+
param_name = element[0][0]
|
|
424
|
+
param_value = (
|
|
425
|
+
element[2]
|
|
426
|
+
if not isinstance(element[2], list)
|
|
427
|
+
else element[2][0] if element[2] else None
|
|
428
|
+
)
|
|
429
|
+
# Handle types parameter which should be a list
|
|
430
|
+
if param_name == "types" and isinstance(element[2], list):
|
|
431
|
+
param_value = element[2]
|
|
432
|
+
# Unwrap if double-wrapped
|
|
433
|
+
if len(param_value) == 1 and isinstance(param_value[0], list):
|
|
434
|
+
param_value = param_value[0]
|
|
435
|
+
# Convert string boolean values to actual booleans
|
|
436
|
+
elif isinstance(param_value, str):
|
|
437
|
+
if param_value.lower() == "true":
|
|
438
|
+
param_value = True
|
|
439
|
+
elif param_value.lower() == "false":
|
|
440
|
+
param_value = False
|
|
441
|
+
nslookup_params[param_name] = param_value
|
|
442
|
+
else:
|
|
443
|
+
# This is actual conditions, not a parameter
|
|
444
|
+
conditions = element
|
|
445
|
+
else:
|
|
446
|
+
# This might be conditions
|
|
447
|
+
conditions = element
|
|
448
|
+
|
|
449
|
+
result = {
|
|
450
|
+
"type": "nslookup_expr",
|
|
451
|
+
"field": field_name,
|
|
452
|
+
"type_hint": type_hint,
|
|
453
|
+
"field_mutators": field_mutators,
|
|
454
|
+
"conditions": self._build_ast(conditions) if conditions else None,
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
# Add nslookup parameters if any
|
|
458
|
+
if nslookup_params:
|
|
459
|
+
result["nslookup_params"] = nslookup_params
|
|
460
|
+
|
|
461
|
+
return result
|
|
462
|
+
|
|
463
|
+
# Handle "field is not value" or "field ! is value"
|
|
464
|
+
if (
|
|
465
|
+
isinstance(second, str)
|
|
466
|
+
and second.lower() == "is"
|
|
467
|
+
and isinstance(third, str)
|
|
468
|
+
and third.lower() == "not"
|
|
469
|
+
):
|
|
470
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
471
|
+
result = {
|
|
472
|
+
"type": "comparison",
|
|
473
|
+
"field": field_name,
|
|
474
|
+
"type_hint": type_hint,
|
|
475
|
+
"operator": "is_not",
|
|
476
|
+
"value": fourth,
|
|
477
|
+
}
|
|
478
|
+
if field_mutators:
|
|
479
|
+
result["field_mutators"] = field_mutators
|
|
480
|
+
return result
|
|
481
|
+
elif isinstance(second, str) and second == "!" and isinstance(third, str) and third.lower() == "is":
|
|
482
|
+
# Handle "field ! is value"
|
|
483
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
484
|
+
result = {
|
|
485
|
+
"type": "comparison",
|
|
486
|
+
"field": field_name,
|
|
487
|
+
"type_hint": type_hint,
|
|
488
|
+
"operator": "is_not",
|
|
489
|
+
"value": fourth,
|
|
490
|
+
}
|
|
491
|
+
if field_mutators:
|
|
492
|
+
result["field_mutators"] = field_mutators
|
|
493
|
+
return result
|
|
494
|
+
|
|
495
|
+
# Handle "field not operator value" (e.g., "field not in value") or "field ! operator value"
|
|
496
|
+
if isinstance(second, str) and (second.lower() == "not" or second == "!"):
|
|
497
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(first)
|
|
498
|
+
value, value_mutators = self.ast_builder.extract_value_info(fourth)
|
|
499
|
+
result = {
|
|
500
|
+
"type": "comparison",
|
|
501
|
+
"field": field_name,
|
|
502
|
+
"type_hint": type_hint,
|
|
503
|
+
"operator": f"not_{third.lower()}",
|
|
504
|
+
"value": value,
|
|
505
|
+
}
|
|
506
|
+
if field_mutators:
|
|
507
|
+
result["field_mutators"] = field_mutators
|
|
508
|
+
if value_mutators:
|
|
509
|
+
result["value_mutators"] = value_mutators
|
|
510
|
+
return result
|
|
511
|
+
elif len(parsed) == 5:
|
|
512
|
+
# Check for natural between syntax: field between value1 and value2
|
|
513
|
+
# Only process as between if the second element is "between"
|
|
514
|
+
if (
|
|
515
|
+
isinstance(parsed[1], str)
|
|
516
|
+
and parsed[1].lower() == "between"
|
|
517
|
+
and isinstance(parsed[3], str)
|
|
518
|
+
and parsed[3].lower() == "and"
|
|
519
|
+
):
|
|
520
|
+
field, between_op, value1, and_op, value2 = parsed
|
|
521
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
|
|
522
|
+
result = {
|
|
523
|
+
"type": "comparison",
|
|
524
|
+
"field": field_name,
|
|
525
|
+
"type_hint": type_hint,
|
|
526
|
+
"operator": "between",
|
|
527
|
+
"value": [value1, value2],
|
|
528
|
+
}
|
|
529
|
+
if field_mutators:
|
|
530
|
+
result["field_mutators"] = field_mutators
|
|
531
|
+
return result
|
|
532
|
+
else:
|
|
533
|
+
# Check if this is a geo expression with multiple parameters
|
|
534
|
+
if isinstance(parsed[1], str) and parsed[1].lower() in ["geo", "geoip_lookup"]:
|
|
535
|
+
# This is a geo expression with multiple parameters
|
|
536
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed[0])
|
|
537
|
+
|
|
538
|
+
# All remaining elements are parameters (could be conditions or actual params)
|
|
539
|
+
conditions = None
|
|
540
|
+
geo_params = {}
|
|
541
|
+
|
|
542
|
+
# Process all parameters starting from third element
|
|
543
|
+
param_elements = parsed[2:] # Everything after field and 'geo'
|
|
544
|
+
|
|
545
|
+
for element in param_elements:
|
|
546
|
+
if isinstance(element, list):
|
|
547
|
+
if len(element) == 2:
|
|
548
|
+
# Check if this is a parameter or a condition
|
|
549
|
+
if isinstance(element[0], str):
|
|
550
|
+
# This is a proper parameter: ['param_name', 'value']
|
|
551
|
+
param_name, param_value = element
|
|
552
|
+
# Convert string boolean values to actual booleans
|
|
553
|
+
if isinstance(param_value, str):
|
|
554
|
+
if param_value.lower() == "true":
|
|
555
|
+
param_value = True
|
|
556
|
+
elif param_value.lower() == "false":
|
|
557
|
+
param_value = False
|
|
558
|
+
geo_params[param_name] = param_value
|
|
559
|
+
else:
|
|
560
|
+
# This is a condition like [['country_iso_code'], '=', ['US']]
|
|
561
|
+
conditions = element
|
|
562
|
+
elif len(element) == 3 and element[1] == "=":
|
|
563
|
+
# This is a parameter parsed as comparison: [['param'], '=', ['value']]
|
|
564
|
+
if (
|
|
565
|
+
isinstance(element[0], list)
|
|
566
|
+
and len(element[0]) == 1
|
|
567
|
+
and isinstance(element[0][0], str)
|
|
568
|
+
and element[0][0] in ["force", "cache", "cache_ttl", "db_path", "save", "field"]
|
|
569
|
+
):
|
|
570
|
+
param_name = element[0][0]
|
|
571
|
+
param_value = (
|
|
572
|
+
element[2]
|
|
573
|
+
if not isinstance(element[2], list)
|
|
574
|
+
else element[2][0] if element[2] else None
|
|
575
|
+
)
|
|
576
|
+
# Convert string boolean values to actual booleans
|
|
577
|
+
if isinstance(param_value, str):
|
|
578
|
+
if param_value.lower() == "true":
|
|
579
|
+
param_value = True
|
|
580
|
+
elif param_value.lower() == "false":
|
|
581
|
+
param_value = False
|
|
582
|
+
geo_params[param_name] = param_value
|
|
583
|
+
else:
|
|
584
|
+
# This is actual conditions, not a parameter
|
|
585
|
+
conditions = element
|
|
586
|
+
else:
|
|
587
|
+
# This might be conditions
|
|
588
|
+
conditions = element
|
|
589
|
+
|
|
590
|
+
result = {
|
|
591
|
+
"type": "geo_expr",
|
|
592
|
+
"field": field_name,
|
|
593
|
+
"type_hint": type_hint,
|
|
594
|
+
"field_mutators": field_mutators,
|
|
595
|
+
"conditions": self._build_ast(conditions) if conditions else None,
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
# Add geo parameters if any
|
|
599
|
+
if geo_params:
|
|
600
|
+
result["geo_params"] = geo_params
|
|
601
|
+
|
|
602
|
+
return result
|
|
603
|
+
# Check if this is a nslookup expression with multiple parameters
|
|
604
|
+
elif isinstance(parsed[1], str) and parsed[1].lower() == "nslookup":
|
|
605
|
+
# This is a nslookup expression with multiple parameters
|
|
606
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed[0])
|
|
607
|
+
|
|
608
|
+
# All remaining elements are parameters (could be conditions or actual params)
|
|
609
|
+
conditions = None
|
|
610
|
+
nslookup_params = {}
|
|
611
|
+
|
|
612
|
+
# Process all parameters starting from third element
|
|
613
|
+
param_elements = parsed[2:] # Everything after field and 'nslookup'
|
|
614
|
+
|
|
615
|
+
for element in param_elements:
|
|
616
|
+
if isinstance(element, list):
|
|
617
|
+
if len(element) == 2:
|
|
618
|
+
# Check if this is a parameter or a condition
|
|
619
|
+
if isinstance(element[0], str):
|
|
620
|
+
# This is a proper parameter: ['param_name', 'value']
|
|
621
|
+
param_name, param_value = element
|
|
622
|
+
# Convert string boolean values to actual booleans
|
|
623
|
+
if isinstance(param_value, str):
|
|
624
|
+
if param_value.lower() == "true":
|
|
625
|
+
param_value = True
|
|
626
|
+
elif param_value.lower() == "false":
|
|
627
|
+
param_value = False
|
|
628
|
+
nslookup_params[param_name] = param_value
|
|
629
|
+
else:
|
|
630
|
+
# This is a condition like [['resolved_ip'], 'exists']
|
|
631
|
+
conditions = element
|
|
632
|
+
elif len(element) == 3 and element[1] == "=":
|
|
633
|
+
# This is a parameter parsed as comparison: [['param'], '=', ['value']]
|
|
634
|
+
if (
|
|
635
|
+
isinstance(element[0], list)
|
|
636
|
+
and len(element[0]) == 1
|
|
637
|
+
and isinstance(element[0][0], str)
|
|
638
|
+
and element[0][0]
|
|
639
|
+
in ["force", "servers", "append_field", "save", "types", "field"]
|
|
640
|
+
):
|
|
641
|
+
param_name = element[0][0]
|
|
642
|
+
param_value = (
|
|
643
|
+
element[2]
|
|
644
|
+
if not isinstance(element[2], list)
|
|
645
|
+
else element[2][0] if element[2] else None
|
|
646
|
+
)
|
|
647
|
+
# Handle types parameter which should be a list
|
|
648
|
+
if param_name == "types" and isinstance(element[2], list):
|
|
649
|
+
param_value = element[2]
|
|
650
|
+
# Unwrap if double-wrapped
|
|
651
|
+
if len(param_value) == 1 and isinstance(param_value[0], list):
|
|
652
|
+
param_value = param_value[0]
|
|
653
|
+
# Convert string boolean values to actual booleans
|
|
654
|
+
elif isinstance(param_value, str):
|
|
655
|
+
if param_value.lower() == "true":
|
|
656
|
+
param_value = True
|
|
657
|
+
elif param_value.lower() == "false":
|
|
658
|
+
param_value = False
|
|
659
|
+
nslookup_params[param_name] = param_value
|
|
660
|
+
else:
|
|
661
|
+
# This is actual conditions, not a parameter
|
|
662
|
+
conditions = element
|
|
663
|
+
else:
|
|
664
|
+
# This might be conditions
|
|
665
|
+
conditions = element
|
|
666
|
+
|
|
667
|
+
result = {
|
|
668
|
+
"type": "nslookup_expr",
|
|
669
|
+
"field": field_name,
|
|
670
|
+
"type_hint": type_hint,
|
|
671
|
+
"field_mutators": field_mutators,
|
|
672
|
+
"conditions": self._build_ast(conditions) if conditions else None,
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
# Add nslookup parameters if any
|
|
676
|
+
if nslookup_params:
|
|
677
|
+
result["nslookup_params"] = nslookup_params
|
|
678
|
+
|
|
679
|
+
return result
|
|
680
|
+
else:
|
|
681
|
+
# This is a chained operation, not a between operation
|
|
682
|
+
return self._build_chained_ast(parsed)
|
|
683
|
+
|
|
684
|
+
elif len(parsed) == 6:
|
|
685
|
+
# Check for "field not between value1 and value2" or "field ! between value1 and value2"
|
|
686
|
+
# Only process as not_between if it matches the pattern
|
|
687
|
+
if (
|
|
688
|
+
len(parsed) >= 6
|
|
689
|
+
and isinstance(parsed[1], str)
|
|
690
|
+
and (parsed[1].lower() == "not" or parsed[1] == "!")
|
|
691
|
+
and isinstance(parsed[2], str)
|
|
692
|
+
and parsed[2].lower() == "between"
|
|
693
|
+
and isinstance(parsed[4], str)
|
|
694
|
+
and parsed[4].lower() == "and"
|
|
695
|
+
):
|
|
696
|
+
field, not_word, between_op, value1, and_op, value2 = parsed
|
|
697
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
|
|
698
|
+
result = {
|
|
699
|
+
"type": "comparison",
|
|
700
|
+
"field": field_name,
|
|
701
|
+
"type_hint": type_hint,
|
|
702
|
+
"operator": "not_between",
|
|
703
|
+
"value": [value1, value2],
|
|
704
|
+
}
|
|
705
|
+
if field_mutators:
|
|
706
|
+
result["field_mutators"] = field_mutators
|
|
707
|
+
return result
|
|
708
|
+
else:
|
|
709
|
+
# Check if this is a geo expression with multiple parameters
|
|
710
|
+
if isinstance(parsed[1], str) and parsed[1].lower() in ["geo", "geoip_lookup"]:
|
|
711
|
+
# This is a geo expression with multiple parameters (6+ elements)
|
|
712
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(parsed[0])
|
|
713
|
+
|
|
714
|
+
# All remaining elements are parameters (could be conditions or actual params)
|
|
715
|
+
conditions = None
|
|
716
|
+
geo_params = {}
|
|
717
|
+
|
|
718
|
+
# Process all parameters starting from third element
|
|
719
|
+
param_elements = parsed[2:] # Everything after field and 'geo'
|
|
720
|
+
|
|
721
|
+
for element in param_elements:
|
|
722
|
+
if isinstance(element, list):
|
|
723
|
+
if len(element) == 2:
|
|
724
|
+
# Check if this is a parameter or a condition
|
|
725
|
+
if isinstance(element[0], str):
|
|
726
|
+
# This is a proper parameter: ['param_name', 'value']
|
|
727
|
+
param_name, param_value = element
|
|
728
|
+
# Convert string boolean values to actual booleans
|
|
729
|
+
if isinstance(param_value, str):
|
|
730
|
+
if param_value.lower() == "true":
|
|
731
|
+
param_value = True
|
|
732
|
+
elif param_value.lower() == "false":
|
|
733
|
+
param_value = False
|
|
734
|
+
geo_params[param_name] = param_value
|
|
735
|
+
else:
|
|
736
|
+
# This is a condition like [['country_iso_code'], '=', ['US']]
|
|
737
|
+
conditions = element
|
|
738
|
+
elif len(element) == 3 and element[1] == "=":
|
|
739
|
+
# This is a parameter parsed as comparison: [['param'], '=', ['value']]
|
|
740
|
+
if (
|
|
741
|
+
isinstance(element[0], list)
|
|
742
|
+
and len(element[0]) == 1
|
|
743
|
+
and isinstance(element[0][0], str)
|
|
744
|
+
and element[0][0] in ["force", "cache", "cache_ttl", "db_path", "save", "field"]
|
|
745
|
+
):
|
|
746
|
+
param_name = element[0][0]
|
|
747
|
+
param_value = (
|
|
748
|
+
element[2]
|
|
749
|
+
if not isinstance(element[2], list)
|
|
750
|
+
else element[2][0] if element[2] else None
|
|
751
|
+
)
|
|
752
|
+
# Convert string boolean values to actual booleans
|
|
753
|
+
if isinstance(param_value, str):
|
|
754
|
+
if param_value.lower() == "true":
|
|
755
|
+
param_value = True
|
|
756
|
+
elif param_value.lower() == "false":
|
|
757
|
+
param_value = False
|
|
758
|
+
geo_params[param_name] = param_value
|
|
759
|
+
else:
|
|
760
|
+
# This is actual conditions, not a parameter
|
|
761
|
+
conditions = element
|
|
762
|
+
else:
|
|
763
|
+
# This might be conditions
|
|
764
|
+
conditions = element
|
|
765
|
+
|
|
766
|
+
result = {
|
|
767
|
+
"type": "geo_expr",
|
|
768
|
+
"field": field_name,
|
|
769
|
+
"type_hint": type_hint,
|
|
770
|
+
"field_mutators": field_mutators,
|
|
771
|
+
"conditions": self._build_ast(conditions) if conditions else None,
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
# Add geo parameters if any
|
|
775
|
+
if geo_params:
|
|
776
|
+
result["geo_params"] = geo_params
|
|
777
|
+
|
|
778
|
+
return result
|
|
779
|
+
else:
|
|
780
|
+
# This is a chained operation, not a not_between operation
|
|
781
|
+
return self._build_chained_ast(parsed)
|
|
782
|
+
|
|
783
|
+
elif len(parsed) == 3:
|
|
784
|
+
# Binary operation or comparison (including negated unary operators like "field not exists")
|
|
785
|
+
left, operator, right = parsed
|
|
786
|
+
|
|
787
|
+
# Check for geo() expression first
|
|
788
|
+
if isinstance(operator, str) and operator.lower() in ["geo", "geoip_lookup"]:
|
|
789
|
+
# This is a geo expression: field | geo(conditions OR params)
|
|
790
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
|
|
791
|
+
|
|
792
|
+
conditions = None
|
|
793
|
+
geo_params = {}
|
|
794
|
+
|
|
795
|
+
# Check if this is actually a parameter masquerading as a condition
|
|
796
|
+
# Look for comparison operations where the field is a known parameter name
|
|
797
|
+
if (
|
|
798
|
+
isinstance(right, list)
|
|
799
|
+
and len(right) == 3
|
|
800
|
+
and isinstance(right[1], str)
|
|
801
|
+
and right[1] == "="
|
|
802
|
+
and isinstance(right[0], list)
|
|
803
|
+
and len(right[0]) == 1
|
|
804
|
+
and isinstance(right[0][0], str)
|
|
805
|
+
and right[0][0] in ["force", "cache", "cache_ttl", "db_path", "save"]
|
|
806
|
+
):
|
|
807
|
+
# This is a parameter parsed as a comparison: force = true
|
|
808
|
+
param_name = right[0][0]
|
|
809
|
+
param_value = right[2] if not isinstance(right[2], list) else right[2][0] if right[2] else None
|
|
810
|
+
# Convert string boolean values to actual booleans
|
|
811
|
+
if isinstance(param_value, str):
|
|
812
|
+
if param_value.lower() == "true":
|
|
813
|
+
param_value = True
|
|
814
|
+
elif param_value.lower() == "false":
|
|
815
|
+
param_value = False
|
|
816
|
+
geo_params[param_name] = param_value
|
|
817
|
+
else:
|
|
818
|
+
# This is actual conditions: geo(country_iso_code eq 'US')
|
|
819
|
+
conditions = right
|
|
820
|
+
|
|
821
|
+
result = {
|
|
822
|
+
"type": "geo_expr",
|
|
823
|
+
"field": field_name,
|
|
824
|
+
"type_hint": type_hint,
|
|
825
|
+
"field_mutators": field_mutators,
|
|
826
|
+
"conditions": self._build_ast(conditions) if conditions else None,
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
# Add geo parameters if any
|
|
830
|
+
if geo_params:
|
|
831
|
+
result["geo_params"] = geo_params
|
|
832
|
+
|
|
833
|
+
return result
|
|
834
|
+
|
|
835
|
+
# Check for nslookup() expression
|
|
836
|
+
elif isinstance(operator, str) and operator.lower() == "nslookup":
|
|
837
|
+
# This is a nslookup expression: field | nslookup(conditions OR params)
|
|
838
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
|
|
839
|
+
|
|
840
|
+
conditions = None
|
|
841
|
+
nslookup_params = {}
|
|
842
|
+
|
|
843
|
+
# Check if this is actually a parameter masquerading as a condition
|
|
844
|
+
# Look for comparison operations where the field is a known parameter name
|
|
845
|
+
if (
|
|
846
|
+
isinstance(right, list)
|
|
847
|
+
and len(right) == 3
|
|
848
|
+
and isinstance(right[1], str)
|
|
849
|
+
and right[1] == "="
|
|
850
|
+
and isinstance(right[0], list)
|
|
851
|
+
and len(right[0]) == 1
|
|
852
|
+
and isinstance(right[0][0], str)
|
|
853
|
+
and right[0][0] in ["force", "servers", "append_field", "save", "types"]
|
|
854
|
+
):
|
|
855
|
+
# This is a parameter parsed as a comparison: force = true
|
|
856
|
+
param_name = right[0][0]
|
|
857
|
+
param_value = right[2] if not isinstance(right[2], list) else right[2][0] if right[2] else None
|
|
858
|
+
# Handle types parameter which should be a list
|
|
859
|
+
if param_name == "types" and isinstance(right[2], list):
|
|
860
|
+
param_value = right[2]
|
|
861
|
+
# Unwrap if double-wrapped
|
|
862
|
+
if len(param_value) == 1 and isinstance(param_value[0], list):
|
|
863
|
+
param_value = param_value[0]
|
|
864
|
+
# Convert string boolean values to actual booleans
|
|
865
|
+
elif isinstance(param_value, str):
|
|
866
|
+
if param_value.lower() == "true":
|
|
867
|
+
param_value = True
|
|
868
|
+
elif param_value.lower() == "false":
|
|
869
|
+
param_value = False
|
|
870
|
+
nslookup_params[param_name] = param_value
|
|
871
|
+
else:
|
|
872
|
+
# This is actual conditions: nslookup(data contains 'example.com')
|
|
873
|
+
conditions = right
|
|
874
|
+
|
|
875
|
+
result = {
|
|
876
|
+
"type": "nslookup_expr",
|
|
877
|
+
"field": field_name,
|
|
878
|
+
"type_hint": type_hint,
|
|
879
|
+
"field_mutators": field_mutators,
|
|
880
|
+
"conditions": self._build_ast(conditions) if conditions else None,
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
# Add nslookup parameters if any
|
|
884
|
+
if nslookup_params:
|
|
885
|
+
result["nslookup_params"] = nslookup_params
|
|
886
|
+
|
|
887
|
+
return result
|
|
888
|
+
|
|
889
|
+
if operator.lower() in ["and", "or"]:
|
|
890
|
+
# Logical operation
|
|
891
|
+
return {
|
|
892
|
+
"type": "logical_op",
|
|
893
|
+
"operator": operator.lower(),
|
|
894
|
+
"left": self._build_ast(left),
|
|
895
|
+
"right": self._build_ast(right),
|
|
896
|
+
}
|
|
897
|
+
elif (
|
|
898
|
+
isinstance(operator, str)
|
|
899
|
+
and (operator.lower() == "not" or operator == "!")
|
|
900
|
+
and isinstance(right, str)
|
|
901
|
+
and right.lower() == "exists"
|
|
902
|
+
):
|
|
903
|
+
# Handle "field not exists" or "field ! exists" (negated unary operator)
|
|
904
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
|
|
905
|
+
result = {
|
|
906
|
+
"type": "comparison",
|
|
907
|
+
"field": field_name,
|
|
908
|
+
"type_hint": type_hint,
|
|
909
|
+
"operator": "not_exists",
|
|
910
|
+
"value": None,
|
|
911
|
+
}
|
|
912
|
+
if field_mutators:
|
|
913
|
+
result["field_mutators"] = field_mutators
|
|
914
|
+
return result
|
|
915
|
+
elif (
|
|
916
|
+
isinstance(operator, str)
|
|
917
|
+
and operator.lower() == "is"
|
|
918
|
+
and isinstance(right, str)
|
|
919
|
+
and right.lower() == "not"
|
|
920
|
+
):
|
|
921
|
+
# This will be handled in the 4-element case for "field is not value"
|
|
922
|
+
# Return unknown for now - should not normally reach here
|
|
923
|
+
return {"type": "unknown", "value": parsed}
|
|
924
|
+
elif isinstance(operator, str) and operator == "!" and isinstance(right, str) and right.lower() == "is":
|
|
925
|
+
# Handle "field ! is value" - need to look ahead
|
|
926
|
+
# This is incomplete and will be handled in the 4-element case
|
|
927
|
+
# Return unknown for now - should not normally reach here
|
|
928
|
+
return {"type": "unknown", "value": parsed}
|
|
929
|
+
else:
|
|
930
|
+
# Comparison operation
|
|
931
|
+
# Handle 'in' operator - always value in field(s)
|
|
932
|
+
if isinstance(operator, str) and operator.lower() == "in":
|
|
933
|
+
# Check for old syntax: [field1, field2] in value
|
|
934
|
+
# The parser wraps list literals, so check for wrapped lists too
|
|
935
|
+
check_list = left
|
|
936
|
+
if isinstance(left, list) and len(left) == 1 and isinstance(left[0], list):
|
|
937
|
+
# Unwrap if it's [[field1, field2]]
|
|
938
|
+
check_list = left[0]
|
|
939
|
+
|
|
940
|
+
if isinstance(check_list, list) and len(check_list) > 1:
|
|
941
|
+
# Check if this is a list of identifiers (field names)
|
|
942
|
+
is_field_list = True
|
|
943
|
+
field_names = []
|
|
944
|
+
for item in check_list:
|
|
945
|
+
if isinstance(item, str):
|
|
946
|
+
field_names.append(item)
|
|
947
|
+
else:
|
|
948
|
+
is_field_list = False
|
|
949
|
+
break
|
|
950
|
+
|
|
951
|
+
if is_field_list:
|
|
952
|
+
# Extract value for suggestion
|
|
953
|
+
value_str = right
|
|
954
|
+
if isinstance(right, list) and len(right) > 0:
|
|
955
|
+
value_str = right[0]
|
|
956
|
+
|
|
957
|
+
raise TQLSyntaxError(
|
|
958
|
+
"Field list on left side of 'in' operator is no longer supported",
|
|
959
|
+
suggestions=[
|
|
960
|
+
f'"{value_str}" in [{", ".join(field_names)}]',
|
|
961
|
+
f"'{value_str}' in [{', '.join(field_names)}]",
|
|
962
|
+
],
|
|
963
|
+
position=0,
|
|
964
|
+
)
|
|
965
|
+
|
|
966
|
+
# For 'in' operator, left is always the value, right is field(s)
|
|
967
|
+
# Extract the value from left
|
|
968
|
+
value_extracted, value_mutators = self.ast_builder.extract_value_info(left)
|
|
969
|
+
|
|
970
|
+
# Check if right is a list of fields
|
|
971
|
+
if isinstance(right, list) and len(right) > 0:
|
|
972
|
+
# Check if all elements are fields
|
|
973
|
+
all_fields = True
|
|
974
|
+
for item in right:
|
|
975
|
+
if isinstance(item, list):
|
|
976
|
+
# This is a typed_field group
|
|
977
|
+
if not (len(item) >= 1 and isinstance(item[0], str)):
|
|
978
|
+
all_fields = False
|
|
979
|
+
break
|
|
980
|
+
elif not isinstance(item, str):
|
|
981
|
+
all_fields = False
|
|
982
|
+
break
|
|
983
|
+
|
|
984
|
+
if all_fields:
|
|
985
|
+
# This is "value in [field1, field2, ...]" format
|
|
986
|
+
# Create an OR expression for all fields
|
|
987
|
+
field_comparisons = []
|
|
988
|
+
for field in right:
|
|
989
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(field)
|
|
990
|
+
comparison = {
|
|
991
|
+
"type": "comparison",
|
|
992
|
+
"field": field_name,
|
|
993
|
+
"type_hint": type_hint,
|
|
994
|
+
"operator": "in",
|
|
995
|
+
"value": (
|
|
996
|
+
[value_extracted]
|
|
997
|
+
if not isinstance(value_extracted, list)
|
|
998
|
+
else value_extracted
|
|
999
|
+
),
|
|
1000
|
+
}
|
|
1001
|
+
if field_mutators:
|
|
1002
|
+
comparison["field_mutators"] = field_mutators
|
|
1003
|
+
if value_mutators:
|
|
1004
|
+
comparison["value_mutators"] = value_mutators
|
|
1005
|
+
field_comparisons.append(comparison)
|
|
1006
|
+
|
|
1007
|
+
# Build OR expression
|
|
1008
|
+
result = field_comparisons[0]
|
|
1009
|
+
for i in range(1, len(field_comparisons)):
|
|
1010
|
+
result = {
|
|
1011
|
+
"type": "logical_op",
|
|
1012
|
+
"operator": "or",
|
|
1013
|
+
"left": result,
|
|
1014
|
+
"right": field_comparisons[i],
|
|
1015
|
+
}
|
|
1016
|
+
return result
|
|
1017
|
+
|
|
1018
|
+
# Otherwise, treat as standard "value in field" (single field)
|
|
1019
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(right)
|
|
1020
|
+
result = {
|
|
1021
|
+
"type": "comparison",
|
|
1022
|
+
"field": field_name,
|
|
1023
|
+
"type_hint": type_hint,
|
|
1024
|
+
"operator": "in",
|
|
1025
|
+
"value": [value_extracted] if not isinstance(value_extracted, list) else value_extracted,
|
|
1026
|
+
}
|
|
1027
|
+
if field_mutators:
|
|
1028
|
+
result["field_mutators"] = field_mutators
|
|
1029
|
+
if value_mutators:
|
|
1030
|
+
result["value_mutators"] = value_mutators
|
|
1031
|
+
return result
|
|
1032
|
+
|
|
1033
|
+
if operator.lower() == "between":
|
|
1034
|
+
# Between operator with list of values
|
|
1035
|
+
if isinstance(right, list) and len(right) == 2:
|
|
1036
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
|
|
1037
|
+
result = {
|
|
1038
|
+
"type": "comparison",
|
|
1039
|
+
"field": field_name,
|
|
1040
|
+
"type_hint": type_hint,
|
|
1041
|
+
"operator": "between",
|
|
1042
|
+
"value": right,
|
|
1043
|
+
}
|
|
1044
|
+
if field_mutators:
|
|
1045
|
+
result["field_mutators"] = field_mutators
|
|
1046
|
+
return result
|
|
1047
|
+
else:
|
|
1048
|
+
# Extract field name for error message
|
|
1049
|
+
field_display = (
|
|
1050
|
+
self.ast_builder.extract_field_info(left)[0] if isinstance(left, list) else left
|
|
1051
|
+
)
|
|
1052
|
+
raise TQLOperatorError(
|
|
1053
|
+
f"'between' operator requires exactly 2 values, got {len(right) if isinstance(right, list) else 1}",
|
|
1054
|
+
suggestions=[f"{field_display} between [18, 65]"],
|
|
1055
|
+
)
|
|
1056
|
+
|
|
1057
|
+
# Check for negated operators (space-separated like "not in")
|
|
1058
|
+
if isinstance(operator, list) and len(operator) == 2:
|
|
1059
|
+
neg_word, base_op = operator
|
|
1060
|
+
if neg_word.lower() == "not" or neg_word == "!":
|
|
1061
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
|
|
1062
|
+
value, value_mutators = self.ast_builder.extract_value_info(right)
|
|
1063
|
+
# Handle 'not none' -> 'any' (double negative)
|
|
1064
|
+
if base_op.lower() == "none":
|
|
1065
|
+
normalized_operator = "any"
|
|
1066
|
+
else:
|
|
1067
|
+
normalized_operator = f"not_{base_op.lower()}"
|
|
1068
|
+
result = {
|
|
1069
|
+
"type": "comparison",
|
|
1070
|
+
"field": field_name,
|
|
1071
|
+
"type_hint": type_hint,
|
|
1072
|
+
"operator": normalized_operator,
|
|
1073
|
+
"value": value,
|
|
1074
|
+
}
|
|
1075
|
+
if field_mutators:
|
|
1076
|
+
result["field_mutators"] = field_mutators
|
|
1077
|
+
if value_mutators:
|
|
1078
|
+
result["value_mutators"] = value_mutators
|
|
1079
|
+
return result
|
|
1080
|
+
|
|
1081
|
+
# Check for bang operators (like !contains, !in, etc.)
|
|
1082
|
+
if isinstance(operator, str) and operator.startswith("!") and operator != "!=":
|
|
1083
|
+
# Bang operator - convert to not_operator (but not !=)
|
|
1084
|
+
base_op = operator[1:].lower()
|
|
1085
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
|
|
1086
|
+
value, value_mutators = self.ast_builder.extract_value_info(right)
|
|
1087
|
+
# Handle '!none' -> 'any' (double negative)
|
|
1088
|
+
if base_op == "none":
|
|
1089
|
+
normalized_operator = "any"
|
|
1090
|
+
else:
|
|
1091
|
+
normalized_operator = f"not_{base_op}"
|
|
1092
|
+
result = {
|
|
1093
|
+
"type": "comparison",
|
|
1094
|
+
"field": field_name,
|
|
1095
|
+
"type_hint": type_hint,
|
|
1096
|
+
"operator": normalized_operator,
|
|
1097
|
+
"value": value,
|
|
1098
|
+
}
|
|
1099
|
+
if field_mutators:
|
|
1100
|
+
result["field_mutators"] = field_mutators
|
|
1101
|
+
if value_mutators:
|
|
1102
|
+
result["value_mutators"] = value_mutators
|
|
1103
|
+
return result
|
|
1104
|
+
|
|
1105
|
+
# Standard "field op value" format
|
|
1106
|
+
field_name, type_hint, field_mutators = self.ast_builder.extract_field_info(left)
|
|
1107
|
+
value, value_mutators = self.ast_builder.extract_value_info(right)
|
|
1108
|
+
# Normalize operator: convert 'none' to 'not_any'
|
|
1109
|
+
normalized_operator = operator.lower()
|
|
1110
|
+
if normalized_operator == "none":
|
|
1111
|
+
normalized_operator = "not_any"
|
|
1112
|
+
|
|
1113
|
+
# Additional check for old 'in' syntax that got parsed differently
|
|
1114
|
+
# If operator is 'in' and value is a list of identifiers, this might be the old syntax
|
|
1115
|
+
if normalized_operator == "in" and isinstance(value, list) and len(value) > 1:
|
|
1116
|
+
# Check if all items look like field names
|
|
1117
|
+
all_identifiers = all(
|
|
1118
|
+
isinstance(v, str) and v.replace(".", "").replace("_", "").isalnum() for v in value
|
|
1119
|
+
)
|
|
1120
|
+
if all_identifiers:
|
|
1121
|
+
raise TQLSyntaxError(
|
|
1122
|
+
"Field list in value syntax is no longer supported. Use value in [fields] instead",
|
|
1123
|
+
suggestions=[
|
|
1124
|
+
f'"{field_name}" in [{", ".join(value)}]',
|
|
1125
|
+
f"'{field_name}' in [{', '.join(value)}]",
|
|
1126
|
+
],
|
|
1127
|
+
position=0,
|
|
1128
|
+
)
|
|
1129
|
+
|
|
1130
|
+
result = {
|
|
1131
|
+
"type": "comparison",
|
|
1132
|
+
"field": field_name,
|
|
1133
|
+
"type_hint": type_hint,
|
|
1134
|
+
"operator": normalized_operator,
|
|
1135
|
+
"value": value,
|
|
1136
|
+
}
|
|
1137
|
+
if field_mutators:
|
|
1138
|
+
result["field_mutators"] = field_mutators
|
|
1139
|
+
if value_mutators:
|
|
1140
|
+
result["value_mutators"] = value_mutators
|
|
1141
|
+
return result
|
|
1142
|
+
else:
|
|
1143
|
+
# Handle longer lists (chained operations)
|
|
1144
|
+
# This happens with infixNotation for multiple AND/OR operations
|
|
1145
|
+
# The structure will be flattened, so we need to reconstruct the tree
|
|
1146
|
+
return self._build_chained_ast(parsed)
|
|
1147
|
+
else:
|
|
1148
|
+
# Single value - should already be a proper AST node
|
|
1149
|
+
if isinstance(parsed, dict):
|
|
1150
|
+
return parsed
|
|
1151
|
+
else:
|
|
1152
|
+
# This shouldn't happen, but handle gracefully
|
|
1153
|
+
raise TQLParseError(f"Unexpected parsed value type: {type(parsed)}")
|
|
1154
|
+
|
|
1155
|
+
# This should be unreachable, but helps mypy understand all paths return
|
|
1156
|
+
raise AssertionError("Unreachable code in _build_ast")
|
|
1157
|
+
|
|
1158
|
+
def _build_chained_ast(self, parsed_list: List[Any]) -> Dict[str, Any]:
|
|
1159
|
+
"""Build AST from chained operations (e.g., A AND B AND C).
|
|
1160
|
+
|
|
1161
|
+
Args:
|
|
1162
|
+
parsed_list: List of alternating operands and operators
|
|
1163
|
+
|
|
1164
|
+
Returns:
|
|
1165
|
+
Dictionary representing the AST node
|
|
1166
|
+
"""
|
|
1167
|
+
if len(parsed_list) < 3:
|
|
1168
|
+
# Not enough elements for a chained operation
|
|
1169
|
+
return {"type": "unknown", "value": parsed_list}
|
|
1170
|
+
|
|
1171
|
+
# Start with the first operand
|
|
1172
|
+
result = self._build_ast(parsed_list[0])
|
|
1173
|
+
|
|
1174
|
+
# Process pairs of (operator, operand)
|
|
1175
|
+
i = 1
|
|
1176
|
+
while i < len(parsed_list) - 1:
|
|
1177
|
+
operator = parsed_list[i]
|
|
1178
|
+
operand = parsed_list[i + 1]
|
|
1179
|
+
|
|
1180
|
+
if operator.lower() in ["and", "or"]:
|
|
1181
|
+
result = {
|
|
1182
|
+
"type": "logical_op",
|
|
1183
|
+
"operator": operator.lower(),
|
|
1184
|
+
"left": result,
|
|
1185
|
+
"right": self._build_ast(operand),
|
|
1186
|
+
}
|
|
1187
|
+
else:
|
|
1188
|
+
# This shouldn't happen in a well-formed chained expression
|
|
1189
|
+
return {"type": "unknown", "value": parsed_list}
|
|
1190
|
+
|
|
1191
|
+
i += 2
|
|
1192
|
+
|
|
1193
|
+
return result
|
|
1194
|
+
|
|
1195
|
+
def _build_stats_ast(self, parsed: List[Any]) -> Dict[str, Any]: # noqa: C901
|
|
1196
|
+
"""Build AST for stats expression.
|
|
1197
|
+
|
|
1198
|
+
Args:
|
|
1199
|
+
parsed: Parsed stats expression [stats, aggregations, [by, fields]]
|
|
1200
|
+
|
|
1201
|
+
Returns:
|
|
1202
|
+
Dictionary representing the stats AST
|
|
1203
|
+
"""
|
|
1204
|
+
result: Dict[str, Any] = {"type": "stats_expr", "aggregations": [], "group_by": []}
|
|
1205
|
+
|
|
1206
|
+
# Skip the 'stats' keyword
|
|
1207
|
+
i = 1
|
|
1208
|
+
|
|
1209
|
+
# Process aggregations until we hit 'by' or end
|
|
1210
|
+
while i < len(parsed):
|
|
1211
|
+
if isinstance(parsed[i], str) and parsed[i].lower() == "by":
|
|
1212
|
+
# Start of group by clause
|
|
1213
|
+
i += 1
|
|
1214
|
+
break
|
|
1215
|
+
|
|
1216
|
+
# Process aggregation
|
|
1217
|
+
if isinstance(parsed[i], str) and parsed[i].lower() == "count":
|
|
1218
|
+
# Special case for count(*)
|
|
1219
|
+
result["aggregations"].append({"function": "count", "field": "*", "alias": None})
|
|
1220
|
+
i += 1
|
|
1221
|
+
elif isinstance(parsed[i], list):
|
|
1222
|
+
# This is a list of aggregations
|
|
1223
|
+
for item in parsed[i]:
|
|
1224
|
+
agg_dict: Dict[str, Any] = {}
|
|
1225
|
+
|
|
1226
|
+
if isinstance(item, str) and item.lower() == "count":
|
|
1227
|
+
# count(*) case
|
|
1228
|
+
agg_dict["function"] = "count"
|
|
1229
|
+
agg_dict["field"] = "*"
|
|
1230
|
+
agg_dict["alias"] = None
|
|
1231
|
+
elif isinstance(item, list):
|
|
1232
|
+
# Regular aggregation: [func, field, ...] or [[func, field], 'as', 'alias']
|
|
1233
|
+
if len(item) >= 2 and isinstance(item[0], list):
|
|
1234
|
+
# Aggregation with alias: [[func, field, ...], 'as', 'alias']
|
|
1235
|
+
func_spec = item[0]
|
|
1236
|
+
# Normalize function aliases
|
|
1237
|
+
func = func_spec[0].lower()
|
|
1238
|
+
if func == "avg":
|
|
1239
|
+
func = "average"
|
|
1240
|
+
elif func == "med":
|
|
1241
|
+
func = "median"
|
|
1242
|
+
elif func == "standard_deviation":
|
|
1243
|
+
func = "std"
|
|
1244
|
+
elif func in ["p", "pct", "percentiles"]:
|
|
1245
|
+
func = "percentile"
|
|
1246
|
+
elif func in ["pct_rank", "pct_ranks", "percentile_ranks"]:
|
|
1247
|
+
func = "percentile_rank"
|
|
1248
|
+
agg_dict["function"] = func
|
|
1249
|
+
agg_dict["field"] = func_spec[1] if len(func_spec) > 1 else "*"
|
|
1250
|
+
|
|
1251
|
+
# Check for modifiers (top/bottom) or percentile values
|
|
1252
|
+
if len(func_spec) >= 3:
|
|
1253
|
+
# Check if it's a percentile function with values
|
|
1254
|
+
func_name = agg_dict["function"]
|
|
1255
|
+
if func_name in ["percentile", "percentiles", "p", "pct"]:
|
|
1256
|
+
# Handle percentile values - they come as separate elements
|
|
1257
|
+
percentile_values = []
|
|
1258
|
+
for j in range(2, len(func_spec)):
|
|
1259
|
+
if isinstance(func_spec[j], str) and func_spec[j].replace(".", "").isdigit():
|
|
1260
|
+
percentile_values.append(float(func_spec[j]))
|
|
1261
|
+
else:
|
|
1262
|
+
break # Stop if we hit a non-numeric value
|
|
1263
|
+
agg_dict["percentile_values"] = percentile_values
|
|
1264
|
+
elif func_name in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
|
|
1265
|
+
# Handle percentile rank values - they come as separate elements
|
|
1266
|
+
rank_values = []
|
|
1267
|
+
for j in range(2, len(func_spec)):
|
|
1268
|
+
if (
|
|
1269
|
+
isinstance(func_spec[j], str)
|
|
1270
|
+
and func_spec[j].replace(".", "").replace("-", "").isdigit()
|
|
1271
|
+
):
|
|
1272
|
+
rank_values.append(float(func_spec[j]))
|
|
1273
|
+
else:
|
|
1274
|
+
break # Stop if we hit a non-numeric value
|
|
1275
|
+
agg_dict["rank_values"] = rank_values
|
|
1276
|
+
elif len(func_spec) >= 4 and func_spec[2].lower() in ["top", "bottom"]:
|
|
1277
|
+
agg_dict["modifier"] = func_spec[2].lower()
|
|
1278
|
+
agg_dict["limit"] = int(func_spec[3])
|
|
1279
|
+
|
|
1280
|
+
# Check for alias
|
|
1281
|
+
if len(item) >= 3 and item[1].lower() == "as":
|
|
1282
|
+
agg_dict["alias"] = item[2]
|
|
1283
|
+
else:
|
|
1284
|
+
agg_dict["alias"] = None
|
|
1285
|
+
else:
|
|
1286
|
+
# Simple aggregation: [func, field]
|
|
1287
|
+
# Normalize function aliases
|
|
1288
|
+
func = item[0].lower() if len(item) > 0 else "count"
|
|
1289
|
+
if func == "avg":
|
|
1290
|
+
func = "average"
|
|
1291
|
+
elif func == "med":
|
|
1292
|
+
func = "median"
|
|
1293
|
+
elif func == "standard_deviation":
|
|
1294
|
+
func = "std"
|
|
1295
|
+
elif func in ["p", "pct", "percentiles"]:
|
|
1296
|
+
func = "percentile"
|
|
1297
|
+
elif func in ["pct_rank", "pct_ranks", "percentile_ranks"]:
|
|
1298
|
+
func = "percentile_rank"
|
|
1299
|
+
agg_dict["function"] = func
|
|
1300
|
+
agg_dict["field"] = item[1] if len(item) > 1 else "*"
|
|
1301
|
+
agg_dict["alias"] = None
|
|
1302
|
+
|
|
1303
|
+
# Check for modifiers or percentile values
|
|
1304
|
+
if len(item) >= 3:
|
|
1305
|
+
func_name = agg_dict["function"]
|
|
1306
|
+
if func_name in ["percentile", "percentiles", "p", "pct"]:
|
|
1307
|
+
# Handle percentile values - they come as separate elements
|
|
1308
|
+
percentile_values = []
|
|
1309
|
+
for j in range(2, len(item)):
|
|
1310
|
+
if isinstance(item[j], str) and item[j].replace(".", "").isdigit():
|
|
1311
|
+
percentile_values.append(float(item[j]))
|
|
1312
|
+
else:
|
|
1313
|
+
break # Stop if we hit a non-numeric value
|
|
1314
|
+
agg_dict["percentile_values"] = percentile_values
|
|
1315
|
+
elif func_name in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
|
|
1316
|
+
# Handle percentile rank values - they come as separate elements
|
|
1317
|
+
rank_values = []
|
|
1318
|
+
for j in range(2, len(item)):
|
|
1319
|
+
if (
|
|
1320
|
+
isinstance(item[j], str)
|
|
1321
|
+
and item[j].replace(".", "").replace("-", "").isdigit()
|
|
1322
|
+
):
|
|
1323
|
+
rank_values.append(float(item[j]))
|
|
1324
|
+
else:
|
|
1325
|
+
break # Stop if we hit a non-numeric value
|
|
1326
|
+
agg_dict["rank_values"] = rank_values
|
|
1327
|
+
elif len(item) >= 4 and item[2].lower() in ["top", "bottom"]:
|
|
1328
|
+
agg_dict["modifier"] = item[2].lower()
|
|
1329
|
+
agg_dict["limit"] = int(item[3])
|
|
1330
|
+
|
|
1331
|
+
if "function" in agg_dict:
|
|
1332
|
+
result["aggregations"].append(agg_dict)
|
|
1333
|
+
|
|
1334
|
+
i += 1
|
|
1335
|
+
else:
|
|
1336
|
+
i += 1
|
|
1337
|
+
|
|
1338
|
+
# Process group by fields
|
|
1339
|
+
while i < len(parsed):
|
|
1340
|
+
if isinstance(parsed[i], str) and parsed[i] not in ["by", ","]:
|
|
1341
|
+
result["group_by"].append(parsed[i])
|
|
1342
|
+
i += 1
|
|
1343
|
+
|
|
1344
|
+
return result
|
|
1345
|
+
|
|
1346
|
+
|
|
1347
|
+
# Legacy function for backward compatibility
|
|
1348
|
+
def parse_query(query: str):
    """Parse a TQL query string and return the raw pyparsing result.

    Retained so that existing callers keep working; new code should use
    the TQLParser class directly.

    Args:
        query: The TQL query string.

    Returns:
        The pyparsing ParseResults.
    """
    # Legacy callers receive the raw pyparsing result of the whole query.
    tql_expr = TQLParser().grammar.tql_expr
    return tql_expr.parseString(query, parseAll=True)
|