tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
"""TQL Grammar definitions using pyparsing."""
|
|
2
|
+
|
|
3
|
+
from pyparsing import (
|
|
4
|
+
CaselessKeyword,
|
|
5
|
+
Forward,
|
|
6
|
+
Group,
|
|
7
|
+
)
|
|
8
|
+
from pyparsing import Optional as PyparsingOptional
|
|
9
|
+
from pyparsing import (
|
|
10
|
+
QuotedString,
|
|
11
|
+
Regex,
|
|
12
|
+
Suppress,
|
|
13
|
+
Word,
|
|
14
|
+
ZeroOrMore,
|
|
15
|
+
alphanums,
|
|
16
|
+
alphas,
|
|
17
|
+
delimitedList,
|
|
18
|
+
infixNotation,
|
|
19
|
+
nums,
|
|
20
|
+
oneOf,
|
|
21
|
+
opAssoc,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TQLGrammar:
|
|
26
|
+
"""TQL grammar definitions.
|
|
27
|
+
|
|
28
|
+
This class contains all the pyparsing grammar definitions for TQL,
|
|
29
|
+
including tokens, operators, expressions, and special syntax.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(self):
|
|
33
|
+
"""Initialize all grammar elements."""
|
|
34
|
+
self._setup_basic_tokens()
|
|
35
|
+
self._setup_operators()
|
|
36
|
+
self._setup_fields_and_values()
|
|
37
|
+
self._setup_mutators()
|
|
38
|
+
self._setup_comparisons()
|
|
39
|
+
self._setup_special_expressions()
|
|
40
|
+
self._setup_stats_expressions()
|
|
41
|
+
self._setup_final_expressions()
|
|
42
|
+
|
|
43
|
+
def _setup_basic_tokens(self):
    """Define the primitive tokens: identifiers, numbers, strings, lists, and type hints."""
    # An identifier starts with a letter; the remaining characters may be
    # letters, digits, "_", "." or "-".
    identifier_tail = alphanums + "_.-"
    self.identifier = Word(alphas, identifier_tail)

    # Any run of digits, dots, and hyphens. No stricter numeric shape is
    # enforced at the token level.
    self.number = Word(nums + ".-")

    # Strings may be wrapped in either double or single quotes.
    double_quoted = QuotedString('"')
    single_quoted = QuotedString("'")
    self.string_literal = double_quoted | single_quoted

    # CIDR notation for IP addresses (e.g., 192.168.1.0/24): any run of
    # digits, dots, and slashes.
    self.cidr_notation = Word(nums + "./")

    # A list is a bracketed, comma-separated sequence of strings, numbers,
    # or identifiers; the brackets are dropped and the items are grouped.
    self.list_item = self.string_literal | self.number | self.identifier
    bracketed_items = Suppress("[") + delimitedList(self.list_item) + Suppress("]")
    self.list_literal = Group(bracketed_items)

    # Alternatives are tried left to right, so the order matters: quoted
    # strings first, then CIDR, then plain numbers, then identifiers.
    self.simple_value = self.string_literal | self.cidr_notation | self.number | self.identifier

    # Type names accepted after "::" on a field, matched case-insensitively.
    type_names = "number int float decimal date array bool boolean geo object string"
    self.type_hint = oneOf(type_names, caseless=True)
|
|
60
|
+
|
|
61
|
+
def _setup_operators(self):
    """Define comparison, negation, unary, ``is`` and logical operators."""

    def negation_prefix():
        # Either the keyword "not" or a bare "!"; the matched token is kept
        # in the parse results.
        return CaselessKeyword("not") | "!"

    def negated(word):
        # "<not|!> <word>" with both tokens kept.
        return negation_prefix() + CaselessKeyword(word)

    def banged(word):
        # "!<word>" where the "!" is suppressed and only the keyword is kept.
        return Suppress("!") + CaselessKeyword(word)

    # Binary operators (they require a value). The symbol order is kept
    # exactly as written: "!=" is listed ahead of the other "!" operators.
    self.binary_ops = oneOf(
        "!= "
        "!contains !in !startswith !endswith !regexp !cidr !is !between "
        "regexp in contains = eq ne > gt >= gte < lt <= lte cidr is startswith endswith any all none",
        caseless=True,
    )

    # Negated binary operators written as "not <op>" or "! <op>".
    self.not_in_op = negated("in")
    self.not_contains_op = negated("contains")
    self.not_startswith_op = negated("startswith")
    self.not_endswith_op = negated("endswith")
    self.not_regexp_op = negated("regexp")
    self.not_cidr_op = negated("cidr")
    self.not_any_op = negated("any")
    self.not_all_op = negated("all")
    self.not_none_op = negated("none")

    # The same operators in "!<op>" form, with the "!" dropped from the results.
    self.bang_in_op = banged("in")
    self.bang_contains_op = banged("contains")
    self.bang_startswith_op = banged("startswith")
    self.bang_endswith_op = banged("endswith")
    self.bang_regexp_op = banged("regexp")
    self.bang_cidr_op = banged("cidr")
    self.bang_any_op = banged("any")
    self.bang_all_op = banged("all")
    self.bang_none_op = banged("none")

    # "between" is kept apart from binary_ops because its comparisons have
    # their own two-bound syntax.
    self.between_op = CaselessKeyword("between")
    self.not_between_op = negated("between")
    self.bang_between_op = banged("between")

    # Unary operators (no value follows the operator).
    self.unary_ops = oneOf("exists !exists", caseless=True)
    self.not_exists_op = negated("exists")
    self.bang_exists_op = banged("exists")

    # "is", "is not" and "!is".
    self.is_op = CaselessKeyword("is")
    self.is_not_op = CaselessKeyword("is") + CaselessKeyword("not")
    self.bang_is_op = banged("is")

    # Logical operators and quantifier keywords.
    self.not_kw = negation_prefix()
    self.and_kw = CaselessKeyword("and")
    self.or_kw = CaselessKeyword("or")
    self.any_kw = CaselessKeyword("any")
    self.all_kw = CaselessKeyword("all")
|
|
114
|
+
|
|
115
|
+
def _setup_fields_and_values(self):
    """Define the token that matches a field name."""
    # A field name starts with "@" or a letter and may continue with "@",
    # letters, digits, "_", ".", ":" or "-". Single colons are legal inside
    # a name, but "::" introduces a type hint. The tail is therefore matched
    # lazily ("*?") and the lookahead ends the name at the first "::", at
    # the first character that cannot belong to a field name, or at the end
    # of the input.
    field_name_pattern = r"[@a-zA-Z][@a-zA-Z0-9_.:-]*?(?=::|[^@a-zA-Z0-9_.:-]|$)"
    self.field_name = Regex(field_name_pattern)
|
|
120
|
+
|
|
121
|
+
def _setup_mutators(self):
    """Define mutators, typed fields, and values that may carry mutators."""
    # Names accepted after a "|", matched case-insensitively.
    mutator_names = (
        "lowercase uppercase trim split nslookup geoip_lookup geo "
        "length refang defang b64encode b64decode urldecode "
        "any all avg average max min sum is_private is_global"
    )
    self.mutator_name = oneOf(mutator_names, caseless=True)

    # A parameter is name=value where the value is a quoted string or a list.
    param_value = self.string_literal | self.list_literal
    self.mutator_param = Group(self.identifier + Suppress("=") + param_value)

    # Parameters appear as a parenthesized, comma-separated list.
    param_list = delimitedList(self.mutator_param)
    self.mutator_params = Group(Suppress("(") + param_list + Suppress(")"))

    # One mutator is "| name" with an optional parameter list; a chain is
    # zero or more of them.
    optional_params = PyparsingOptional(self.mutator_params)
    self.mutator = Group(Suppress("|") + self.mutator_name + optional_params)
    self.mutator_chain = ZeroOrMore(self.mutator)

    def optional_type_hint():
        # "::type" suffix; the "::" itself is dropped from the results.
        return PyparsingOptional(Suppress("::") + self.type_hint)

    # Field without mutators, used by the geo/nslookup call expressions.
    self.typed_field_no_mutators = Group(self.field_name + optional_type_hint())

    # Field with optional type hint and mutators (field::type | mutator1 | mutator2).
    self.typed_field = Group(self.field_name + optional_type_hint() + self.mutator_chain)

    # Value with optional mutators: value | mutator, or ('value' | mutator).
    self.simple_value_with_mutators = Group(self.simple_value + self.mutator_chain)
    inner_value = self.string_literal | self.number | self.identifier
    self.parenthesized_value = Group(Suppress("(") + inner_value + self.mutator_chain + Suppress(")"))
    self.list_with_mutators = Group(self.list_literal + self.mutator_chain)

    # Alternatives are tried in this order.
    # NOTE(review): mutator_chain is ZeroOrMore and can match nothing, so the
    # *_with_mutators forms appear to already cover the bare list_literal and
    # simple_value alternatives - confirm before relying on the bare forms.
    self.value = (
        self.list_with_mutators
        | self.list_literal
        | self.parenthesized_value
        | self.simple_value_with_mutators
        | self.simple_value
    )
|
|
156
|
+
|
|
157
|
+
def _setup_comparisons(self):
    """Define every comparison form built from a field, an operator, and a value."""
    field = self.typed_field

    # Standard comparison with the field on the left (field op value).
    self.std_comparison = Group(field + self.binary_ops + self.value)

    # Between, list form (field between [val1, val2]).
    between_head = field + self.between_op
    self.between_comparison_list = Group(between_head + self.list_literal)

    # Between, natural form (field between val1 and val2).
    self.between_comparison_natural = Group(
        field + self.between_op + self.simple_value + self.and_kw + self.simple_value
    )

    # Unary operations (field op).
    self.unary_comparison = Group(field + self.unary_ops)

    # Negated binary operators: the "not"/"!" forms are tried before the
    # suppressed-"!" forms, in the order listed.
    negated_operator = (
        self.not_in_op
        | self.not_contains_op
        | self.not_startswith_op
        | self.not_endswith_op
        | self.not_regexp_op
        | self.not_cidr_op
        | self.not_any_op
        | self.not_all_op
        | self.not_none_op
        | self.bang_in_op
        | self.bang_contains_op
        | self.bang_startswith_op
        | self.bang_endswith_op
        | self.bang_regexp_op
        | self.bang_cidr_op
        | self.bang_any_op
        | self.bang_all_op
        | self.bang_none_op
    )
    self.negated_binary_comparison = Group(field + negated_operator + self.value)

    # Negated existence check (field not exists / field !exists).
    negated_exists = self.not_exists_op | self.bang_exists_op
    self.negated_unary_comparison = Group(field + negated_exists)

    # "is not" / "!is" followed by a simple value.
    negated_is = self.is_not_op | self.bang_is_op
    self.is_not_comparison = Group(field + negated_is + self.simple_value)

    # Not-between, in both the list and the natural syntax.
    self.not_between_comparison_list = Group(
        field + (self.not_between_op | self.bang_between_op) + self.list_literal
    )
    self.not_between_comparison_natural = Group(
        field
        + (self.not_between_op | self.bang_between_op)
        + self.simple_value
        + self.and_kw
        + self.simple_value
    )

    # Bracketed list of fields for the reversed 'in' operator.
    self.field_list_item = field
    bracketed_fields = Suppress("[") + delimitedList(self.field_list_item) + Suppress("]")
    self.field_list = Group(bracketed_fields)

    # Reversed 'in': the value comes first, then one field or a field list.
    self.value_in_field = Group(self.value + CaselessKeyword("in") + field)
    self.value_in_field_list = Group(self.value + CaselessKeyword("in") + self.field_list)
|
|
222
|
+
|
|
223
|
+
def _setup_special_expressions(self):
    """Define the parenthesized ``field | geo(...)`` and ``field | nslookup(...)`` forms."""

    def call_shapes(keyword, param, conditions):
        """Build the five accepted argument layouts for one call keyword.

        Returns, in this order: empty call, parameters only, conditions
        only, conditions followed by parameters, and parameters followed
        by conditions. Each layout is a Group that starts with an
        un-mutated typed field, a suppressed "|", and the keyword.
        """

        def opening():
            # field | keyword (
            return self.typed_field_no_mutators + Suppress("|") + keyword + Suppress("(")

        empty = Group(opening() + Suppress(")"))
        params_only = Group(opening() + delimitedList(param) + Suppress(")"))
        conditions_only = Group(opening() + conditions + Suppress(")"))
        conditions_then_params = Group(
            opening() + conditions + Suppress(",") + delimitedList(param) + Suppress(")")
        )
        params_then_conditions = Group(
            opening() + delimitedList(param) + Suppress(",") + conditions + Suppress(")")
        )
        return empty, params_only, conditions_only, conditions_then_params, params_then_conditions

    # Forward declaration; the comparison forms are attached in
    # _setup_final_expressions.
    self.comparison_expr = Forward()

    # ---- geo() / geoip_lookup() ----
    self.geo_kw = CaselessKeyword("geo") | CaselessKeyword("geoip_lookup")
    self.geo_conditions = Forward()

    # Parameters are name=value; a value is true/false, a quoted string
    # (backslash is the escape character), or an unsigned integer.
    self.geo_param_name = Word(alphas, alphanums + "_")
    self.geo_param_value = (
        CaselessKeyword("true")
        | CaselessKeyword("false")
        | QuotedString('"', escChar="\\")
        | QuotedString("'", escChar="\\")
        | Regex(r"\d+")
    )
    self.geo_param = Group(self.geo_param_name + Suppress("=") + self.geo_param_value)
    # Optional ", param, param..." tail; no other rule in this class refers to it.
    self.geo_params = PyparsingOptional(Suppress(",") + delimitedList(self.geo_param))

    (
        self.geo_empty,
        self.geo_params_only,
        self.geo_conditions_only,
        self.geo_conditions_and_params,
        self.geo_params_and_conditions,
    ) = call_shapes(self.geo_kw, self.geo_param, self.geo_conditions)

    # The layouts with the most parts are tried first, the empty call last.
    self.geo_mutator_expr = (
        self.geo_params_and_conditions
        | self.geo_conditions_and_params
        | self.geo_conditions_only
        | self.geo_params_only
        | self.geo_empty
    )

    # ---- nslookup() ----
    self.nslookup_kw = CaselessKeyword("nslookup")
    self.nslookup_conditions = Forward()

    # Same parameter shape as geo, except that a value may also be a list literal.
    self.nslookup_param_name = Word(alphas, alphanums + "_")
    self.nslookup_param_value = (
        CaselessKeyword("true")
        | CaselessKeyword("false")
        | QuotedString('"', escChar="\\")
        | QuotedString("'", escChar="\\")
        | self.list_literal
        | Regex(r"\d+")
    )
    self.nslookup_param = Group(self.nslookup_param_name + Suppress("=") + self.nslookup_param_value)
    # Optional ", param, param..." tail; no other rule in this class refers to it.
    self.nslookup_params = PyparsingOptional(Suppress(",") + delimitedList(self.nslookup_param))

    (
        self.nslookup_empty,
        self.nslookup_params_only,
        self.nslookup_conditions_only,
        self.nslookup_conditions_and_params,
        self.nslookup_params_and_conditions,
    ) = call_shapes(self.nslookup_kw, self.nslookup_param, self.nslookup_conditions)

    # Same trial order as for geo.
    self.nslookup_mutator_expr = (
        self.nslookup_params_and_conditions
        | self.nslookup_conditions_and_params
        | self.nslookup_conditions_only
        | self.nslookup_params_only
        | self.nslookup_empty
    )
|
|
368
|
+
|
|
369
|
+
def _setup_stats_expressions(self):
    """Define the ``| stats <aggregations> [by <fields>]`` expression."""
    # Keywords used by the stats syntax.
    self.stats_kw = CaselessKeyword("stats")
    self.by_kw = CaselessKeyword("by")
    self.as_kw = CaselessKeyword("as")

    # Aggregation function names, including aliases, matched case-insensitively.
    aggregation_names = (
        "count unique_count sum min max average avg median med std standard_deviation "
        "percentile percentiles p pct percentile_rank percentile_ranks pct_rank pct_ranks"
    )
    self.agg_function_name = oneOf(aggregation_names, caseless=True)

    # Special case for count(*): only the "count" keyword is kept.
    self.count_all = CaselessKeyword("count") + Suppress("(") + Suppress("*") + Suppress(")")

    # After the field, an aggregation may take either "top|bottom <number>"
    # or a comma-separated list of numbers.
    ranked_argument = oneOf("top bottom", caseless=True) + self.number
    numeric_arguments = delimitedList(self.number)
    extra_arguments = PyparsingOptional(Suppress(",") + (ranked_argument | numeric_arguments))

    # name(field[, extras]) is grouped; count(*) is the fallback alternative.
    field_call = Group(
        self.agg_function_name + Suppress("(") + self.field_name + extra_arguments + Suppress(")")
    )
    self.agg_function = field_call | self.count_all

    # Support for aliasing: sum(revenue) as total_revenue.
    optional_alias = PyparsingOptional(self.as_kw + self.identifier)
    self.agg_with_alias = Group(self.agg_function + optional_alias)

    # Multiple aggregations separated by commas.
    self.agg_list = delimitedList(self.agg_with_alias)

    # Group-by fields separated by commas.
    self.group_by_fields = delimitedList(self.field_name)

    # Complete stats expression: | stats agg_functions [by group_fields].
    optional_group_by = PyparsingOptional(self.by_kw + self.group_by_fields)
    self.stats_expr = Group(Suppress("|") + self.stats_kw + self.agg_list + optional_group_by)
|
|
413
|
+
|
|
414
|
+
def _setup_final_expressions(self):
    """Tie the pieces together into the filter, stats, and top-level expressions."""

    def with_boolean_logic(operand):
        # Wrap an operand in not/and/or handling. The levels are listed in
        # the order given to infixNotation: unary "not" (right-associative),
        # then binary "and", then binary "or" (both left-associative).
        return infixNotation(
            operand,
            [
                (self.not_kw, 1, opAssoc.RIGHT),
                (self.and_kw, 2, opAssoc.LEFT),
                (self.or_kw, 2, opAssoc.LEFT),
            ],
        )

    # Attach every comparison form to the forward-declared comparison_expr.
    # Alternatives are tried top to bottom: the negated forms come first and
    # a bare typed field is the last resort.
    self.comparison_expr <<= (
        self.negated_binary_comparison
        | self.negated_unary_comparison
        | self.is_not_comparison
        | self.not_between_comparison_natural
        | self.not_between_comparison_list
        | self.std_comparison
        | self.between_comparison_natural
        | self.between_comparison_list
        | self.unary_comparison
        | self.value_in_field_list
        | self.value_in_field
        | self.typed_field
    )

    # A base expression is a geo call, an nslookup call, or a regular
    # comparison, tried in that order.
    self.base_expr = self.geo_mutator_expr | self.nslookup_mutator_expr | self.comparison_expr

    # Filter expression with operator precedence.
    self.filter_expr = with_boolean_logic(self.base_expr)

    # The complete TQL expression: filter followed by stats, stats alone
    # (applies to all records), or filter alone.
    self.tql_expr = Forward()
    self.tql_expr <<= (
        Group(self.filter_expr + self.stats_expr)
        | self.stats_expr
        | self.filter_expr
    )

    # Conditions inside geo(...) and nslookup(...) are boolean combinations
    # of regular comparisons only.
    self.geo_conditions <<= with_boolean_logic(self.comparison_expr)
    self.nslookup_conditions <<= with_boolean_logic(self.comparison_expr)
|