tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56):
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,473 @@
1
+ """TQL Grammar definitions using pyparsing."""
2
+
3
+ from pyparsing import (
4
+ CaselessKeyword,
5
+ Forward,
6
+ Group,
7
+ )
8
+ from pyparsing import Optional as PyparsingOptional
9
+ from pyparsing import (
10
+ QuotedString,
11
+ Regex,
12
+ Suppress,
13
+ Word,
14
+ ZeroOrMore,
15
+ alphanums,
16
+ alphas,
17
+ delimitedList,
18
+ infixNotation,
19
+ nums,
20
+ oneOf,
21
+ opAssoc,
22
+ )
23
+
24
+
25
class TQLGrammar:
    """Holder for the pyparsing elements that make up the TQL grammar.

    Creating an instance builds every token, operator, comparison, special
    expression (geo / nslookup) and stats expression and stores each of them
    as an instance attribute. ``tql_expr`` is the top-level expression; the
    remaining attributes are the pieces it is assembled from.
    """

    def __init__(self):
        """Build the grammar by running every setup stage in sequence."""
        # Later stages read attributes assigned by earlier ones (for example
        # the comparisons use the fields, values and operators), so keep
        # this order.
        build_stages = (
            self._setup_basic_tokens,
            self._setup_operators,
            self._setup_fields_and_values,
            self._setup_mutators,
            self._setup_comparisons,
            self._setup_special_expressions,
            self._setup_stats_expressions,
            self._setup_final_expressions,
        )
        for build_stage in build_stages:
            build_stage()

    def _setup_basic_tokens(self):
        """Create the literal tokens: identifiers, numbers, strings, lists, type hints."""
        self.identifier = Word(alphas, alphanums + "_.-")
        self.number = Word(nums + ".-")

        # A string may be wrapped in either double or single quotes.
        double_quoted = QuotedString('"')
        single_quoted = QuotedString("'")
        self.string_literal = double_quoted | single_quoted

        # Digits, dots and a slash, e.g. 192.168.1.0/24.
        self.cidr_notation = Word(nums + "./")

        # A bracketed, comma-separated list of strings, numbers or identifiers.
        self.list_item = self.string_literal | self.number | self.identifier
        bracketed_items = Suppress("[") + delimitedList(self.list_item) + Suppress("]")
        self.list_literal = Group(bracketed_items)

        # Alternatives are tried left to right: quoted strings first, then
        # CIDR notation, then plain numbers, then identifiers.
        self.simple_value = self.string_literal | self.cidr_notation | self.number | self.identifier

        # Names accepted after "::" on a field.
        self.type_hint = oneOf("number int float decimal date array bool boolean geo object string", caseless=True)

    def _setup_operators(self):
        """Create the comparison, negation and logical operator elements."""

        def negated(word):
            # "not <word>" or "! <word>"; the negation marker is kept in the results.
            return (CaselessKeyword("not") | "!") + CaselessKeyword(word)

        def banged(word):
            # "! <word>" with the "!" suppressed from the results.
            return Suppress("!") + CaselessKeyword(word)

        # Operators that are followed by a value. "!=" is listed ahead of the
        # other "!"-prefixed operators.
        self.binary_ops = oneOf(
            "!= "
            "!contains !in !startswith !endswith !regexp !cidr !is !between "
            "regexp in contains = eq ne > gt >= gte < lt <= lte cidr is startswith endswith any all none",
            caseless=True,
        )

        # Negated binary operators written with "not" or "!" ahead of the keyword.
        self.not_in_op = negated("in")
        self.not_contains_op = negated("contains")
        self.not_startswith_op = negated("startswith")
        self.not_endswith_op = negated("endswith")
        self.not_regexp_op = negated("regexp")
        self.not_cidr_op = negated("cidr")
        self.not_any_op = negated("any")
        self.not_all_op = negated("all")
        self.not_none_op = negated("none")

        # The same operators in "!"-only form, where the "!" is dropped from the output.
        self.bang_in_op = banged("in")
        self.bang_contains_op = banged("contains")
        self.bang_startswith_op = banged("startswith")
        self.bang_endswith_op = banged("endswith")
        self.bang_regexp_op = banged("regexp")
        self.bang_cidr_op = banged("cidr")
        self.bang_any_op = banged("any")
        self.bang_all_op = banged("all")
        self.bang_none_op = banged("none")

        # "between" is kept apart from binary_ops; it has dedicated comparison forms.
        self.between_op = CaselessKeyword("between")
        self.not_between_op = negated("between")
        self.bang_between_op = banged("between")

        # Operators that take no value.
        self.unary_ops = oneOf("exists !exists", caseless=True)
        self.not_exists_op = negated("exists")
        self.bang_exists_op = banged("exists")

        # "is", "is not" and "!is".
        self.is_op = CaselessKeyword("is")
        self.is_not_op = CaselessKeyword("is") + CaselessKeyword("not")
        self.bang_is_op = banged("is")

        # Logical connectives and quantifier keywords.
        self.not_kw = CaselessKeyword("not") | "!"
        self.and_kw = CaselessKeyword("and")
        self.or_kw = CaselessKeyword("or")
        self.any_kw = CaselessKeyword("any")
        self.all_kw = CaselessKeyword("all")

    def _setup_fields_and_values(self):
        """Create the field-name element."""
        # Starts with "@" or a letter. The lazy quantifier plus lookahead ends
        # the name just before "::", before the first character outside the
        # allowed set, or at end of input, so single colons stay part of the
        # name while "::" is left for the type hint.
        self.field_name = Regex(r"[@a-zA-Z][@a-zA-Z0-9_.:-]*?(?=::|[^@a-zA-Z0-9_.:-]|$)")

    def _setup_mutators(self):
        """Create mutators, typed fields and value forms that may carry mutators."""

        def field_with_optional_type():
            # field_name optionally followed by "::type"; a fresh element per call.
            return self.field_name + PyparsingOptional(Suppress("::") + self.type_hint)

        # Recognised mutator names.
        self.mutator_name = oneOf(
            "lowercase uppercase trim split nslookup geoip_lookup geo "
            "length refang defang b64encode b64decode urldecode "
            "any all avg average max min sum is_private is_global",
            caseless=True,
        )

        # name=value parameters, where the value is a string or a list literal.
        param_value = self.string_literal | self.list_literal
        self.mutator_param = Group(self.identifier + Suppress("=") + param_value)
        self.mutator_params = Group(Suppress("(") + delimitedList(self.mutator_param) + Suppress(")"))

        # "| name" optionally followed by "(params)", repeated zero or more times.
        self.mutator = Group(Suppress("|") + self.mutator_name + PyparsingOptional(self.mutator_params))
        self.mutator_chain = ZeroOrMore(self.mutator)

        # Field with an optional type hint but no mutators (used by geo / nslookup).
        self.typed_field_no_mutators = Group(field_with_optional_type())

        # Field with an optional type hint followed by a mutator chain.
        self.typed_field = Group(field_with_optional_type() + self.mutator_chain)

        # Value forms, each optionally followed by mutators.
        self.simple_value_with_mutators = Group(self.simple_value + self.mutator_chain)
        inner_value = self.string_literal | self.number | self.identifier
        self.parenthesized_value = Group(Suppress("(") + inner_value + self.mutator_chain + Suppress(")"))
        self.list_with_mutators = Group(self.list_literal + self.mutator_chain)

        # Alternatives are tried in the order listed.
        self.value = (
            self.list_with_mutators
            | self.list_literal
            | self.parenthesized_value
            | self.simple_value_with_mutators
            | self.simple_value
        )

    def _setup_comparisons(self):
        """Create the comparison expressions built from fields, operators and values."""
        # field op value
        self.std_comparison = Group(self.typed_field + self.binary_ops + self.value)

        # field between [low, high]
        self.between_comparison_list = Group(self.typed_field + self.between_op + self.list_literal)

        # field between low and high
        self.between_comparison_natural = Group(
            self.typed_field + self.between_op + self.simple_value + self.and_kw + self.simple_value
        )

        # field op (no value)
        self.unary_comparison = Group(self.typed_field + self.unary_ops)

        # Every negated binary operator, "not"/"!" forms ahead of the bang
        # forms; the alternation is assembled in exactly this order.
        negation_ops = (
            self.not_in_op,
            self.not_contains_op,
            self.not_startswith_op,
            self.not_endswith_op,
            self.not_regexp_op,
            self.not_cidr_op,
            self.not_any_op,
            self.not_all_op,
            self.not_none_op,
            self.bang_in_op,
            self.bang_contains_op,
            self.bang_startswith_op,
            self.bang_endswith_op,
            self.bang_regexp_op,
            self.bang_cidr_op,
            self.bang_any_op,
            self.bang_all_op,
            self.bang_none_op,
        )
        negated_op = negation_ops[0]
        for alternative in negation_ops[1:]:
            negated_op = negated_op | alternative
        self.negated_binary_comparison = Group(self.typed_field + negated_op + self.value)

        # field not exists / field !exists
        negated_exists = self.not_exists_op | self.bang_exists_op
        self.negated_unary_comparison = Group(self.typed_field + negated_exists)

        # field is not value / field !is value
        negated_is = self.is_not_op | self.bang_is_op
        self.is_not_comparison = Group(self.typed_field + negated_is + self.simple_value)

        # Negated between, in list form and in "low and high" form.
        self.not_between_comparison_list = Group(
            self.typed_field + (self.not_between_op | self.bang_between_op) + self.list_literal
        )
        self.not_between_comparison_natural = Group(
            self.typed_field
            + (self.not_between_op | self.bang_between_op)
            + self.simple_value
            + self.and_kw
            + self.simple_value
        )

        # Bracketed list of fields for the reversed "in" form.
        self.field_list_item = self.typed_field
        self.field_list = Group(Suppress("[") + delimitedList(self.field_list_item) + Suppress("]"))

        # Reversed "in": value in field, or value in [field, ...].
        self.value_in_field = Group(self.value + CaselessKeyword("in") + self.typed_field)
        self.value_in_field_list = Group(self.value + CaselessKeyword("in") + self.field_list)

    def _setup_special_expressions(self):
        """Create the parenthesised geo() and nslookup() expression forms."""

        def piped_call(keyword, *arguments):
            # field | keyword( arguments... ), grouped into a single result.
            # Pipe and parentheses are suppressed from the output.
            expr = self.typed_field_no_mutators + Suppress("|") + keyword + Suppress("(")
            for argument in arguments:
                expr = expr + argument
            return Group(expr + Suppress(")"))

        # Forward declaration; the definition is attached in _setup_final_expressions.
        self.comparison_expr = Forward()

        # ---- geo() ----
        self.geo_kw = CaselessKeyword("geo") | CaselessKeyword("geoip_lookup")
        self.geo_conditions = Forward()

        # name=value parameters: true/false, a quoted string, or digits.
        self.geo_param_name = Word(alphas, alphanums + "_")
        self.geo_param_value = (
            CaselessKeyword("true")
            | CaselessKeyword("false")
            | QuotedString('"', escChar="\\")
            | QuotedString("'", escChar="\\")
            | Regex(r"\d+")
        )
        self.geo_param = Group(self.geo_param_name + Suppress("=") + self.geo_param_value)
        self.geo_params = PyparsingOptional(Suppress(",") + delimitedList(self.geo_param))

        # The accepted argument layouts between the parentheses.
        self.geo_empty = piped_call(self.geo_kw)
        self.geo_params_only = piped_call(self.geo_kw, delimitedList(self.geo_param))
        self.geo_conditions_only = piped_call(self.geo_kw, self.geo_conditions)
        self.geo_conditions_and_params = piped_call(
            self.geo_kw, self.geo_conditions, Suppress(","), delimitedList(self.geo_param)
        )
        self.geo_params_and_conditions = piped_call(
            self.geo_kw, delimitedList(self.geo_param), Suppress(","), self.geo_conditions
        )

        # Layouts are tried in the order listed.
        self.geo_mutator_expr = (
            self.geo_params_and_conditions
            | self.geo_conditions_and_params
            | self.geo_conditions_only
            | self.geo_params_only
            | self.geo_empty
        )

        # ---- nslookup() ----
        self.nslookup_kw = CaselessKeyword("nslookup")
        self.nslookup_conditions = Forward()

        # Same parameter shape as geo, with list literals also accepted as values.
        self.nslookup_param_name = Word(alphas, alphanums + "_")
        self.nslookup_param_value = (
            CaselessKeyword("true")
            | CaselessKeyword("false")
            | QuotedString('"', escChar="\\")
            | QuotedString("'", escChar="\\")
            | self.list_literal
            | Regex(r"\d+")
        )
        self.nslookup_param = Group(self.nslookup_param_name + Suppress("=") + self.nslookup_param_value)
        self.nslookup_params = PyparsingOptional(Suppress(",") + delimitedList(self.nslookup_param))

        self.nslookup_empty = piped_call(self.nslookup_kw)
        self.nslookup_params_only = piped_call(self.nslookup_kw, delimitedList(self.nslookup_param))
        self.nslookup_conditions_only = piped_call(self.nslookup_kw, self.nslookup_conditions)
        self.nslookup_conditions_and_params = piped_call(
            self.nslookup_kw, self.nslookup_conditions, Suppress(","), delimitedList(self.nslookup_param)
        )
        self.nslookup_params_and_conditions = piped_call(
            self.nslookup_kw, delimitedList(self.nslookup_param), Suppress(","), self.nslookup_conditions
        )

        # Layouts are tried in the order listed.
        self.nslookup_mutator_expr = (
            self.nslookup_params_and_conditions
            | self.nslookup_conditions_and_params
            | self.nslookup_conditions_only
            | self.nslookup_params_only
            | self.nslookup_empty
        )

    def _setup_stats_expressions(self):
        """Create the "| stats ... [by ...]" expression and its parts."""
        self.stats_kw = CaselessKeyword("stats")
        self.by_kw = CaselessKeyword("by")

        # Aggregation function names, aliases included.
        self.agg_function_name = oneOf(
            "count unique_count sum min max average avg median med std standard_deviation "
            "percentile percentiles p pct percentile_rank percentile_ranks pct_rank pct_ranks",
            caseless=True,
        )

        # count(*): only the keyword is kept in the results.
        self.count_all = CaselessKeyword("count") + Suppress("(") + Suppress("*") + Suppress(")")

        # name(field[, top|bottom N]) or name(field[, n1, n2, ...]), with
        # count(*) as the fallback alternative.
        ranked_argument = oneOf("top bottom", caseless=True) + self.number
        extra_arguments = PyparsingOptional(Suppress(",") + (ranked_argument | delimitedList(self.number)))
        field_aggregation = Group(
            self.agg_function_name + Suppress("(") + self.field_name + extra_arguments + Suppress(")")
        )
        self.agg_function = field_aggregation | self.count_all

        # Optional alias, e.g. sum(revenue) as total_revenue.
        self.as_kw = CaselessKeyword("as")
        self.agg_with_alias = Group(self.agg_function + PyparsingOptional(self.as_kw + self.identifier))

        # Comma-separated aggregations and comma-separated group-by fields.
        self.agg_list = delimitedList(self.agg_with_alias)
        self.group_by_fields = delimitedList(self.field_name)

        # | stats <aggregations> [by <fields>]
        group_by_clause = PyparsingOptional(self.by_kw + self.group_by_fields)
        self.stats_expr = Group(Suppress("|") + self.stats_kw + self.agg_list + group_by_clause)

    def _setup_final_expressions(self):
        """Attach the forward-declared expressions and assemble the top-level grammar."""

        def boolean_combinations(operand):
            # not / and / or over the given operand; a fresh operator table per call.
            return infixNotation(
                operand,
                [
                    (self.not_kw, 1, opAssoc.RIGHT),
                    (self.and_kw, 2, opAssoc.LEFT),
                    (self.or_kw, 2, opAssoc.LEFT),
                ],
            )

        # Every comparison form; alternatives are tried in the order listed,
        # with a bare typed field as the last resort.
        self.comparison_expr << (
            self.negated_binary_comparison
            | self.negated_unary_comparison
            | self.is_not_comparison
            | self.not_between_comparison_natural
            | self.not_between_comparison_list
            | self.std_comparison
            | self.between_comparison_natural
            | self.between_comparison_list
            | self.unary_comparison
            | self.value_in_field_list
            | self.value_in_field
            | self.typed_field
        )

        # geo and nslookup call expressions are tried before plain comparisons.
        self.base_expr = self.geo_mutator_expr | self.nslookup_mutator_expr | self.comparison_expr

        # Filter expression: base expressions combined with not / and / or.
        self.filter_expr = boolean_combinations(self.base_expr)

        # Complete TQL expression: filter followed by stats, stats alone, or filter alone.
        self.tql_expr = Forward()
        filter_then_stats = Group(self.filter_expr + self.stats_expr)
        self.tql_expr << (filter_then_stats | self.stats_expr | self.filter_expr)

        # Conditions allowed inside geo(...) and nslookup(...): plain
        # comparisons combined with not / and / or.
        self.geo_conditions << boolean_combinations(self.comparison_expr)
        self.nslookup_conditions << boolean_combinations(self.comparison_expr)