tellaro-query-language 0.2.17__tar.gz → 0.2.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/PKG-INFO +1 -1
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/pyproject.toml +1 -1
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/value_comparison.py +20 -8
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/encoding.py +7 -6
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/field_mapping.py +1 -1
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/lucene_converter.py +1 -1
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/query_converter.py +2 -2
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser.py +2 -2
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/grammar.py +80 -44
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/LICENSE +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/README.md +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/__init__.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/analyzer.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/cache/__init__.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/cache/base.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/cache/memory.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/cache/redis.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/cli.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/README.md +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/__init__.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/file_operations.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/opensearch_operations.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/stats_operations.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/validation_operations.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/evaluator.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/README.md +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/__init__.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/field_access.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/special_expressions.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/exceptions.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/field_type_inference.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/geoip_normalizer.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutator_analyzer.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/__init__.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/base.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/dns.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/geo.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/list.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/network.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/security.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/string.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/README.md +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/__init__.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_mappings.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_stats.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/README.md +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/__init__.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/ast_builder.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/error_analyzer.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/field_extractor.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/post_processor.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/scripts.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/stats_evaluator.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/stats_transformer.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/streaming_file_processor.py +0 -0
- {tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/validators.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "tellaro-query-language"
|
|
3
|
-
version = "0.2.17"
|
|
3
|
+
version = "0.2.19"
|
|
4
4
|
description = "A flexible, human-friendly query language for searching and filtering structured data"
|
|
5
5
|
authors = ["Justin Henderson <justin@tellaro.io>"]
|
|
6
6
|
license = "Proprietary"
|
|
@@ -56,7 +56,14 @@ class ValueComparator:
|
|
|
56
56
|
return False # Missing fields return False for all "is not" comparisons
|
|
57
57
|
# For negated string operators, missing fields should return True
|
|
58
58
|
# (e.g., if field doesn't exist, it doesn't contain/start with/end with the value)
|
|
59
|
-
elif operator in [
|
|
59
|
+
elif operator in [
|
|
60
|
+
"not_contains",
|
|
61
|
+
"not_startswith",
|
|
62
|
+
"not_endswith",
|
|
63
|
+
"not_regexp",
|
|
64
|
+
"not_matches",
|
|
65
|
+
"not_regex",
|
|
66
|
+
]:
|
|
60
67
|
return True
|
|
61
68
|
# For not_cidr, missing fields should return False (can't check CIDR on missing IP)
|
|
62
69
|
elif operator in ["cidr", "not_cidr"]:
|
|
@@ -94,18 +101,23 @@ class ValueComparator:
|
|
|
94
101
|
if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
|
|
95
102
|
field_value = field_value.lower() == "true"
|
|
96
103
|
|
|
97
|
-
# Type compatibility check for
|
|
98
|
-
#
|
|
104
|
+
# Type compatibility check for comparison operators
|
|
105
|
+
# For >, >=, <, <= operators:
|
|
106
|
+
# - Numeric comparison is preferred if both values are numeric
|
|
107
|
+
# - String comparison is allowed (supports ISO 8601 timestamps which sort correctly as strings)
|
|
108
|
+
# - Mixed types (one numeric, one string) return False
|
|
99
109
|
# Exception: Arrays are handled specially in the operator logic below
|
|
100
110
|
if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
|
|
101
111
|
# Skip check if field_value is an array - handled by array logic below
|
|
102
112
|
if not isinstance(field_value, (list, tuple)):
|
|
103
113
|
field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
|
|
104
114
|
expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
|
|
115
|
+
field_is_string = isinstance(field_value, str)
|
|
116
|
+
expected_is_string = isinstance(expected_value, str)
|
|
105
117
|
|
|
106
|
-
if
|
|
107
|
-
|
|
108
|
-
#
|
|
118
|
+
# Allow comparison if both are numeric OR both are strings
|
|
119
|
+
if not ((field_is_numeric and expected_is_numeric) or (field_is_string and expected_is_string)):
|
|
120
|
+
# Mixed types or unsupported types - cannot compare
|
|
109
121
|
return False
|
|
110
122
|
|
|
111
123
|
try:
|
|
@@ -184,7 +196,7 @@ class ValueComparator:
|
|
|
184
196
|
return field_value in converted_list
|
|
185
197
|
else:
|
|
186
198
|
return field_value == expected_value
|
|
187
|
-
elif operator
|
|
199
|
+
elif operator in ("regexp", "matches", "regex"):
|
|
188
200
|
# Unwrap single-element lists for string operators
|
|
189
201
|
if isinstance(expected_value, list) and len(expected_value) == 1:
|
|
190
202
|
expected_value = expected_value[0]
|
|
@@ -259,7 +271,7 @@ class ValueComparator:
|
|
|
259
271
|
expected_value = expected_value[0]
|
|
260
272
|
# Case-insensitive comparison to match post-processor behavior
|
|
261
273
|
return not str(field_value).lower().endswith(str(expected_value).lower())
|
|
262
|
-
elif operator
|
|
274
|
+
elif operator in ("not_regexp", "not_matches", "not_regex"):
|
|
263
275
|
# Unwrap single-element lists for string operators
|
|
264
276
|
if isinstance(expected_value, list) and len(expected_value) == 1:
|
|
265
277
|
expected_value = expected_value[0]
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/encoding.py
RENAMED
|
@@ -337,26 +337,27 @@ class Md5Mutator(BaseMutator):
|
|
|
337
337
|
append_field = self.params.get("field")
|
|
338
338
|
|
|
339
339
|
# Handle different input types
|
|
340
|
+
# Note: MD5 is used here for data fingerprinting/checksums, not security
|
|
340
341
|
hashed_value: Any
|
|
341
342
|
if value is None:
|
|
342
343
|
hashed_value = None
|
|
343
344
|
elif isinstance(value, str):
|
|
344
|
-
hashed_value = hashlib.md5(value.encode("utf-8")).hexdigest()
|
|
345
|
+
hashed_value = hashlib.md5(value.encode("utf-8"), usedforsecurity=False).hexdigest()
|
|
345
346
|
elif isinstance(value, bytes):
|
|
346
|
-
hashed_value = hashlib.md5(value).hexdigest()
|
|
347
|
+
hashed_value = hashlib.md5(value, usedforsecurity=False).hexdigest()
|
|
347
348
|
elif isinstance(value, list):
|
|
348
349
|
hashed_value = []
|
|
349
350
|
for item in value:
|
|
350
351
|
if isinstance(item, str):
|
|
351
|
-
hashed_value.append(hashlib.md5(item.encode("utf-8")).hexdigest())
|
|
352
|
+
hashed_value.append(hashlib.md5(item.encode("utf-8"), usedforsecurity=False).hexdigest())
|
|
352
353
|
elif isinstance(item, bytes):
|
|
353
|
-
hashed_value.append(hashlib.md5(item).hexdigest())
|
|
354
|
+
hashed_value.append(hashlib.md5(item, usedforsecurity=False).hexdigest())
|
|
354
355
|
elif item is None:
|
|
355
356
|
hashed_value.append(None)
|
|
356
357
|
else:
|
|
357
|
-
hashed_value.append(hashlib.md5(str(item).encode("utf-8")).hexdigest())
|
|
358
|
+
hashed_value.append(hashlib.md5(str(item).encode("utf-8"), usedforsecurity=False).hexdigest())
|
|
358
359
|
else:
|
|
359
|
-
hashed_value = hashlib.md5(str(value).encode("utf-8")).hexdigest()
|
|
360
|
+
hashed_value = hashlib.md5(str(value).encode("utf-8"), usedforsecurity=False).hexdigest()
|
|
360
361
|
|
|
361
362
|
# If field is specified, add to record and return original value
|
|
362
363
|
if append_field:
|
|
@@ -231,7 +231,7 @@ class FieldMapping:
|
|
|
231
231
|
}
|
|
232
232
|
|
|
233
233
|
# Operators that work best with text fields (full-text search)
|
|
234
|
-
text_operators = {"contains", "regexp", "not_regexp"}
|
|
234
|
+
text_operators = {"contains", "regexp", "matches", "regex", "not_regexp", "not_matches", "not_regex"}
|
|
235
235
|
|
|
236
236
|
# Operators that require numeric/date fields
|
|
237
237
|
range_operators = {">", ">=", "<", "<=", "gt", "gte", "lt", "lte", "between", "not_between"}
|
|
@@ -105,7 +105,7 @@ class LuceneConverter:
|
|
|
105
105
|
return f"{lucene_field}:({' OR '.join(escaped_values)})"
|
|
106
106
|
else:
|
|
107
107
|
return f"{lucene_field}:{escaped_value}"
|
|
108
|
-
elif operator
|
|
108
|
+
elif operator in ("regexp", "matches", "regex"):
|
|
109
109
|
return f"{lucene_field}:/{escaped_value}/"
|
|
110
110
|
elif operator == "exists":
|
|
111
111
|
return f"_exists_:{lucene_field}"
|
|
@@ -315,7 +315,7 @@ class QueryConverter:
|
|
|
315
315
|
return {"terms": {opensearch_field: value}}
|
|
316
316
|
else:
|
|
317
317
|
return {"term": {opensearch_field: value}}
|
|
318
|
-
elif operator
|
|
318
|
+
elif operator in ("regexp", "matches", "regex"):
|
|
319
319
|
# Unwrap single-element lists for string operators
|
|
320
320
|
if isinstance(value, list) and len(value) == 1:
|
|
321
321
|
value = value[0]
|
|
@@ -390,7 +390,7 @@ class QueryConverter:
|
|
|
390
390
|
if isinstance(value, list) and len(value) == 1:
|
|
391
391
|
value = value[0]
|
|
392
392
|
return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}"}}}}
|
|
393
|
-
elif operator
|
|
393
|
+
elif operator in ("not_regexp", "not_matches", "not_regex"):
|
|
394
394
|
# Unwrap single-element lists for string operators
|
|
395
395
|
if isinstance(value, list) and len(value) == 1:
|
|
396
396
|
value = value[0]
|
|
@@ -14,7 +14,7 @@ from .parser_components.error_analyzer import ErrorAnalyzer
|
|
|
14
14
|
from .parser_components.field_extractor import FieldExtractor
|
|
15
15
|
from .parser_components.grammar import TQLGrammar
|
|
16
16
|
|
|
17
|
-
ParserElement.enablePackrat()
|
|
17
|
+
ParserElement.enable_packrat()
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class TQLParser:
|
|
@@ -53,7 +53,7 @@ class TQLParser:
|
|
|
53
53
|
|
|
54
54
|
try:
|
|
55
55
|
# Parse the query
|
|
56
|
-
parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)
|
|
56
|
+
parsed_result = self.grammar.tql_expr.parse_string(query, parse_all=True)
|
|
57
57
|
|
|
58
58
|
# Convert to our AST format
|
|
59
59
|
# Start depth counting at 0 from parse() entry point
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/grammar.py
RENAMED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from pyparsing import (
|
|
4
4
|
CaselessKeyword,
|
|
5
|
+
DelimitedList,
|
|
5
6
|
Forward,
|
|
6
7
|
Group,
|
|
7
8
|
Literal,
|
|
@@ -15,10 +16,8 @@ from pyparsing import (
|
|
|
15
16
|
ZeroOrMore,
|
|
16
17
|
alphanums,
|
|
17
18
|
alphas,
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
nums,
|
|
21
|
-
oneOf,
|
|
19
|
+
infix_notation,
|
|
20
|
+
one_of,
|
|
22
21
|
opAssoc,
|
|
23
22
|
)
|
|
24
23
|
|
|
@@ -45,27 +44,54 @@ class TQLGrammar:
|
|
|
45
44
|
"""Set up basic tokens and literals."""
|
|
46
45
|
# Basic tokens
|
|
47
46
|
self.identifier = Word(alphas, alphanums + "_.-")
|
|
48
|
-
|
|
47
|
+
# Number pattern supports:
|
|
48
|
+
# - Integers: 123, -456
|
|
49
|
+
# - Floats: 1.5, -3.14
|
|
50
|
+
# - Scientific notation: 1.0e5, 1.5e-3, 2E+10
|
|
51
|
+
# Pattern matches Rust's float grammar for parity
|
|
52
|
+
self.scientific_number = Regex(r"-?\d+\.\d+[eE][+-]?\d+")
|
|
53
|
+
self.regular_number = Regex(r"-?\d+(\.\d+)?")
|
|
54
|
+
self.number = self.scientific_number | self.regular_number
|
|
49
55
|
self.string_literal = QuotedString('"') | QuotedString("'")
|
|
50
56
|
# CIDR notation for IP addresses (e.g., 192.168.1.0/24)
|
|
51
|
-
self.cidr_notation =
|
|
57
|
+
self.cidr_notation = Regex(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/\d{1,2}")
|
|
58
|
+
# IP address (without mask) - matches 4 octets separated by dots
|
|
59
|
+
self.ip_address = Regex(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
|
|
60
|
+
# Dot-separated numeric values (like partial IPs: 10.0.0, version numbers: 1.2.3)
|
|
61
|
+
# This allows values like "10.0.0" to be matched as a single token
|
|
62
|
+
self.dotted_number = Regex(r"\d+(\.\d+){2,}")
|
|
52
63
|
# Define list items as strings, numbers, or identifiers
|
|
53
64
|
self.list_item = self.string_literal | self.number | self.identifier
|
|
54
|
-
self.list_literal = Group(Suppress("[") +
|
|
55
|
-
|
|
56
|
-
# Define simple values –
|
|
57
|
-
|
|
65
|
+
self.list_literal = Group(Suppress("[") + DelimitedList(self.list_item) + Suppress("]"))
|
|
66
|
+
|
|
67
|
+
# Define simple values – order matters:
|
|
68
|
+
# 1. String literals (quoted)
|
|
69
|
+
# 2. CIDR notation (IP/mask format) - must come before IP address
|
|
70
|
+
# 3. IP address (4 octets without mask)
|
|
71
|
+
# 4. Dotted numbers (partial IPs like 10.0.0, versions like 1.2.3)
|
|
72
|
+
# 5. Scientific notation (must come before regular numbers to avoid partial match)
|
|
73
|
+
# 6. Regular numbers
|
|
74
|
+
# 7. Identifiers (unquoted strings)
|
|
75
|
+
self.simple_value = (
|
|
76
|
+
self.string_literal
|
|
77
|
+
| self.cidr_notation
|
|
78
|
+
| self.ip_address
|
|
79
|
+
| self.dotted_number
|
|
80
|
+
| self.scientific_number
|
|
81
|
+
| self.regular_number
|
|
82
|
+
| self.identifier
|
|
83
|
+
)
|
|
58
84
|
|
|
59
85
|
# Define type hints
|
|
60
|
-
self.type_hint =
|
|
86
|
+
self.type_hint = one_of("number int float decimal date array bool boolean geo object string", caseless=True)
|
|
61
87
|
|
|
62
88
|
def _setup_operators(self):
|
|
63
89
|
"""Set up operator definitions."""
|
|
64
90
|
# Define binary operators (require a value) - != must come before ! operators
|
|
65
|
-
self.binary_ops =
|
|
91
|
+
self.binary_ops = one_of(
|
|
66
92
|
"!= " # != must be before ! operators
|
|
67
|
-
+ "!contains !in !startswith !endswith !regexp !cidr !is !between "
|
|
68
|
-
+ "regexp in contains = eq ne > gt >= gte < lt <= lte cidr is startswith endswith any all none",
|
|
93
|
+
+ "!contains !in !startswith !endswith !regexp !matches !cidr !is !between "
|
|
94
|
+
+ "regexp matches regex in contains = eq ne > gt >= gte < lt <= lte cidr is startswith endswith any all none",
|
|
69
95
|
caseless=True,
|
|
70
96
|
)
|
|
71
97
|
|
|
@@ -75,6 +101,7 @@ class TQLGrammar:
|
|
|
75
101
|
self.not_startswith_op = (CaselessKeyword("not") | "!") + CaselessKeyword("startswith")
|
|
76
102
|
self.not_endswith_op = (CaselessKeyword("not") | "!") + CaselessKeyword("endswith")
|
|
77
103
|
self.not_regexp_op = (CaselessKeyword("not") | "!") + CaselessKeyword("regexp")
|
|
104
|
+
self.not_matches_op = (CaselessKeyword("not") | "!") + CaselessKeyword("matches")
|
|
78
105
|
self.not_cidr_op = (CaselessKeyword("not") | "!") + CaselessKeyword("cidr")
|
|
79
106
|
self.not_any_op = (CaselessKeyword("not") | "!") + CaselessKeyword("any")
|
|
80
107
|
self.not_all_op = (CaselessKeyword("not") | "!") + CaselessKeyword("all")
|
|
@@ -86,6 +113,7 @@ class TQLGrammar:
|
|
|
86
113
|
self.bang_startswith_op = Suppress("!") + CaselessKeyword("startswith")
|
|
87
114
|
self.bang_endswith_op = Suppress("!") + CaselessKeyword("endswith")
|
|
88
115
|
self.bang_regexp_op = Suppress("!") + CaselessKeyword("regexp")
|
|
116
|
+
self.bang_matches_op = Suppress("!") + CaselessKeyword("matches")
|
|
89
117
|
self.bang_cidr_op = Suppress("!") + CaselessKeyword("cidr")
|
|
90
118
|
self.bang_any_op = Suppress("!") + CaselessKeyword("any")
|
|
91
119
|
self.bang_all_op = Suppress("!") + CaselessKeyword("all")
|
|
@@ -97,7 +125,7 @@ class TQLGrammar:
|
|
|
97
125
|
self.bang_between_op = Suppress("!") + CaselessKeyword("between")
|
|
98
126
|
|
|
99
127
|
# Define unary operators (no value required)
|
|
100
|
-
self.unary_ops =
|
|
128
|
+
self.unary_ops = one_of("exists !exists", caseless=True)
|
|
101
129
|
self.not_exists_op = (CaselessKeyword("not") | "!") + CaselessKeyword("exists")
|
|
102
130
|
self.bang_exists_op = Suppress("!") + CaselessKeyword("exists")
|
|
103
131
|
|
|
@@ -115,16 +143,22 @@ class TQLGrammar:
|
|
|
115
143
|
|
|
116
144
|
def _setup_fields_and_values(self):
|
|
117
145
|
"""Set up field and value definitions."""
|
|
118
|
-
# Field names
|
|
119
|
-
#
|
|
120
|
-
|
|
146
|
+
# Field names:
|
|
147
|
+
# - May start with @ (like @timestamp, @metadata)
|
|
148
|
+
# - First char after optional @ must be a letter or underscore
|
|
149
|
+
# - Can contain letters, numbers, underscores, dots, hyphens
|
|
150
|
+
# - Colon NOT allowed (conflicts with :: type hints)
|
|
151
|
+
# - @ only allowed at start (time@stamp is INVALID)
|
|
152
|
+
# - Stops at :: for type hints
|
|
153
|
+
self.field_name = Regex(r"@?[a-zA-Z_][a-zA-Z0-9_.-]*(?=::|[^a-zA-Z0-9_.-]|$)")
|
|
121
154
|
|
|
122
155
|
def _setup_mutators(self):
|
|
123
156
|
"""Set up mutator definitions."""
|
|
124
157
|
# Define mutators
|
|
125
|
-
self.mutator_name =
|
|
158
|
+
self.mutator_name = one_of(
|
|
126
159
|
"lowercase uppercase trim split replace nslookup geoip_lookup geo "
|
|
127
|
-
"length refang defang b64encode b64decode urldecode "
|
|
160
|
+
"length refang defang b64encode b64decode urldecode hexencode hexdecode "
|
|
161
|
+
"md5 sha256 "
|
|
128
162
|
"any all none avg average max min sum is_private is_global "
|
|
129
163
|
"count unique first last",
|
|
130
164
|
caseless=True,
|
|
@@ -137,7 +171,7 @@ class TQLGrammar:
|
|
|
137
171
|
# Positional parameters can be strings (quoted or unquoted), numbers, or identifiers
|
|
138
172
|
self.mutator_positional_param = self.string_literal | self.number | self.identifier
|
|
139
173
|
self.mutator_param = self.mutator_named_param | self.mutator_positional_param
|
|
140
|
-
self.mutator_params = Group(Suppress("(") +
|
|
174
|
+
self.mutator_params = Group(Suppress("(") + DelimitedList(self.mutator_param) + Suppress(")"))
|
|
141
175
|
self.mutator = Group(Suppress("|") + self.mutator_name + PyparsingOptional(self.mutator_params))
|
|
142
176
|
self.mutator_chain = ZeroOrMore(self.mutator)
|
|
143
177
|
|
|
@@ -188,6 +222,7 @@ class TQLGrammar:
|
|
|
188
222
|
| self.not_startswith_op
|
|
189
223
|
| self.not_endswith_op
|
|
190
224
|
| self.not_regexp_op
|
|
225
|
+
| self.not_matches_op
|
|
191
226
|
| self.not_cidr_op
|
|
192
227
|
| self.not_any_op
|
|
193
228
|
| self.not_all_op
|
|
@@ -197,6 +232,7 @@ class TQLGrammar:
|
|
|
197
232
|
| self.bang_startswith_op
|
|
198
233
|
| self.bang_endswith_op
|
|
199
234
|
| self.bang_regexp_op
|
|
235
|
+
| self.bang_matches_op
|
|
200
236
|
| self.bang_cidr_op
|
|
201
237
|
| self.bang_any_op
|
|
202
238
|
| self.bang_all_op
|
|
@@ -223,7 +259,7 @@ class TQLGrammar:
|
|
|
223
259
|
|
|
224
260
|
# Define field list for reversed 'in' operator
|
|
225
261
|
self.field_list_item = self.typed_field
|
|
226
|
-
self.field_list = Group(Suppress("[") +
|
|
262
|
+
self.field_list = Group(Suppress("[") + DelimitedList(self.field_list_item) + Suppress("]"))
|
|
227
263
|
|
|
228
264
|
# Special case for 'in' operator - value in field(s)
|
|
229
265
|
self.value_in_field = Group(self.value + CaselessKeyword("in") + self.typed_field)
|
|
@@ -235,14 +271,14 @@ class TQLGrammar:
|
|
|
235
271
|
self.typed_field
|
|
236
272
|
+ CaselessKeyword("in")
|
|
237
273
|
+ self.list_literal
|
|
238
|
-
+ Literal("").
|
|
274
|
+
+ Literal("").set_parse_action(lambda: "__field_in_values__")
|
|
239
275
|
)
|
|
240
276
|
self.field_not_in_values = Group(
|
|
241
277
|
self.typed_field
|
|
242
278
|
+ (CaselessKeyword("not") | Literal("!"))
|
|
243
279
|
+ CaselessKeyword("in")
|
|
244
280
|
+ self.list_literal
|
|
245
|
-
+ Literal("").
|
|
281
|
+
+ Literal("").set_parse_action(lambda: "__field_not_in_values__")
|
|
246
282
|
)
|
|
247
283
|
|
|
248
284
|
def _setup_special_expressions(self):
|
|
@@ -259,12 +295,12 @@ class TQLGrammar:
|
|
|
259
295
|
self.geo_param_value = (
|
|
260
296
|
CaselessKeyword("true")
|
|
261
297
|
| CaselessKeyword("false")
|
|
262
|
-
| QuotedString('"',
|
|
263
|
-
| QuotedString("'",
|
|
298
|
+
| QuotedString('"', esc_char="\\")
|
|
299
|
+
| QuotedString("'", esc_char="\\")
|
|
264
300
|
| Regex(r"\d+")
|
|
265
301
|
)
|
|
266
302
|
self.geo_param = Group(self.geo_param_name + Suppress("=") + self.geo_param_value)
|
|
267
|
-
self.geo_params = PyparsingOptional(Suppress(",") +
|
|
303
|
+
self.geo_params = PyparsingOptional(Suppress(",") + DelimitedList(self.geo_param))
|
|
268
304
|
|
|
269
305
|
# Support multiple geo syntax patterns
|
|
270
306
|
self.geo_empty = Group(
|
|
@@ -276,7 +312,7 @@ class TQLGrammar:
|
|
|
276
312
|
+ Suppress("|")
|
|
277
313
|
+ self.geo_kw
|
|
278
314
|
+ Suppress("(")
|
|
279
|
-
+
|
|
315
|
+
+ DelimitedList(self.geo_param)
|
|
280
316
|
+ Suppress(")")
|
|
281
317
|
)
|
|
282
318
|
|
|
@@ -296,7 +332,7 @@ class TQLGrammar:
|
|
|
296
332
|
+ Suppress("(")
|
|
297
333
|
+ self.geo_conditions
|
|
298
334
|
+ Suppress(",")
|
|
299
|
-
+
|
|
335
|
+
+ DelimitedList(self.geo_param)
|
|
300
336
|
+ Suppress(")")
|
|
301
337
|
)
|
|
302
338
|
|
|
@@ -305,7 +341,7 @@ class TQLGrammar:
|
|
|
305
341
|
+ Suppress("|")
|
|
306
342
|
+ self.geo_kw
|
|
307
343
|
+ Suppress("(")
|
|
308
|
-
+
|
|
344
|
+
+ DelimitedList(self.geo_param)
|
|
309
345
|
+ Suppress(",")
|
|
310
346
|
+ self.geo_conditions
|
|
311
347
|
+ Suppress(")")
|
|
@@ -329,13 +365,13 @@ class TQLGrammar:
|
|
|
329
365
|
self.nslookup_param_value = (
|
|
330
366
|
CaselessKeyword("true")
|
|
331
367
|
| CaselessKeyword("false")
|
|
332
|
-
| QuotedString('"',
|
|
333
|
-
| QuotedString("'",
|
|
368
|
+
| QuotedString('"', esc_char="\\")
|
|
369
|
+
| QuotedString("'", esc_char="\\")
|
|
334
370
|
| self.list_literal
|
|
335
371
|
| Regex(r"\d+")
|
|
336
372
|
)
|
|
337
373
|
self.nslookup_param = Group(self.nslookup_param_name + Suppress("=") + self.nslookup_param_value)
|
|
338
|
-
self.nslookup_params = PyparsingOptional(Suppress(",") +
|
|
374
|
+
self.nslookup_params = PyparsingOptional(Suppress(",") + DelimitedList(self.nslookup_param))
|
|
339
375
|
|
|
340
376
|
# Support multiple nslookup syntax patterns
|
|
341
377
|
self.nslookup_empty = Group(
|
|
@@ -347,7 +383,7 @@ class TQLGrammar:
|
|
|
347
383
|
+ Suppress("|")
|
|
348
384
|
+ self.nslookup_kw
|
|
349
385
|
+ Suppress("(")
|
|
350
|
-
+
|
|
386
|
+
+ DelimitedList(self.nslookup_param)
|
|
351
387
|
+ Suppress(")")
|
|
352
388
|
)
|
|
353
389
|
|
|
@@ -367,7 +403,7 @@ class TQLGrammar:
|
|
|
367
403
|
+ Suppress("(")
|
|
368
404
|
+ self.nslookup_conditions
|
|
369
405
|
+ Suppress(",")
|
|
370
|
-
+
|
|
406
|
+
+ DelimitedList(self.nslookup_param)
|
|
371
407
|
+ Suppress(")")
|
|
372
408
|
)
|
|
373
409
|
|
|
@@ -376,7 +412,7 @@ class TQLGrammar:
|
|
|
376
412
|
+ Suppress("|")
|
|
377
413
|
+ self.nslookup_kw
|
|
378
414
|
+ Suppress("(")
|
|
379
|
-
+
|
|
415
|
+
+ DelimitedList(self.nslookup_param)
|
|
380
416
|
+ Suppress(",")
|
|
381
417
|
+ self.nslookup_conditions
|
|
382
418
|
+ Suppress(")")
|
|
@@ -398,7 +434,7 @@ class TQLGrammar:
|
|
|
398
434
|
self.by_kw = CaselessKeyword("by")
|
|
399
435
|
|
|
400
436
|
# Aggregation function names - including aliases
|
|
401
|
-
self.agg_function_name =
|
|
437
|
+
self.agg_function_name = one_of(
|
|
402
438
|
"count unique_count sum min max average avg median med std standard_deviation "
|
|
403
439
|
"percentile percentiles p pct percentile_rank percentile_ranks pct_rank pct_ranks "
|
|
404
440
|
"values unique cardinality",
|
|
@@ -416,7 +452,7 @@ class TQLGrammar:
|
|
|
416
452
|
+ Suppress("(")
|
|
417
453
|
+ self.field_name
|
|
418
454
|
+ PyparsingOptional(
|
|
419
|
-
Suppress(",") + (
|
|
455
|
+
Suppress(",") + (one_of("top bottom", caseless=True) + self.number | DelimitedList(self.number))
|
|
420
456
|
)
|
|
421
457
|
+ Suppress(")")
|
|
422
458
|
)
|
|
@@ -429,16 +465,16 @@ class TQLGrammar:
|
|
|
429
465
|
self.agg_with_alias = Group(self.agg_function + PyparsingOptional(self.as_kw + self.identifier))
|
|
430
466
|
|
|
431
467
|
# Multiple aggregations separated by commas
|
|
432
|
-
self.agg_list =
|
|
468
|
+
self.agg_list = DelimitedList(self.agg_with_alias)
|
|
433
469
|
|
|
434
470
|
# Group by fields with optional "top N" for each field
|
|
435
471
|
self.top_kw = CaselessKeyword("top")
|
|
436
472
|
self.group_by_field_with_bucket = Group(self.field_name + PyparsingOptional(self.top_kw + self.number))
|
|
437
|
-
self.group_by_fields =
|
|
473
|
+
self.group_by_fields = DelimitedList(self.group_by_field_with_bucket)
|
|
438
474
|
|
|
439
475
|
# Visualization hint: => chart_type
|
|
440
476
|
self.viz_arrow = Literal("=>")
|
|
441
|
-
self.viz_types =
|
|
477
|
+
self.viz_types = one_of(
|
|
442
478
|
"bar barchart line area pie donut scatter heatmap treemap sunburst "
|
|
443
479
|
"table number gauge map grouped_bar stacked_bar nested_pie nested_donut chord",
|
|
444
480
|
caseless=True,
|
|
@@ -486,7 +522,7 @@ class TQLGrammar:
|
|
|
486
522
|
self.base_expr = self.geo_mutator_expr | self.nslookup_mutator_expr | self.comparison_expr
|
|
487
523
|
|
|
488
524
|
# Define filter expression with operator precedence
|
|
489
|
-
self.filter_expr =
|
|
525
|
+
self.filter_expr = infix_notation(
|
|
490
526
|
self.base_expr,
|
|
491
527
|
[
|
|
492
528
|
(self.not_kw, 1, opAssoc.RIGHT),
|
|
@@ -508,7 +544,7 @@ class TQLGrammar:
|
|
|
508
544
|
)
|
|
509
545
|
|
|
510
546
|
# Define geo_conditions and nslookup_conditions
|
|
511
|
-
self.geo_conditions <<
|
|
547
|
+
self.geo_conditions << infix_notation(
|
|
512
548
|
self.comparison_expr,
|
|
513
549
|
[
|
|
514
550
|
(self.not_kw, 1, opAssoc.RIGHT),
|
|
@@ -517,7 +553,7 @@ class TQLGrammar:
|
|
|
517
553
|
],
|
|
518
554
|
)
|
|
519
555
|
|
|
520
|
-
self.nslookup_conditions <<
|
|
556
|
+
self.nslookup_conditions << infix_notation(
|
|
521
557
|
self.comparison_expr,
|
|
522
558
|
[
|
|
523
559
|
(self.not_kw, 1, opAssoc.RIGHT),
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/README.md
RENAMED
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/core_components/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/field_type_inference.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/mutators/security.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/opensearch_mappings.py
RENAMED
|
File without changes
|
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/parser_components/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/stats_transformer.py
RENAMED
|
File without changes
|
{tellaro_query_language-0.2.17 → tellaro_query_language-0.2.19}/src/tql/streaming_file_processor.py
RENAMED
|
File without changes
|
|
File without changes
|