tellaro-query-language 0.2.16__tar.gz → 0.2.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/PKG-INFO +1 -1
  2. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/pyproject.toml +1 -1
  3. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/value_comparison.py +20 -8
  4. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/__init__.py +29 -1
  5. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/encoding.py +198 -0
  6. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/field_mapping.py +1 -1
  7. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/lucene_converter.py +1 -1
  8. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/query_converter.py +2 -2
  9. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/parser.py +2 -2
  10. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/parser_components/grammar.py +80 -44
  11. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/LICENSE +0 -0
  12. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/README.md +0 -0
  13. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/__init__.py +0 -0
  14. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/analyzer.py +0 -0
  15. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/cache/__init__.py +0 -0
  16. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/cache/base.py +0 -0
  17. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/cache/memory.py +0 -0
  18. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/cache/redis.py +0 -0
  19. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/cli.py +0 -0
  20. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/core.py +0 -0
  21. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/core_components/README.md +0 -0
  22. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/core_components/__init__.py +0 -0
  23. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/core_components/file_operations.py +0 -0
  24. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/core_components/opensearch_operations.py +0 -0
  25. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/core_components/stats_operations.py +0 -0
  26. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/core_components/validation_operations.py +0 -0
  27. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/evaluator.py +0 -0
  28. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/README.md +0 -0
  29. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/__init__.py +0 -0
  30. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/field_access.py +0 -0
  31. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/evaluator_components/special_expressions.py +0 -0
  32. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/exceptions.py +0 -0
  33. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/field_type_inference.py +0 -0
  34. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/geoip_normalizer.py +0 -0
  35. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutator_analyzer.py +0 -0
  36. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/base.py +0 -0
  37. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/dns.py +0 -0
  38. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/geo.py +0 -0
  39. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/list.py +0 -0
  40. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/network.py +0 -0
  41. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/security.py +0 -0
  42. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/mutators/string.py +0 -0
  43. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch.py +0 -0
  44. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/README.md +0 -0
  45. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch_components/__init__.py +0 -0
  46. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch_mappings.py +0 -0
  47. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/opensearch_stats.py +0 -0
  48. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/parser_components/README.md +0 -0
  49. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/parser_components/__init__.py +0 -0
  50. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/parser_components/ast_builder.py +0 -0
  51. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/parser_components/error_analyzer.py +0 -0
  52. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/parser_components/field_extractor.py +0 -0
  53. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/post_processor.py +0 -0
  54. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/scripts.py +0 -0
  55. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/stats_evaluator.py +0 -0
  56. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/stats_transformer.py +0 -0
  57. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/streaming_file_processor.py +0 -0
  58. {tellaro_query_language-0.2.16 → tellaro_query_language-0.2.19}/src/tql/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tellaro-query-language
3
- Version: 0.2.16
3
+ Version: 0.2.19
4
4
  Summary: A flexible, human-friendly query language for searching and filtering structured data
5
5
  License: Proprietary
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tellaro-query-language"
3
- version = "0.2.16"
3
+ version = "0.2.19"
4
4
  description = "A flexible, human-friendly query language for searching and filtering structured data"
5
5
  authors = ["Justin Henderson <justin@tellaro.io>"]
6
6
  license = "Proprietary"
@@ -56,7 +56,14 @@ class ValueComparator:
56
56
  return False # Missing fields return False for all "is not" comparisons
57
57
  # For negated string operators, missing fields should return True
58
58
  # (e.g., if field doesn't exist, it doesn't contain/start with/end with the value)
59
- elif operator in ["not_contains", "not_startswith", "not_endswith", "not_regexp"]:
59
+ elif operator in [
60
+ "not_contains",
61
+ "not_startswith",
62
+ "not_endswith",
63
+ "not_regexp",
64
+ "not_matches",
65
+ "not_regex",
66
+ ]:
60
67
  return True
61
68
  # For cidr and not_cidr, missing fields should return False (can't check CIDR on missing IP)
62
69
  elif operator in ["cidr", "not_cidr"]:
@@ -94,18 +101,23 @@ class ValueComparator:
94
101
  if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
95
102
  field_value = field_value.lower() == "true"
96
103
 
97
- # Type compatibility check for numeric operators
98
- # If operator requires numeric comparison, both values must be numeric
104
+ # Type compatibility check for comparison operators
105
+ # For >, >=, <, <= operators:
106
+ # - Numeric comparison is preferred if both values are numeric
107
+ # - String comparison is allowed (supports ISO 8601 timestamps which sort correctly as strings)
108
+ # - Mixed types (one numeric, one string) return False
99
109
  # Exception: Arrays are handled specially in the operator logic below
100
110
  if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
101
111
  # Skip check if field_value is an array - handled by array logic below
102
112
  if not isinstance(field_value, (list, tuple)):
103
113
  field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
104
114
  expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
115
+ field_is_string = isinstance(field_value, str)
116
+ expected_is_string = isinstance(expected_value, str)
105
117
 
106
- if not (field_is_numeric and expected_is_numeric):
107
- # At least one value failed numeric conversion
108
- # Cannot perform numeric comparison - return False
118
+ # Allow comparison if both are numeric OR both are strings
119
+ if not ((field_is_numeric and expected_is_numeric) or (field_is_string and expected_is_string)):
120
+ # Mixed types or unsupported types - cannot compare
109
121
  return False
110
122
 
111
123
  try:
@@ -184,7 +196,7 @@ class ValueComparator:
184
196
  return field_value in converted_list
185
197
  else:
186
198
  return field_value == expected_value
187
- elif operator == "regexp":
199
+ elif operator in ("regexp", "matches", "regex"):
188
200
  # Unwrap single-element lists for string operators
189
201
  if isinstance(expected_value, list) and len(expected_value) == 1:
190
202
  expected_value = expected_value[0]
@@ -259,7 +271,7 @@ class ValueComparator:
259
271
  expected_value = expected_value[0]
260
272
  # Case-insensitive comparison to match post-processor behavior
261
273
  return not str(field_value).lower().endswith(str(expected_value).lower())
262
- elif operator == "not_regexp":
274
+ elif operator in ("not_regexp", "not_matches", "not_regex"):
263
275
  # Unwrap single-element lists for string operators
264
276
  if isinstance(expected_value, list) and len(expected_value) == 1:
265
277
  expected_value = expected_value[0]
@@ -13,7 +13,15 @@ from ..cache import CacheManager, LocalCacheManager, RedisCacheManager
13
13
  # Import all mutator classes
14
14
  from .base import BaseMutator, append_to_result
15
15
  from .dns import NSLookupMutator
16
- from .encoding import Base64DecodeMutator, Base64EncodeMutator, URLDecodeMutator
16
+ from .encoding import (
17
+ Base64DecodeMutator,
18
+ Base64EncodeMutator,
19
+ HexDecodeMutator,
20
+ HexEncodeMutator,
21
+ Md5Mutator,
22
+ Sha256Mutator,
23
+ URLDecodeMutator,
24
+ )
17
25
  from .geo import GeoIPLookupMutator, GeoIPResolver
18
26
  from .list import (
19
27
  AllMutator,
@@ -44,6 +52,10 @@ __all__ = [
44
52
  "Base64EncodeMutator",
45
53
  "Base64DecodeMutator",
46
54
  "URLDecodeMutator",
55
+ "HexEncodeMutator",
56
+ "HexDecodeMutator",
57
+ "Md5Mutator",
58
+ "Sha256Mutator",
47
59
  # Security mutators
48
60
  "RefangMutator",
49
61
  "DefangMutator",
@@ -91,6 +103,10 @@ ALLOWED_MUTATORS: Dict[str, Optional[Dict[str, type]]] = {
91
103
  "b64encode": {"field": str},
92
104
  "b64decode": {"field": str},
93
105
  "urldecode": {"field": str},
106
+ "hexencode": {"field": str},
107
+ "hexdecode": {"field": str},
108
+ "md5": {"field": str},
109
+ "sha256": {"field": str},
94
110
  # List evaluation mutators
95
111
  "any": None,
96
112
  "all": None,
@@ -124,6 +140,10 @@ ENRICHMENT_MUTATORS = {
124
140
  "b64encode",
125
141
  "b64decode",
126
142
  "urldecode",
143
+ "hexencode",
144
+ "hexdecode",
145
+ "md5",
146
+ "sha256",
127
147
  }
128
148
 
129
149
 
@@ -180,6 +200,14 @@ def create_mutator(name: str, params: Optional[List[List[Any]]] = None) -> BaseM
180
200
  return Base64DecodeMutator(params_dict)
181
201
  elif key == "urldecode":
182
202
  return URLDecodeMutator(params_dict)
203
+ elif key == "hexencode":
204
+ return HexEncodeMutator(params_dict)
205
+ elif key == "hexdecode":
206
+ return HexDecodeMutator(params_dict)
207
+ elif key == "md5":
208
+ return Md5Mutator(params_dict)
209
+ elif key == "sha256":
210
+ return Sha256Mutator(params_dict)
183
211
  elif key == "is_private":
184
212
  return IsPrivateMutator(params_dict)
185
213
  elif key == "is_global":
@@ -216,3 +216,201 @@ class URLDecodeMutator(BaseMutator):
216
216
  else:
217
217
  # Return the decoded value directly
218
218
  return decoded_value
219
+
220
+
221
+ class HexEncodeMutator(BaseMutator):
222
+ """
223
+ Mutator that encodes string values to hexadecimal.
224
+
225
+ Converts strings to their hexadecimal representation.
226
+ Supports encoding individual strings or lists of strings.
227
+
228
+ Parameters:
229
+ field: Optional field to store the encoded value
230
+ """
231
+
232
+ def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
233
+ super().__init__(params)
234
+ self.is_enrichment = True
235
+
236
+ def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
237
+ """Apply the hex encode transformation."""
238
+ append_field = self.params.get("field")
239
+
240
+ # Handle different input types
241
+ encoded_value: Any
242
+ if value is None:
243
+ encoded_value = None
244
+ elif isinstance(value, str):
245
+ encoded_value = value.encode("utf-8").hex()
246
+ elif isinstance(value, bytes):
247
+ encoded_value = value.hex()
248
+ elif isinstance(value, list):
249
+ encoded_value = []
250
+ for item in value:
251
+ if isinstance(item, str):
252
+ encoded_value.append(item.encode("utf-8").hex())
253
+ elif isinstance(item, bytes):
254
+ encoded_value.append(item.hex())
255
+ elif item is None:
256
+ encoded_value.append(None)
257
+ else:
258
+ encoded_value.append(str(item).encode("utf-8").hex())
259
+ else:
260
+ encoded_value = str(value).encode("utf-8").hex()
261
+
262
+ # If field is specified, add to record and return original value
263
+ if append_field:
264
+ append_to_result(record, append_field, encoded_value)
265
+ return value
266
+ return encoded_value
267
+
268
+
269
+ class HexDecodeMutator(BaseMutator):
270
+ """
271
+ Mutator that decodes hexadecimal string values.
272
+
273
+ Converts hexadecimal strings back to their original string representation.
274
+ Supports decoding individual strings or lists of strings.
275
+
276
+ Parameters:
277
+ field: Optional field to store the decoded value
278
+ """
279
+
280
+ def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
281
+ super().__init__(params)
282
+ self.is_enrichment = True
283
+
284
+ def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any: # noqa: C901
285
+ """Apply the hex decode transformation."""
286
+ append_field = self.params.get("field")
287
+
288
+ def _decode_hex(s: str) -> Optional[str]:
289
+ """Decode a hex string, returning None on failure."""
290
+ try:
291
+ return bytes.fromhex(s).decode("utf-8")
292
+ except (ValueError, UnicodeDecodeError): # noqa: B014
293
+ return None
294
+
295
+ # Handle different input types
296
+ decoded_value: Any
297
+ if value is None:
298
+ decoded_value = None
299
+ elif isinstance(value, str):
300
+ decoded_value = _decode_hex(value)
301
+ elif isinstance(value, list):
302
+ decoded_value = []
303
+ for item in value:
304
+ if isinstance(item, str):
305
+ decoded_value.append(_decode_hex(item))
306
+ else:
307
+ decoded_value.append(item)
308
+ else:
309
+ decoded_value = _decode_hex(str(value))
310
+
311
+ # If field is specified, add to record and return original value
312
+ if append_field:
313
+ append_to_result(record, append_field, decoded_value)
314
+ return value
315
+ return decoded_value
316
+
317
+
318
+ class Md5Mutator(BaseMutator):
319
+ """
320
+ Mutator that calculates MD5 hash of string values.
321
+
322
+ Computes the MD5 hash (as a hexadecimal string) of the input.
323
+ Supports hashing individual strings or lists of strings.
324
+
325
+ Parameters:
326
+ field: Optional field to store the hash value
327
+ """
328
+
329
+ def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
330
+ super().__init__(params)
331
+ self.is_enrichment = True
332
+
333
+ def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
334
+ """Apply the MD5 hash transformation."""
335
+ import hashlib
336
+
337
+ append_field = self.params.get("field")
338
+
339
+ # Handle different input types
340
+ # Note: MD5 is used here for data fingerprinting/checksums, not security
341
+ hashed_value: Any
342
+ if value is None:
343
+ hashed_value = None
344
+ elif isinstance(value, str):
345
+ hashed_value = hashlib.md5(value.encode("utf-8"), usedforsecurity=False).hexdigest()
346
+ elif isinstance(value, bytes):
347
+ hashed_value = hashlib.md5(value, usedforsecurity=False).hexdigest()
348
+ elif isinstance(value, list):
349
+ hashed_value = []
350
+ for item in value:
351
+ if isinstance(item, str):
352
+ hashed_value.append(hashlib.md5(item.encode("utf-8"), usedforsecurity=False).hexdigest())
353
+ elif isinstance(item, bytes):
354
+ hashed_value.append(hashlib.md5(item, usedforsecurity=False).hexdigest())
355
+ elif item is None:
356
+ hashed_value.append(None)
357
+ else:
358
+ hashed_value.append(hashlib.md5(str(item).encode("utf-8"), usedforsecurity=False).hexdigest())
359
+ else:
360
+ hashed_value = hashlib.md5(str(value).encode("utf-8"), usedforsecurity=False).hexdigest()
361
+
362
+ # If field is specified, add to record and return original value
363
+ if append_field:
364
+ append_to_result(record, append_field, hashed_value)
365
+ return value
366
+ return hashed_value
367
+
368
+
369
+ class Sha256Mutator(BaseMutator):
370
+ """
371
+ Mutator that calculates SHA256 hash of string values.
372
+
373
+ Computes the SHA256 hash (as a hexadecimal string) of the input.
374
+ Supports hashing individual strings or lists of strings.
375
+
376
+ Parameters:
377
+ field: Optional field to store the hash value
378
+ """
379
+
380
+ def __init__(self, params: Optional[Dict[str, Any]] = None) -> None:
381
+ super().__init__(params)
382
+ self.is_enrichment = True
383
+
384
+ def apply(self, field_name: str, record: Dict[str, Any], value: Any) -> Any:
385
+ """Apply the SHA256 hash transformation."""
386
+ import hashlib
387
+
388
+ append_field = self.params.get("field")
389
+
390
+ # Handle different input types
391
+ hashed_value: Any
392
+ if value is None:
393
+ hashed_value = None
394
+ elif isinstance(value, str):
395
+ hashed_value = hashlib.sha256(value.encode("utf-8")).hexdigest()
396
+ elif isinstance(value, bytes):
397
+ hashed_value = hashlib.sha256(value).hexdigest()
398
+ elif isinstance(value, list):
399
+ hashed_value = []
400
+ for item in value:
401
+ if isinstance(item, str):
402
+ hashed_value.append(hashlib.sha256(item.encode("utf-8")).hexdigest())
403
+ elif isinstance(item, bytes):
404
+ hashed_value.append(hashlib.sha256(item).hexdigest())
405
+ elif item is None:
406
+ hashed_value.append(None)
407
+ else:
408
+ hashed_value.append(hashlib.sha256(str(item).encode("utf-8")).hexdigest())
409
+ else:
410
+ hashed_value = hashlib.sha256(str(value).encode("utf-8")).hexdigest()
411
+
412
+ # If field is specified, add to record and return original value
413
+ if append_field:
414
+ append_to_result(record, append_field, hashed_value)
415
+ return value
416
+ return hashed_value
@@ -231,7 +231,7 @@ class FieldMapping:
231
231
  }
232
232
 
233
233
  # Operators that work best with text fields (full-text search)
234
- text_operators = {"contains", "regexp", "not_regexp"}
234
+ text_operators = {"contains", "regexp", "matches", "regex", "not_regexp", "not_matches", "not_regex"}
235
235
 
236
236
  # Operators that require numeric/date fields
237
237
  range_operators = {">", ">=", "<", "<=", "gt", "gte", "lt", "lte", "between", "not_between"}
@@ -105,7 +105,7 @@ class LuceneConverter:
105
105
  return f"{lucene_field}:({' OR '.join(escaped_values)})"
106
106
  else:
107
107
  return f"{lucene_field}:{escaped_value}"
108
- elif operator == "regexp":
108
+ elif operator in ("regexp", "matches", "regex"):
109
109
  return f"{lucene_field}:/{escaped_value}/"
110
110
  elif operator == "exists":
111
111
  return f"_exists_:{lucene_field}"
@@ -315,7 +315,7 @@ class QueryConverter:
315
315
  return {"terms": {opensearch_field: value}}
316
316
  else:
317
317
  return {"term": {opensearch_field: value}}
318
- elif operator == "regexp":
318
+ elif operator in ("regexp", "matches", "regex"):
319
319
  # Unwrap single-element lists for string operators
320
320
  if isinstance(value, list) and len(value) == 1:
321
321
  value = value[0]
@@ -390,7 +390,7 @@ class QueryConverter:
390
390
  if isinstance(value, list) and len(value) == 1:
391
391
  value = value[0]
392
392
  return {"bool": {"must_not": {"wildcard": {opensearch_field: f"*{value}"}}}}
393
- elif operator == "not_regexp":
393
+ elif operator in ("not_regexp", "not_matches", "not_regex"):
394
394
  # Unwrap single-element lists for string operators
395
395
  if isinstance(value, list) and len(value) == 1:
396
396
  value = value[0]
@@ -14,7 +14,7 @@ from .parser_components.error_analyzer import ErrorAnalyzer
14
14
  from .parser_components.field_extractor import FieldExtractor
15
15
  from .parser_components.grammar import TQLGrammar
16
16
 
17
- ParserElement.enablePackrat()
17
+ ParserElement.enable_packrat()
18
18
 
19
19
 
20
20
  class TQLParser:
@@ -53,7 +53,7 @@ class TQLParser:
53
53
 
54
54
  try:
55
55
  # Parse the query
56
- parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)
56
+ parsed_result = self.grammar.tql_expr.parse_string(query, parse_all=True)
57
57
 
58
58
  # Convert to our AST format
59
59
  # Start depth counting at 0 from parse() entry point
@@ -2,6 +2,7 @@
2
2
 
3
3
  from pyparsing import (
4
4
  CaselessKeyword,
5
+ DelimitedList,
5
6
  Forward,
6
7
  Group,
7
8
  Literal,
@@ -15,10 +16,8 @@ from pyparsing import (
15
16
  ZeroOrMore,
16
17
  alphanums,
17
18
  alphas,
18
- delimitedList,
19
- infixNotation,
20
- nums,
21
- oneOf,
19
+ infix_notation,
20
+ one_of,
22
21
  opAssoc,
23
22
  )
24
23
 
@@ -45,27 +44,54 @@ class TQLGrammar:
45
44
  """Set up basic tokens and literals."""
46
45
  # Basic tokens
47
46
  self.identifier = Word(alphas, alphanums + "_.-")
48
- self.number = Word(nums + ".-")
47
+ # Number pattern supports:
48
+ # - Integers: 123, -456
49
+ # - Floats: 1.5, -3.14
50
+ # - Scientific notation: 1.0e5, 1.5e-3, 2.0E+10 (a decimal point is required before the exponent)
51
+ # Pattern matches Rust's float grammar for parity
52
+ self.scientific_number = Regex(r"-?\d+\.\d+[eE][+-]?\d+")
53
+ self.regular_number = Regex(r"-?\d+(\.\d+)?")
54
+ self.number = self.scientific_number | self.regular_number
49
55
  self.string_literal = QuotedString('"') | QuotedString("'")
50
56
  # CIDR notation for IP addresses (e.g., 192.168.1.0/24)
51
- self.cidr_notation = Word(nums + "./")
57
+ self.cidr_notation = Regex(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/\d{1,2}")
58
+ # IP address (without mask) - matches 4 octets separated by dots
59
+ self.ip_address = Regex(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
60
+ # Dot-separated numeric values (like partial IPs: 10.0.0, version numbers: 1.2.3)
61
+ # This allows values like "10.0.0" to be matched as a single token
62
+ self.dotted_number = Regex(r"\d+(\.\d+){2,}")
52
63
  # Define list items as strings, numbers, or identifiers
53
64
  self.list_item = self.string_literal | self.number | self.identifier
54
- self.list_literal = Group(Suppress("[") + delimitedList(self.list_item) + Suppress("]"))
55
-
56
- # Define simple values – note order matters (try string literals first, then CIDR)
57
- self.simple_value = self.string_literal | self.cidr_notation | self.number | self.identifier
65
+ self.list_literal = Group(Suppress("[") + DelimitedList(self.list_item) + Suppress("]"))
66
+
67
+ # Define simple values – order matters:
68
+ # 1. String literals (quoted)
69
+ # 2. CIDR notation (IP/mask format) - must come before IP address
70
+ # 3. IP address (4 octets without mask)
71
+ # 4. Dotted numbers (partial IPs like 10.0.0, versions like 1.2.3)
72
+ # 5. Scientific notation (must come before regular numbers to avoid partial match)
73
+ # 6. Regular numbers
74
+ # 7. Identifiers (unquoted strings)
75
+ self.simple_value = (
76
+ self.string_literal
77
+ | self.cidr_notation
78
+ | self.ip_address
79
+ | self.dotted_number
80
+ | self.scientific_number
81
+ | self.regular_number
82
+ | self.identifier
83
+ )
58
84
 
59
85
  # Define type hints
60
- self.type_hint = oneOf("number int float decimal date array bool boolean geo object string", caseless=True)
86
+ self.type_hint = one_of("number int float decimal date array bool boolean geo object string", caseless=True)
61
87
 
62
88
  def _setup_operators(self):
63
89
  """Set up operator definitions."""
64
90
  # Define binary operators (require a value) - != must come before ! operators
65
- self.binary_ops = oneOf(
91
+ self.binary_ops = one_of(
66
92
  "!= " # != must be before ! operators
67
- + "!contains !in !startswith !endswith !regexp !cidr !is !between "
68
- + "regexp in contains = eq ne > gt >= gte < lt <= lte cidr is startswith endswith any all none",
93
+ + "!contains !in !startswith !endswith !regexp !matches !cidr !is !between "
94
+ + "regexp matches regex in contains = eq ne > gt >= gte < lt <= lte cidr is startswith endswith any all none",
69
95
  caseless=True,
70
96
  )
71
97
 
@@ -75,6 +101,7 @@ class TQLGrammar:
75
101
  self.not_startswith_op = (CaselessKeyword("not") | "!") + CaselessKeyword("startswith")
76
102
  self.not_endswith_op = (CaselessKeyword("not") | "!") + CaselessKeyword("endswith")
77
103
  self.not_regexp_op = (CaselessKeyword("not") | "!") + CaselessKeyword("regexp")
104
+ self.not_matches_op = (CaselessKeyword("not") | "!") + CaselessKeyword("matches")
78
105
  self.not_cidr_op = (CaselessKeyword("not") | "!") + CaselessKeyword("cidr")
79
106
  self.not_any_op = (CaselessKeyword("not") | "!") + CaselessKeyword("any")
80
107
  self.not_all_op = (CaselessKeyword("not") | "!") + CaselessKeyword("all")
@@ -86,6 +113,7 @@ class TQLGrammar:
86
113
  self.bang_startswith_op = Suppress("!") + CaselessKeyword("startswith")
87
114
  self.bang_endswith_op = Suppress("!") + CaselessKeyword("endswith")
88
115
  self.bang_regexp_op = Suppress("!") + CaselessKeyword("regexp")
116
+ self.bang_matches_op = Suppress("!") + CaselessKeyword("matches")
89
117
  self.bang_cidr_op = Suppress("!") + CaselessKeyword("cidr")
90
118
  self.bang_any_op = Suppress("!") + CaselessKeyword("any")
91
119
  self.bang_all_op = Suppress("!") + CaselessKeyword("all")
@@ -97,7 +125,7 @@ class TQLGrammar:
97
125
  self.bang_between_op = Suppress("!") + CaselessKeyword("between")
98
126
 
99
127
  # Define unary operators (no value required)
100
- self.unary_ops = oneOf("exists !exists", caseless=True)
128
+ self.unary_ops = one_of("exists !exists", caseless=True)
101
129
  self.not_exists_op = (CaselessKeyword("not") | "!") + CaselessKeyword("exists")
102
130
  self.bang_exists_op = Suppress("!") + CaselessKeyword("exists")
103
131
 
@@ -115,16 +143,22 @@ class TQLGrammar:
115
143
 
116
144
  def _setup_fields_and_values(self):
117
145
  """Set up field and value definitions."""
118
- # Field names can contain single colons but we need to handle :: for type hints
119
- # We'll match the field name greedily but stop at ::
120
- self.field_name = Regex(r"[@a-zA-Z][@a-zA-Z0-9_.:-]*?(?=::|[^@a-zA-Z0-9_.:-]|$)")
146
+ # Field names:
147
+ # - May start with @ (like @timestamp, @metadata)
148
+ # - First char after optional @ must be a letter or underscore
149
+ # - Can contain letters, numbers, underscores, dots, hyphens
150
+ # - Colon NOT allowed (conflicts with :: type hints)
151
+ # - @ only allowed at start (time@stamp is INVALID)
152
+ # - Stops at :: for type hints
153
+ self.field_name = Regex(r"@?[a-zA-Z_][a-zA-Z0-9_.-]*(?=::|[^a-zA-Z0-9_.-]|$)")
121
154
 
122
155
  def _setup_mutators(self):
123
156
  """Set up mutator definitions."""
124
157
  # Define mutators
125
- self.mutator_name = oneOf(
158
+ self.mutator_name = one_of(
126
159
  "lowercase uppercase trim split replace nslookup geoip_lookup geo "
127
- "length refang defang b64encode b64decode urldecode "
160
+ "length refang defang b64encode b64decode urldecode hexencode hexdecode "
161
+ "md5 sha256 "
128
162
  "any all none avg average max min sum is_private is_global "
129
163
  "count unique first last",
130
164
  caseless=True,
@@ -137,7 +171,7 @@ class TQLGrammar:
137
171
  # Positional parameters can be strings (quoted or unquoted), numbers, or identifiers
138
172
  self.mutator_positional_param = self.string_literal | self.number | self.identifier
139
173
  self.mutator_param = self.mutator_named_param | self.mutator_positional_param
140
- self.mutator_params = Group(Suppress("(") + delimitedList(self.mutator_param) + Suppress(")"))
174
+ self.mutator_params = Group(Suppress("(") + DelimitedList(self.mutator_param) + Suppress(")"))
141
175
  self.mutator = Group(Suppress("|") + self.mutator_name + PyparsingOptional(self.mutator_params))
142
176
  self.mutator_chain = ZeroOrMore(self.mutator)
143
177
 
@@ -188,6 +222,7 @@ class TQLGrammar:
188
222
  | self.not_startswith_op
189
223
  | self.not_endswith_op
190
224
  | self.not_regexp_op
225
+ | self.not_matches_op
191
226
  | self.not_cidr_op
192
227
  | self.not_any_op
193
228
  | self.not_all_op
@@ -197,6 +232,7 @@ class TQLGrammar:
197
232
  | self.bang_startswith_op
198
233
  | self.bang_endswith_op
199
234
  | self.bang_regexp_op
235
+ | self.bang_matches_op
200
236
  | self.bang_cidr_op
201
237
  | self.bang_any_op
202
238
  | self.bang_all_op
@@ -223,7 +259,7 @@ class TQLGrammar:
223
259
 
224
260
  # Define field list for reversed 'in' operator
225
261
  self.field_list_item = self.typed_field
226
- self.field_list = Group(Suppress("[") + delimitedList(self.field_list_item) + Suppress("]"))
262
+ self.field_list = Group(Suppress("[") + DelimitedList(self.field_list_item) + Suppress("]"))
227
263
 
228
264
  # Special case for 'in' operator - value in field(s)
229
265
  self.value_in_field = Group(self.value + CaselessKeyword("in") + self.typed_field)
@@ -235,14 +271,14 @@ class TQLGrammar:
235
271
  self.typed_field
236
272
  + CaselessKeyword("in")
237
273
  + self.list_literal
238
- + Literal("").setParseAction(lambda: "__field_in_values__")
274
+ + Literal("").set_parse_action(lambda: "__field_in_values__")
239
275
  )
240
276
  self.field_not_in_values = Group(
241
277
  self.typed_field
242
278
  + (CaselessKeyword("not") | Literal("!"))
243
279
  + CaselessKeyword("in")
244
280
  + self.list_literal
245
- + Literal("").setParseAction(lambda: "__field_not_in_values__")
281
+ + Literal("").set_parse_action(lambda: "__field_not_in_values__")
246
282
  )
247
283
 
248
284
  def _setup_special_expressions(self):
@@ -259,12 +295,12 @@ class TQLGrammar:
259
295
  self.geo_param_value = (
260
296
  CaselessKeyword("true")
261
297
  | CaselessKeyword("false")
262
- | QuotedString('"', escChar="\\")
263
- | QuotedString("'", escChar="\\")
298
+ | QuotedString('"', esc_char="\\")
299
+ | QuotedString("'", esc_char="\\")
264
300
  | Regex(r"\d+")
265
301
  )
266
302
  self.geo_param = Group(self.geo_param_name + Suppress("=") + self.geo_param_value)
267
- self.geo_params = PyparsingOptional(Suppress(",") + delimitedList(self.geo_param))
303
+ self.geo_params = PyparsingOptional(Suppress(",") + DelimitedList(self.geo_param))
268
304
 
269
305
  # Support multiple geo syntax patterns
270
306
  self.geo_empty = Group(
@@ -276,7 +312,7 @@ class TQLGrammar:
276
312
  + Suppress("|")
277
313
  + self.geo_kw
278
314
  + Suppress("(")
279
- + delimitedList(self.geo_param)
315
+ + DelimitedList(self.geo_param)
280
316
  + Suppress(")")
281
317
  )
282
318
 
@@ -296,7 +332,7 @@ class TQLGrammar:
296
332
  + Suppress("(")
297
333
  + self.geo_conditions
298
334
  + Suppress(",")
299
- + delimitedList(self.geo_param)
335
+ + DelimitedList(self.geo_param)
300
336
  + Suppress(")")
301
337
  )
302
338
 
@@ -305,7 +341,7 @@ class TQLGrammar:
305
341
  + Suppress("|")
306
342
  + self.geo_kw
307
343
  + Suppress("(")
308
- + delimitedList(self.geo_param)
344
+ + DelimitedList(self.geo_param)
309
345
  + Suppress(",")
310
346
  + self.geo_conditions
311
347
  + Suppress(")")
@@ -329,13 +365,13 @@ class TQLGrammar:
329
365
  self.nslookup_param_value = (
330
366
  CaselessKeyword("true")
331
367
  | CaselessKeyword("false")
332
- | QuotedString('"', escChar="\\")
333
- | QuotedString("'", escChar="\\")
368
+ | QuotedString('"', esc_char="\\")
369
+ | QuotedString("'", esc_char="\\")
334
370
  | self.list_literal
335
371
  | Regex(r"\d+")
336
372
  )
337
373
  self.nslookup_param = Group(self.nslookup_param_name + Suppress("=") + self.nslookup_param_value)
338
- self.nslookup_params = PyparsingOptional(Suppress(",") + delimitedList(self.nslookup_param))
374
+ self.nslookup_params = PyparsingOptional(Suppress(",") + DelimitedList(self.nslookup_param))
339
375
 
340
376
  # Support multiple nslookup syntax patterns
341
377
  self.nslookup_empty = Group(
@@ -347,7 +383,7 @@ class TQLGrammar:
347
383
  + Suppress("|")
348
384
  + self.nslookup_kw
349
385
  + Suppress("(")
350
- + delimitedList(self.nslookup_param)
386
+ + DelimitedList(self.nslookup_param)
351
387
  + Suppress(")")
352
388
  )
353
389
 
@@ -367,7 +403,7 @@ class TQLGrammar:
367
403
  + Suppress("(")
368
404
  + self.nslookup_conditions
369
405
  + Suppress(",")
370
- + delimitedList(self.nslookup_param)
406
+ + DelimitedList(self.nslookup_param)
371
407
  + Suppress(")")
372
408
  )
373
409
 
@@ -376,7 +412,7 @@ class TQLGrammar:
376
412
  + Suppress("|")
377
413
  + self.nslookup_kw
378
414
  + Suppress("(")
379
- + delimitedList(self.nslookup_param)
415
+ + DelimitedList(self.nslookup_param)
380
416
  + Suppress(",")
381
417
  + self.nslookup_conditions
382
418
  + Suppress(")")
@@ -398,7 +434,7 @@ class TQLGrammar:
398
434
  self.by_kw = CaselessKeyword("by")
399
435
 
400
436
  # Aggregation function names - including aliases
401
- self.agg_function_name = oneOf(
437
+ self.agg_function_name = one_of(
402
438
  "count unique_count sum min max average avg median med std standard_deviation "
403
439
  "percentile percentiles p pct percentile_rank percentile_ranks pct_rank pct_ranks "
404
440
  "values unique cardinality",
@@ -416,7 +452,7 @@ class TQLGrammar:
416
452
  + Suppress("(")
417
453
  + self.field_name
418
454
  + PyparsingOptional(
419
- Suppress(",") + (oneOf("top bottom", caseless=True) + self.number | delimitedList(self.number))
455
+ Suppress(",") + (one_of("top bottom", caseless=True) + self.number | DelimitedList(self.number))
420
456
  )
421
457
  + Suppress(")")
422
458
  )
@@ -429,16 +465,16 @@ class TQLGrammar:
429
465
  self.agg_with_alias = Group(self.agg_function + PyparsingOptional(self.as_kw + self.identifier))
430
466
 
431
467
  # Multiple aggregations separated by commas
432
- self.agg_list = delimitedList(self.agg_with_alias)
468
+ self.agg_list = DelimitedList(self.agg_with_alias)
433
469
 
434
470
  # Group by fields with optional "top N" for each field
435
471
  self.top_kw = CaselessKeyword("top")
436
472
  self.group_by_field_with_bucket = Group(self.field_name + PyparsingOptional(self.top_kw + self.number))
437
- self.group_by_fields = delimitedList(self.group_by_field_with_bucket)
473
+ self.group_by_fields = DelimitedList(self.group_by_field_with_bucket)
438
474
 
439
475
  # Visualization hint: => chart_type
440
476
  self.viz_arrow = Literal("=>")
441
- self.viz_types = oneOf(
477
+ self.viz_types = one_of(
442
478
  "bar barchart line area pie donut scatter heatmap treemap sunburst "
443
479
  "table number gauge map grouped_bar stacked_bar nested_pie nested_donut chord",
444
480
  caseless=True,
@@ -486,7 +522,7 @@ class TQLGrammar:
486
522
  self.base_expr = self.geo_mutator_expr | self.nslookup_mutator_expr | self.comparison_expr
487
523
 
488
524
  # Define filter expression with operator precedence
489
- self.filter_expr = infixNotation(
525
+ self.filter_expr = infix_notation(
490
526
  self.base_expr,
491
527
  [
492
528
  (self.not_kw, 1, opAssoc.RIGHT),
@@ -508,7 +544,7 @@ class TQLGrammar:
508
544
  )
509
545
 
510
546
  # Define geo_conditions and nslookup_conditions
511
- self.geo_conditions << infixNotation(
547
+ self.geo_conditions << infix_notation(
512
548
  self.comparison_expr,
513
549
  [
514
550
  (self.not_kw, 1, opAssoc.RIGHT),
@@ -517,7 +553,7 @@ class TQLGrammar:
517
553
  ],
518
554
  )
519
555
 
520
- self.nslookup_conditions << infixNotation(
556
+ self.nslookup_conditions << infix_notation(
521
557
  self.comparison_expr,
522
558
  [
523
559
  (self.not_kw, 1, opAssoc.RIGHT),