tellaro-query-language 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/METADATA +1 -1
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/RECORD +18 -18
- tql/__init__.py +1 -1
- tql/cache/base.py +79 -7
- tql/cache/memory.py +126 -18
- tql/core.py +19 -2
- tql/core_components/opensearch_operations.py +23 -4
- tql/evaluator.py +2 -0
- tql/evaluator_components/value_comparison.py +74 -12
- tql/mutator_analyzer.py +2 -2
- tql/opensearch_stats.py +3 -1
- tql/parser.py +52 -21
- tql/post_processor.py +39 -10
- tql/scripts.py +19 -2
- tql/stats_evaluator.py +5 -3
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/LICENSE +0 -0
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/WHEEL +0 -0
- {tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/entry_points.txt +0 -0
{tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/RECORD
RENAMED
@@ -1,25 +1,25 @@
-tql/__init__.py,sha256=
+tql/__init__.py,sha256=eqti5Fmu8EjD-NbCwqb1UKGJQ0OpWJLREsVMqqn6Hs4,1260
 tql/analyzer.py,sha256=Sfzj6f7YzqylT8HIL9hDbXdhl0lf8q8DNoafrxkD-F8,15456
 tql/cache/__init__.py,sha256=GIzIEMZUZEYJj72sAhuVLEG-OJEKUG2srUWNM3Ix-T8,213
-tql/cache/base.py,sha256=
-tql/cache/memory.py,sha256=
+tql/cache/base.py,sha256=CwLpobv4WR7WSz99JVWAHNn-XQTtqr38Yg5UiWNXPiA,3192
+tql/cache/memory.py,sha256=1kDfyODBCDi5UvSP_f_7UhHVmug-sLnrsMYvm4C4cxo,5978
 tql/cache/redis.py,sha256=ZU_IsVDvpSYpNvPfnZ4iulJDODpEGx3c4dkXLzPzPVc,2309
-tql/core.py,sha256=
+tql/core.py,sha256=ZNtxNJSZqotJ-3z9kg8Nc2lKDoeR9Bspe8k7VrXp0Ow,49363
 tql/core_components/README.md,sha256=Rm7w4UHdQ0vPBEFybE5b62IOvSA5Nzq2GRvtBHOapmc,3068
 tql/core_components/__init__.py,sha256=v8BBybPlqV7dkVY9mw1mblvqyAFJZ7Pf_bEc-jAL7FI,643
 tql/core_components/file_operations.py,sha256=Jr0kkxz_OP2KHOAsIr7KMtYe_lbu8LuBUySt2LQbjJw,3925
-tql/core_components/opensearch_operations.py,sha256=
+tql/core_components/opensearch_operations.py,sha256=zgxGiDpXyPW0ZUX-StpZXxf84s8eLxSymAGM5UUJimk,55253
 tql/core_components/stats_operations.py,sha256=aqTGAqIFvR6EkSbJEd0qft8Ldy8uiTrK2XI9o5bZUOs,8014
 tql/core_components/validation_operations.py,sha256=_VPXh0HABBjsXF99jFT7B6-5QAPsADOCy6poinGrxeE,22454
-tql/evaluator.py,sha256=
+tql/evaluator.py,sha256=W2PbD0umxKORlb9npPgg985I7eYxc9QsXUtiZsgOPCk,17889
 tql/evaluator_components/README.md,sha256=c59yf2au34yPhrru7JWgGop_ORteB6w5vfMhsac8j3k,3882
 tql/evaluator_components/__init__.py,sha256=DourRUSYXWPnCghBFj7W0YfMeymT3X8YTDCwnLIyP1c,535
 tql/evaluator_components/field_access.py,sha256=BuXvL9jlv4H77neT70Vh7_qokmzs-d4EbSDA2FB1IT0,6435
 tql/evaluator_components/special_expressions.py,sha256=K6M5pW4Re2kEqxfxj9sc7I_M1tU3pn6LKJ2AfjHeciA,12917
-tql/evaluator_components/value_comparison.py,sha256=
+tql/evaluator_components/value_comparison.py,sha256=Woo8bNwIARdfe7F7ApYD5xi4NrrPwhdem_5ZKSik-t4,21369
 tql/exceptions.py,sha256=hatIixXci6p57J9RrkfdvmKM_2i-JKb8ViL2kU4z7a8,5550
 tql/geoip_normalizer.py,sha256=tvie-5xevJEeLp2KmjoXDjYdND8AvyVE7lCO8qgUzGY,10486
-tql/mutator_analyzer.py,sha256=
+tql/mutator_analyzer.py,sha256=OWx3k5lK5aFHWU9Ez6DaIhenEZDxj9CbB0vM71xqUTw,55670
 tql/mutators/__init__.py,sha256=eTK8sRw4KXXnTZTn5ETIqwcaIek5rSUIVyZsxTwNNHA,6966
 tql/mutators/base.py,sha256=4Ze_x1sTO11OILXfcF2XN7ttyHcZ4gwn96UXFMMaC6M,2523
 tql/mutators/dns.py,sha256=1IKgHolFLRMR4TOgK0AiLjz5vDtFiqO328mVF4Vzk3s,14428
@@ -36,21 +36,21 @@ tql/opensearch_components/field_mapping.py,sha256=fj388cKVyDXLJKi8giSiGHL9zg4cFR
 tql/opensearch_components/lucene_converter.py,sha256=OvYTZHNBktPGow1fsVm4TMlvxHSmWrnqo42lFZNxXTo,13175
 tql/opensearch_components/query_converter.py,sha256=vLoBqv7W3ntqUH6hcuT4PDJkGkAGSQCxMvAWC482c0g,41971
 tql/opensearch_mappings.py,sha256=sVLlQlE3eGD7iNNZ_m4F4j5GVzQAJhZyCqDKYRhLRh8,11531
-tql/opensearch_stats.py,sha256=
-tql/parser.py,sha256=
+tql/opensearch_stats.py,sha256=l1VsHp1hFzsz8VFFD42M4xwlVgKkjptLs8TFB0wqqRw,24478
+tql/parser.py,sha256=t1bpL1hrHVpLZKeEP_DxMszlYUbh7QiudFDYxZtlc5s,80286
 tql/parser_components/README.md,sha256=lvQX72ckq2zyotGs8QIHHCIFqaA7bOHwkP44wU8Zoiw,2322
 tql/parser_components/__init__.py,sha256=zBwHBMPJyHSBbaOojf6qTrJYjJg5A6tPUE8nHFdRiQs,521
 tql/parser_components/ast_builder.py,sha256=erHoeKAMzobswoRIXB9xcsZbzQ5-2ZwaYfQgRWoUAa8,9653
 tql/parser_components/error_analyzer.py,sha256=qlCD9vKyW73aeKQYI33P1OjIWSJ3LPd08wuN9cis2fU,4012
 tql/parser_components/field_extractor.py,sha256=eUEkmiYWX2OexanFqhHeX8hcIkRlfIcgMB667e0HRYs,4629
 tql/parser_components/grammar.py,sha256=h58RBshZHXgbP1EmNwmf7dny-fgVloNg-qN4Rivross,20599
-tql/post_processor.py,sha256=
-tql/scripts.py,sha256=
-tql/stats_evaluator.py,sha256=
+tql/post_processor.py,sha256=hItSj1VaQwCf2rbRQzLaMfAWR5RqMTpvDP-_X0HVhVo,51697
+tql/scripts.py,sha256=2iryuAWqIxYoFA7R5hIzC6NE72ihTH9kIZd1-thKPTM,4331
+tql/stats_evaluator.py,sha256=xJoTaBCBiKzQ0HuITGhTA41dVpeDhIMU9EMguCh_VG0,22427
 tql/stats_transformer.py,sha256=MT-4rDWZSySgn4Fuq9H0c-mvwFYLM6FqWpPv2rHX-rE,7588
 tql/validators.py,sha256=e9MlX-zQ_O3M8YP8vXyMjKU8iiJMTh6mMK0iv0_4gTY,3771
-tellaro_query_language-0.2.
-tellaro_query_language-0.2.
-tellaro_query_language-0.2.
-tellaro_query_language-0.2.
-tellaro_query_language-0.2.
+tellaro_query_language-0.2.3.dist-info/LICENSE,sha256=zRhQ85LnW55fWgAjQctckwQ67DX5Jmt64lq343ThZFU,1063
+tellaro_query_language-0.2.3.dist-info/METADATA,sha256=jRsW9n1nhyQb7m0qZd4F7h-q_ZoH_WzfVcCJMzC9P3c,15740
+tellaro_query_language-0.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+tellaro_query_language-0.2.3.dist-info/entry_points.txt,sha256=H43APfGBMsZkKsUCnFTaqprQPW-Kce2yz2qsBL3dZrw,164
+tellaro_query_language-0.2.3.dist-info/RECORD,,
tql/__init__.py
CHANGED
tql/cache/base.py
CHANGED
@@ -1,25 +1,97 @@
-"""Base cache infrastructure.
+"""Base cache infrastructure for TQL.
+
+This module provides the base CacheManager class that defines the caching
+interface used throughout TQL. Concrete implementations include LocalCacheManager
+for in-memory caching and RedisCacheManager for distributed caching.
+"""
 
 from typing import Any, Dict, Optional
 
 
 class CacheManager:
-    """Base class for cache management.
+    """Base class for cache management.
+
+    This class defines the interface for all cache implementations in TQL.
+    Subclasses should override these methods to provide actual caching functionality.
+
+    The base implementation provides no-op defaults that can be safely used when
+    caching is disabled or not needed.
+
+    Example:
+        >>> cache = LocalCacheManager()
+        >>> cache.set("user:123", {"name": "Alice", "age": 30}, ttl=3600)
+        >>> user = cache.get("user:123")
+        >>> cache.delete("user:123")
+    """
 
     def get(self, key: str) -> Optional[Any]:
-        """Retrieve value from cache.
+        """Retrieve a value from the cache.
+
+        Args:
+            key: The cache key to look up. Should be a string identifier.
+
+        Returns:
+            The cached value if it exists and hasn't expired, None otherwise.
+
+        Example:
+            >>> value = cache.get("my_key")
+            >>> if value is not None:
+            ...     print(f"Found: {value}")
+        """
         return None
 
     def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
-        """Store value in cache.
+        """Store a value in the cache.
+
+        Args:
+            key: The cache key under which to store the value.
+            value: The value to cache. Can be any Python object.
+            ttl: Time-to-live in seconds. If None or 0, the value never expires.
+
+        Example:
+            >>> cache.set("config", {"debug": True}, ttl=300)  # Cache for 5 minutes
+            >>> cache.set("permanent", {"version": "1.0"})  # Never expires
+        """
 
     def delete(self, key: str) -> None:
-        """Remove value from cache.
+        """Remove a value from the cache.
+
+        Args:
+            key: The cache key to delete.
+
+        Example:
+            >>> cache.delete("expired_key")
+        """
 
     def clear_pattern(self, pattern: str) -> int:  # pylint: disable=unused-argument
-        """Clear all keys matching pattern.
+        """Clear all keys matching a pattern.
+
+        Args:
+            pattern: A pattern string to match keys. Format depends on implementation.
+                For Redis: supports wildcards like "user:*" or "session:?123"
+                For Local: basic string matching
+
+        Returns:
+            The number of keys that were deleted.
+
+        Example:
+            >>> count = cache.clear_pattern("temp:*")
+            >>> print(f"Cleared {count} temporary keys")
+        """
        return 0
 
     def get_stats(self) -> Dict[str, Any]:
-        """Get cache statistics.
+        """Get cache statistics and metrics.
+
+        Returns:
+            Dictionary containing cache statistics such as:
+            - hit_rate: Cache hit rate percentage
+            - miss_rate: Cache miss rate percentage
+            - size: Number of items in cache
+            - memory_usage: Memory used by cache (if available)
+
+        Example:
+            >>> stats = cache.get_stats()
+            >>> print(f"Hit rate: {stats.get('hit_rate', 0)}%")
+        """
        return {}
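The no-op defaults above make CacheManager a null object: callers can hold a cache reference unconditionally and never branch on whether caching is enabled. A minimal sketch of that calling pattern, assuming only the interface shown in this diff (expensive_lookup and lookup_with_cache are hypothetical helpers, not TQL functions):

from typing import Any, Optional

class CacheManager:
    # Mirrors the no-op interface from tql/cache/base.py.
    def get(self, key: str) -> Optional[Any]:
        return None

    def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        pass

def expensive_lookup(key: str) -> str:
    # Hypothetical stand-in for a slow computation or I/O call.
    return f"value-for-{key}"

def lookup_with_cache(cache: CacheManager, key: str) -> Any:
    # Identical code path whether `cache` is the no-op base or a real backend.
    cached = cache.get(key)
    if cached is not None:
        return cached
    value = expensive_lookup(key)
    cache.set(key, value, ttl=300)
    return value

print(lookup_with_cache(CacheManager(), "user:123"))  # always recomputes under the no-op base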
tql/cache/memory.py
CHANGED
@@ -1,63 +1,171 @@
-"""In-memory cache implementation.
+"""In-memory cache implementation for TQL.
+
+This module provides a simple in-memory cache with TTL (time-to-live) support
+and basic LRU (Least Recently Used) eviction when the cache reaches its size limit.
+"""
 
 import time
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional
 
 from .base import CacheManager
 
 
 class LocalCacheManager(CacheManager):
-    """Local in-memory cache
+    """Local in-memory cache with TTL and LRU eviction.
+
+    This implementation provides thread-safe in-memory caching suitable for
+    single-process applications. For distributed caching across multiple
+    processes or servers, use RedisCacheManager instead.
+
+    Features:
+        - TTL-based expiration
+        - LRU eviction when cache is full
+        - Hit/miss statistics tracking
+        - Pattern-based key clearing
+
+    Args:
+        max_size: Maximum number of items to store (default: 10000)
+        default_ttl: Default time-to-live in seconds (default: 3600 = 1 hour)
+
+    Example:
+        >>> cache = LocalCacheManager(max_size=1000, default_ttl=600)
+        >>> cache.set("user:123", {"name": "Alice"}, ttl=300)
+        >>> user = cache.get("user:123")
+        >>> stats = cache.get_stats()
+        >>> print(f"Hit rate: {stats['hit_rate']:.2%}")
+
+    Attributes:
+        max_size: Maximum cache size
+        default_ttl: Default TTL for cached items
+    """
 
     def __init__(self, max_size: int = 10000, default_ttl: int = 3600):
+        """Initialize the local cache.
+
+        Args:
+            max_size: Maximum number of items to cache before eviction starts.
+            default_ttl: Default expiration time in seconds for cached items.
+        """
         self.max_size = max_size
         self.default_ttl = default_ttl
-        self._cache: Dict[str,
+        self._cache: Dict[str, Any] = {}
+        self._expiry: Dict[str, float] = {}
         self._hits = 0
         self._misses = 0
 
     def get(self, key: str) -> Optional[Any]:
-        """Retrieve value from cache if not expired.
+        """Retrieve value from cache if not expired.
+
+        Args:
+            key: The cache key to retrieve.
+
+        Returns:
+            The cached value if present and not expired, None otherwise.
+
+        Note:
+            This method automatically removes expired keys when accessed.
+            Hit/miss statistics are updated on each call.
+        """
         if key in self._cache:
-
-            if expiry > time.time():
+            expiry = self._expiry.get(key, float('inf'))
+            if expiry == 0 or expiry > time.time():
                 self._hits += 1
-                return
+                return self._cache[key]
             else:
-                # Expired
+                # Expired - clean up
                 del self._cache[key]
+                del self._expiry[key]
         self._misses += 1
         return None
 
     def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
-        """Store value in cache with TTL.
-
-
+        """Store value in cache with optional TTL.
+
+        Args:
+            key: The cache key under which to store the value.
+            value: The value to cache (any Python object).
+            ttl: Time-to-live in seconds. If None, uses default_ttl.
+                If 0, the item never expires.
+
+        Note:
+            When the cache is full (reaches max_size), the oldest item
+            is evicted to make room for the new one (LRU eviction).
+        """
+        if len(self._cache) >= self.max_size and key not in self._cache:
+            # Simple eviction: remove oldest (first in dict)
            oldest_key = next(iter(self._cache))
            del self._cache[oldest_key]
+            self._expiry.pop(oldest_key, None)
 
-
-
+        self._cache[key] = value
+        if ttl == 0:
+            # Never expires
+            self._expiry[key] = 0
+        else:
+            expiry_time = time.time() + (ttl if ttl is not None else self.default_ttl)
+            self._expiry[key] = expiry_time
 
     def delete(self, key: str) -> None:
-        """Remove value from cache.
+        """Remove value from cache.
+
+        Args:
+            key: The cache key to delete.
+
+        Note:
+            If the key doesn't exist, this method does nothing (no error raised).
+        """
        self._cache.pop(key, None)
+        self._expiry.pop(key, None)
 
     def clear_pattern(self, pattern: str) -> int:
-        """Clear all keys matching pattern.
+        """Clear all keys matching a glob pattern.
+
+        Args:
+            pattern: A glob pattern to match keys. Supports wildcards:
+                - '*' matches any sequence of characters
+                - '?' matches any single character
+                - '[seq]' matches any character in seq
+                - '[!seq]' matches any character not in seq
+
+        Returns:
+            The number of keys that were deleted.
+
+        Example:
+            >>> cache.set("user:123", data1)
+            >>> cache.set("user:456", data2)
+            >>> cache.set("session:789", data3)
+            >>> count = cache.clear_pattern("user:*")  # Deletes user:123 and user:456
+            >>> print(count)  # 2
+        """
        import fnmatch
 
        keys_to_delete = [k for k in self._cache.keys() if fnmatch.fnmatch(k, pattern)]
        for key in keys_to_delete:
            del self._cache[key]
+            self._expiry.pop(key, None)
        return len(keys_to_delete)
 
     def get_stats(self) -> Dict[str, Any]:
-        """Get cache statistics.
+        """Get cache performance statistics.
+
+        Returns:
+            Dictionary containing:
+            - hits: Number of successful cache retrievals
+            - misses: Number of cache misses
+            - hit_rate: Ratio of hits to total requests (0.0 to 1.0)
+            - size: Current number of items in cache
+            - max_size: Maximum cache capacity
+
+        Example:
+            >>> stats = cache.get_stats()
+            >>> print(f"Cache is {stats['hit_rate']:.2%} effective")
+            >>> print(f"Using {stats['size']}/{stats['max_size']} slots")
+        """
+        total_requests = self._hits + self._misses
        return {
            "hits": self._hits,
            "misses": self._misses,
-            "hit_rate": self._hits /
+            "hit_rate": self._hits / total_requests if total_requests > 0 else 0.0,
            "size": len(self._cache),
            "max_size": self.max_size,
        }
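One nuance worth noting: get() never reorders keys, so next(iter(self._cache)) evicts the oldest inserted key. The eviction is therefore insertion-order (FIFO) rather than strict LRU, despite the docstring wording. Below is a minimal standalone sketch of the two-dict TTL scheme from this diff, with 0 reserved as the "never expires" sentinel (set_item and get_item are illustrative functions, not the class API):

import time

cache: dict = {}
expiry: dict = {}

def set_item(key, value, ttl=None, default_ttl=3600):
    cache[key] = value
    # 0 means pinned; otherwise store an absolute deadline.
    expiry[key] = 0 if ttl == 0 else time.time() + (ttl if ttl is not None else default_ttl)

def get_item(key):
    if key in cache:
        deadline = expiry.get(key, float("inf"))
        if deadline == 0 or deadline > time.time():
            return cache[key]
        # Lazily evict on read, as the diff does.
        del cache[key], expiry[key]
    return None

set_item("a", 1, ttl=0)      # pinned forever
set_item("b", 2, ttl=0.01)   # expires almost immediately
time.sleep(0.02)
assert get_item("a") == 1 and get_item("b") is None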
tql/core.py
CHANGED
@@ -100,8 +100,25 @@ class TQL:
                 # This is an OpenSearch-style mapping, map field to itself
                 self._simple_mappings[k] = k
             else:
-                #
-
+                # Intelligent field mapping extraction for complex mappings
+                # Priority: 1) Key matching field name, 2) Key without dots (primary field), 3) First key
+
+                if k in v:
+                    # Field name exists as key in mapping (e.g., {"username": {"username": "keyword", ...}})
+                    self._simple_mappings[k] = k
+                else:
+                    # Find primary field (keys without dots, not starting with underscore)
+                    primary_fields = [
+                        field_key for field_key in v.keys()
+                        if '.' not in field_key and not field_key.startswith('_')
+                    ]
+
+                    if primary_fields:
+                        # Use first primary field
+                        self._simple_mappings[k] = primary_fields[0]
+                    else:
+                        # Fallback to first key (maintain backward compatibility)
+                        self._simple_mappings[k] = next(iter(v.keys()))
         else:
             # Default to mapping field to itself
             self._simple_mappings[k] = k
tql/core_components/opensearch_operations.py
CHANGED
@@ -239,7 +239,7 @@ class OpenSearchOperations:
         analysis_result = self.analyze_opensearch_query(query)
         has_mutators = isinstance(analysis_result, MutatorAnalysisResult)
         needs_post_processing_for_stats = (
-            has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False
+            has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False  # type: ignore[union-attr]
         )
 
         # Handle stats queries differently
@@ -258,7 +258,7 @@ class OpenSearchOperations:
         if filter_ast:
             # Use the optimized AST if we have mutators
             if has_mutators and needs_post_processing_for_stats:
-                filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]
+                filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]  # type: ignore[union-attr]
             else:
                 filter_query = backend.convert(filter_ast)["query"]
         else:
@@ -529,6 +529,8 @@ class OpenSearchOperations:
         stats_evaluator = TQLStatsEvaluator()
 
         # Execute the stats aggregation in memory
+        if stats_ast_for_post_processing is None:
+            raise ValueError("Stats AST is None but phase2 processing was requested")
         stats_results = stats_evaluator.evaluate_stats(filtered_docs, stats_ast_for_post_processing, {})
 
         # Format response for stats-only (no documents)
@@ -547,7 +549,7 @@ class OpenSearchOperations:
             "performance_impact": {
                 "overhead_ms": 0,  # Would need timing to calculate
                 "documents_processed": len(all_documents),
-                "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,
+                "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,  # type: ignore[union-attr]
             },
             "opensearch_query": complete_opensearch_query,
         }
@@ -580,6 +582,8 @@ class OpenSearchOperations:
             translator = OpenSearchStatsTranslator()
 
             # Transform the response using the translator
+            if stats_ast is None:
+                raise ValueError("Stats AST is None but grouping was detected")
             transformed_response = translator.transform_response(response, stats_ast)
 
             # The transformed response already has the correct structure
@@ -925,6 +929,21 @@ class OpenSearchOperations:
         # Get opensearch total before filtering
         opensearch_total = total_hits
 
+        # Track optimization features used in this query
+        optimizations_applied = []
+        if scan_all:
+            optimizations_applied.append("scroll_api")
+        if needs_phase2 and pagination_stats and pagination_stats.get("pages_checked", 0) > 1:
+            optimizations_applied.append("auto_pagination")
+        if request_cache:
+            optimizations_applied.append("request_cache")
+        if preference:
+            optimizations_applied.append("preference_routing")
+        if routing:
+            optimizations_applied.append("custom_routing")
+        if terminate_after:
+            optimizations_applied.append("early_termination")
+
         result = {
             "results": results,
             "total": len(results),
@@ -934,7 +953,7 @@ class OpenSearchOperations:
             "health_status": health_status,
             "health_reasons": health_reasons,
             "performance_impact": performance_impact,
-            "optimizations_applied":
+            "optimizations_applied": optimizations_applied,
             "opensearch_query": (
                 complete_opensearch_query if "complete_opensearch_query" in locals() else {}
             ),  # Include the full query body
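A condensed, hypothetical restatement of the mapping-selection priority added to TQL.__init__, useful for seeing the three tiers in isolation (pick_mapping is an illustrative function, not part of TQL):

from typing import Dict

def pick_mapping(field: str, mapping: Dict[str, str]) -> str:
    # Tier 1: the field name itself is a key in the mapping.
    if field in mapping:
        return field
    # Tier 2: a "primary" key with no dots and no leading underscore.
    primary = [k for k in mapping if "." not in k and not k.startswith("_")]
    if primary:
        return primary[0]
    # Tier 3: fall back to the first key for backward compatibility.
    return next(iter(mapping))

assert pick_mapping("username", {"username": "keyword", "username.text": "text"}) == "username"
assert pick_mapping("user", {"login": "keyword", "login.raw": "keyword"}) == "login"
assert pick_mapping("user", {"_meta.x": "keyword", "a.b": "text"}) == "_meta.x"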
tql/evaluator.py
CHANGED
@@ -350,6 +350,8 @@ class TQLEvaluator:
             return left_missing or right_missing
         elif node_type == "unary_op":
             # Don't recurse through NOT operators - they handle missing fields themselves
+            # The NOT operator has special logic at lines 213-254 that handles missing fields correctly
+            # Recursing here would cause double-handling and incorrect results
             return False
         elif node_type == "collection_op":
             field_name = node["field"]
tql/evaluator_components/value_comparison.py
CHANGED
@@ -6,6 +6,7 @@ operator implementations, and special cases like CIDR matching.
 
 import ipaddress
 import re
+from functools import lru_cache
 from typing import Any
 
 
@@ -15,6 +16,23 @@ class ValueComparator:
     # Sentinel value to distinguish missing fields from None values
     _MISSING_FIELD = object()
 
+    @staticmethod
+    @lru_cache(maxsize=256)
+    def _compile_regex(pattern: str) -> re.Pattern:
+        """Compile and cache regex patterns for performance.
+
+        Args:
+            pattern: Regex pattern string
+
+        Returns:
+            Compiled regex pattern
+
+        Note:
+            Uses LRU cache with max 256 patterns. This significantly improves
+            performance when the same regex patterns are used repeatedly in queries.
+        """
+        return re.compile(pattern)
+
     def compare_values(self, field_value: Any, operator: str, expected_value: Any) -> bool:  # noqa: C901
         """Compare a field value against an expected value using the given operator.
 
@@ -49,9 +67,17 @@ class ValueComparator:
             return False
 
         # Handle None field values (field exists but is None)
+        # IMPORTANT: None is a valid value, distinct from missing fields.
+        # For 'exists' operator: This code path should NOT be reached because 'exists'
+        # checks field presence in the record, not the value. The evaluator handles
+        # 'exists' before calling compare_values. If we reach here with None, it means
+        # the field exists but has None value, which should NOT match 'exists'.
         if field_value is None:
             if operator in ["exists"]:
-
+                # Field key exists in record but value is None
+                # Semantics: 'exists' means "field has a non-null value"
+                # This matches database behavior where NULL != EXISTS
+                return False  # None value does not satisfy 'exists'
             elif operator in ["is"]:
                 # Check for null comparison - expected_value can be None or "null"
                 return expected_value is None or (isinstance(expected_value, str) and expected_value.lower() == "null")
@@ -59,6 +85,10 @@ class ValueComparator:
             return False
 
         # Convert numeric strings to numbers for comparison
+        # IMPORTANT: Store original values to check if conversion succeeded
+        field_value_original = field_value
+        expected_value_original = expected_value
+
         field_value = self._convert_numeric(field_value)
         expected_value = self._convert_numeric(expected_value)
 
@@ -68,6 +98,20 @@ class ValueComparator:
         if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
             field_value = field_value.lower() == "true"
 
+        # Type compatibility check for numeric operators
+        # If operator requires numeric comparison, both values must be numeric
+        # Exception: Arrays are handled specially in the operator logic below
+        if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
+            # Skip check if field_value is an array - handled by array logic below
+            if not isinstance(field_value, (list, tuple)):
+                field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
+                expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
+
+                if not (field_is_numeric and expected_is_numeric):
+                    # At least one value failed numeric conversion
+                    # Cannot perform numeric comparison - return False
+                    return False
+
         try:
             if operator in ["eq", "="]:
                 # Handle array fields - check if ANY element equals expected value
@@ -104,27 +148,30 @@ class ValueComparator:
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle list fields by checking if ANY element contains the expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, list):
                     # For arrays, check if ANY element contains the expected value
-                    return any(str(expected_value) in str(elem) for elem in field_value)
+                    return any(str(expected_value).lower() in str(elem).lower() for elem in field_value)
                 else:
-                    return str(expected_value) in str(field_value)
+                    return str(expected_value).lower() in str(field_value).lower()
             elif operator == "startswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle array fields - check if ANY element starts with expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, (list, tuple)):
-                    return any(str(elem).startswith(str(expected_value)) for elem in field_value)
-                return str(field_value).startswith(str(expected_value))
+                    return any(str(elem).lower().startswith(str(expected_value).lower()) for elem in field_value)
+                return str(field_value).lower().startswith(str(expected_value).lower())
             elif operator == "endswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
                 # Handle array fields - check if ANY element ends with expected value
+                # Case-insensitive comparison to match post-processor behavior
                 if isinstance(field_value, (list, tuple)):
-                    return any(str(elem).endswith(str(expected_value)) for elem in field_value)
-                return str(field_value).endswith(str(expected_value))
+                    return any(str(elem).lower().endswith(str(expected_value).lower()) for elem in field_value)
+                return str(field_value).lower().endswith(str(expected_value).lower())
             elif operator == "in":
                 if isinstance(expected_value, list):
                     if len(expected_value) == 1 and isinstance(field_value, list):
@@ -143,7 +190,13 @@ class ValueComparator:
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Use cached regex compilation for performance
+                try:
+                    pattern = self._compile_regex(str(expected_value))
+                    return bool(pattern.search(str(field_value)))
+                except (re.error, TypeError):
+                    # Invalid regex pattern, fall back to no match
+                    return False
             elif operator == "cidr":
                 # Unwrap single-element lists for CIDR
                 if isinstance(expected_value, list) and len(expected_value) == 1:
@@ -194,22 +247,31 @@ class ValueComparator:
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return str(expected_value).lower() not in str(field_value).lower()
             elif operator == "not_startswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return not str(field_value).lower().startswith(str(expected_value).lower())
             elif operator == "not_endswith":
                 # Unwrap single-element lists for string operators
                 if isinstance(expected_value, list) and len(expected_value) == 1:
                     expected_value = expected_value[0]
-
+                # Case-insensitive comparison to match post-processor behavior
+                return not str(field_value).lower().endswith(str(expected_value).lower())
            elif operator == "not_regexp":
                # Unwrap single-element lists for string operators
                if isinstance(expected_value, list) and len(expected_value) == 1:
                    expected_value = expected_value[0]
-
+                # Use cached regex compilation for performance
+                try:
+                    pattern = self._compile_regex(str(expected_value))
+                    return not bool(pattern.search(str(field_value)))
+                except (re.error, TypeError):
+                    # Invalid regex pattern, fall back to match (not regexp succeeds)
+                    return True
            elif operator == "not_cidr":
                # Unwrap single-element lists for CIDR
                if isinstance(expected_value, list) and len(expected_value) == 1:
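The regex caching added here is the standard functools.lru_cache idiom. A self-contained sketch of the same pattern, including the fall-back-to-no-match behavior for invalid patterns (compile_regex and matches are illustrative names, not the class methods):

import re
from functools import lru_cache

@lru_cache(maxsize=256)
def compile_regex(pattern: str) -> re.Pattern:
    # Repeated patterns hit the cache instead of recompiling.
    return re.compile(pattern)

def matches(field_value: str, pattern: str) -> bool:
    try:
        return bool(compile_regex(pattern).search(field_value))
    except re.error:
        # Mirror the diff's behavior: an invalid pattern simply fails to match.
        return False

assert matches("alert-4042", r"\d{4}")
assert not matches("alert", "[unclosed")  # invalid regex -> False, no exception
print(compile_regex.cache_info())          # hit/miss counts exposed by lru_cache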
tql/mutator_analyzer.py
CHANGED
@@ -491,7 +491,7 @@ class MutatorAnalyzer:
         for param_name, param_value in geo_params.items():
             mutator_params.append([param_name, param_value])
 
-        geo_mutator = {"name": "geoip_lookup"}
+        geo_mutator: Dict[str, Any] = {"name": "geoip_lookup"}
         if mutator_params:
             geo_mutator["params"] = mutator_params
 
@@ -539,7 +539,7 @@ class MutatorAnalyzer:
         for param_name, param_value in nslookup_params.items():
             mutator_params.append([param_name, param_value])
 
-        nslookup_mutator = {"name": "nslookup"}
+        nslookup_mutator: Dict[str, Any] = {"name": "nslookup"}
         if mutator_params:
             nslookup_mutator["params"] = mutator_params
 
tql/opensearch_stats.py
CHANGED
@@ -449,7 +449,7 @@ class OpenSearchStatsTranslator:
         aggregations: List[Dict[str, Any]],
         normalized_fields: List[Dict[str, Any]],
         level: int,
-    ) ->
+    ) -> Union[Dict[str, Any], List[Dict[str, Any]], None]:
         """Transform a bucket recursively for multi-level grouping.
 
         Args:
@@ -502,6 +502,8 @@ class OpenSearchStatsTranslator:
                     result["aggregations"][agg_key] = value
             else:
                 # Handle nested buckets
+                if next_level_field is None:
+                    return None
                 sub_buckets = bucket[next_level_field].get("buckets", [])
                 sub_results = []
                 print(
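The guard added above follows a common pattern for Optional inputs to recursive transforms: check for None before using the value as a bucket key, and propagate None to the caller rather than raising a KeyError deeper down. A hypothetical condensation (transform_bucket here is a simplification, not the translator's real logic):

from typing import Any, Dict, List, Optional, Union

def transform_bucket(
    bucket: Dict[str, Any], next_level_field: Optional[str]
) -> Union[Dict[str, Any], List[Dict[str, Any]], None]:
    # Bail out when a deeper grouping level is expected but absent,
    # instead of indexing the bucket with a None key.
    if next_level_field is None:
        return None
    sub_buckets = bucket.get(next_level_field, {}).get("buckets", [])
    return [{"key": b.get("key")} for b in sub_buckets]

assert transform_bucket({"host": {"buckets": [{"key": "web-1"}]}}, "host") == [{"key": "web-1"}]
assert transform_bucket({}, None) is None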
tql/parser.py
CHANGED
@@ -24,6 +24,9 @@ class TQLParser:
     evaluated against data or converted to backend-specific query formats.
     """
 
+    # Maximum query depth to prevent stack overflow and DoS attacks
+    MAX_QUERY_DEPTH = 50
+
     def __init__(self):
         """Initialize the parser with TQL grammar."""
         self.grammar = TQLGrammar()
@@ -53,7 +56,8 @@
             parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)
 
             # Convert to our AST format
-
+            # Start depth counting at 0 from parse() entry point
+            return self._build_ast(parsed_result.asList()[0], depth=0)
 
         except ParseException as e:
             # Extract position and context from pyparsing exception
@@ -114,15 +118,29 @@
         # Extract fields using the field extractor
         return self.field_extractor.extract_fields(ast)
 
-    def _build_ast(self, parsed: Any) -> Dict[str, Any]:  # noqa: C901
+    def _build_ast(self, parsed: Any, depth: int = 0) -> Dict[str, Any]:  # noqa: C901
         """Build AST from parsed pyparsing result.
 
         Args:
             parsed: The parsed result from pyparsing
+            depth: Current recursion depth (for DoS prevention)
 
         Returns:
             Dictionary representing the AST node
+
+        Raises:
+            TQLSyntaxError: If query depth exceeds maximum allowed depth
         """
+        # Check depth limit to prevent stack overflow and DoS attacks
+        if depth > self.MAX_QUERY_DEPTH:
+            raise TQLSyntaxError(
+                f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                "Please simplify your query to reduce nesting.",
+                position=0,
+                query="",
+                suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"]
+            )
+
         if isinstance(parsed, list):
             if len(parsed) == 1:
                 # Single item, check if it's a field with is_private/is_global mutator
@@ -162,7 +180,7 @@
                     }
                     return result
                 # Single item, unwrap it
-                return self._build_ast(parsed[0])
+                return self._build_ast(parsed[0], depth + 1)
             elif len(parsed) >= 2 and isinstance(parsed[0], str) and parsed[0].lower() == "stats":
                 # This is a stats expression without filter (applies to all records)
                 return self._build_stats_ast(parsed)
@@ -210,7 +228,7 @@
             # Check for NOT operator first (before field | mutator check)
             elif isinstance(first, str) and (first.lower() == "not" or first == "!"):
                 # Unary logical operator (NOT or !)
-                return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second)}
+                return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second, depth + 1)}
 
             # Check for field | mutator without operator
             # This happens when we have a field with mutator(s) as the last element
@@ -267,12 +285,12 @@
                     # This is filter | stats
                     return {
                         "type": "query_with_stats",
-                        "filter": self._build_ast(first),
+                        "filter": self._build_ast(first, depth + 1),
                         "stats": self._build_stats_ast(second),
                     }
                 else:
                     # Fallback to treating as unary logical operator
-                    return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second)}
+                    return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second, depth + 1)}
             elif len(parsed) >= 3:
                 # Check if this is a field with multiple mutators
                 if isinstance(parsed[0], str) and all(
@@ -419,7 +437,7 @@
                         "field": field_name,
                         "type_hint": type_hint,
                         "field_mutators": field_mutators,
-                        "conditions": self._build_ast(conditions) if conditions else None,
+                        "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                     }
 
                     # Add geo parameters if any
@@ -497,7 +515,7 @@
                         "field": field_name,
                         "type_hint": type_hint,
                         "field_mutators": field_mutators,
-                        "conditions": self._build_ast(conditions) if conditions else None,
+                        "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                    }
 
                     # Add nslookup parameters if any
@@ -638,7 +656,7 @@
                        "field": field_name,
                        "type_hint": type_hint,
                        "field_mutators": field_mutators,
-                        "conditions": self._build_ast(conditions) if conditions else None,
+                        "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                    }
 
                    # Add geo parameters if any
@@ -715,7 +733,7 @@
                        "field": field_name,
                        "type_hint": type_hint,
                        "field_mutators": field_mutators,
-                        "conditions": self._build_ast(conditions) if conditions else None,
+                        "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                    }
 
                    # Add nslookup parameters if any
@@ -725,7 +743,7 @@
                    return result
                else:
                    # This is a chained operation, not a between operation
-                    return self._build_chained_ast(parsed)
+                    return self._build_chained_ast(parsed, depth + 1)
 
            elif len(parsed) == 6:
                # Check for "field not between value1 and value2" or "field ! between value1 and value2"
@@ -814,7 +832,7 @@
                        "field": field_name,
                        "type_hint": type_hint,
                        "field_mutators": field_mutators,
-                        "conditions": self._build_ast(conditions) if conditions else None,
+                        "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                    }
 
                    # Add geo parameters if any
@@ -824,7 +842,7 @@
                    return result
                else:
                    # This is a chained operation, not a not_between operation
-                    return self._build_chained_ast(parsed)
+                    return self._build_chained_ast(parsed, depth + 1)
 
            elif len(parsed) == 3:
                # Binary operation or comparison (including negated unary operators like "field not exists")
@@ -869,7 +887,7 @@
                        "field": field_name,
                        "type_hint": type_hint,
                        "field_mutators": field_mutators,
-                        "conditions": self._build_ast(conditions) if conditions else None,
+                        "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                    }
 
                    # Add geo parameters if any
@@ -923,7 +941,7 @@
                        "field": field_name,
                        "type_hint": type_hint,
                        "field_mutators": field_mutators,
-                        "conditions": self._build_ast(conditions) if conditions else None,
+                        "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                    }
 
                    # Add nslookup parameters if any
@@ -937,8 +955,8 @@
                return {
                    "type": "logical_op",
                    "operator": operator.lower(),
-                    "left": self._build_ast(left),
-                    "right": self._build_ast(right),
+                    "left": self._build_ast(left, depth + 1),
+                    "right": self._build_ast(right, depth + 1),
                }
            elif (
                isinstance(operator, str)
@@ -1189,7 +1207,7 @@
            # Handle longer lists (chained operations)
            # This happens with infixNotation for multiple AND/OR operations
            # The structure will be flattened, so we need to reconstruct the tree
-            return self._build_chained_ast(parsed)
+            return self._build_chained_ast(parsed, depth + 1)
        else:
            # Single value - should already be a proper AST node
            if isinstance(parsed, dict):
@@ -1201,21 +1219,34 @@
        # This should be unreachable, but helps mypy understand all paths return
        raise AssertionError("Unreachable code in _build_ast")
 
-    def _build_chained_ast(self, parsed_list: List[Any]) -> Dict[str, Any]:
+    def _build_chained_ast(self, parsed_list: List[Any], depth: int = 0) -> Dict[str, Any]:
        """Build AST from chained operations (e.g., A AND B AND C).
 
        Args:
            parsed_list: List of alternating operands and operators
+            depth: Current recursion depth (for DoS prevention)
 
        Returns:
            Dictionary representing the AST node
+
+        Raises:
+            TQLSyntaxError: If query depth exceeds maximum allowed depth
        """
+        # Check depth limit to prevent stack overflow
+        if depth > self.MAX_QUERY_DEPTH:
+            raise TQLSyntaxError(
+                f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                "Please simplify your query to reduce nesting.",
+                position=0,
+                query="",
+                suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"]
+            )
        if len(parsed_list) < 3:
            # Not enough elements for a chained operation
            return {"type": "unknown", "value": parsed_list}
 
        # Start with the first operand
-        result = self._build_ast(parsed_list[0])
+        result = self._build_ast(parsed_list[0], depth + 1)
 
        # Process pairs of (operator, operand)
        i = 1
@@ -1228,7 +1259,7 @@
                "type": "logical_op",
                "operator": operator.lower(),
                "left": result,
-                "right": self._build_ast(operand),
+                "right": self._build_ast(operand, depth + 1),
            }
        else:
            # This shouldn't happen in a well-formed chained expression
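The depth cap threaded through _build_ast and _build_chained_ast is a standard recursion guard: every recursive call advances a counter, so the bound holds for any input shape. A standalone sketch under the same MAX_QUERY_DEPTH = 50 assumption (the exception class and build function here are illustrative, not the TQL classes):

MAX_DEPTH = 50

class DepthError(ValueError):
    pass

def build(node, depth=0):
    if depth > MAX_DEPTH:
        raise DepthError(f"query depth exceeds {MAX_DEPTH}")
    if isinstance(node, list):
        # Each level of nesting costs one unit of depth.
        return [build(child, depth + 1) for child in node]
    return node

nested = "x"
for _ in range(60):
    nested = [nested]          # 60 levels of nesting
try:
    build(nested)
except DepthError as e:
    print(e)                   # query depth exceeds 50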
tql/post_processor.py
CHANGED
@@ -73,12 +73,17 @@ class QueryPostProcessor:
             field_accessor.get_field_value, evaluator._evaluate_node
         )
 
+        # Safe access with validation
+        # conditions is guaranteed to exist by the if check above
+        conditions = requirement.metadata["conditions"]
+        nslookup_params = requirement.metadata.get("nslookup_params", {})
+
         # Build node for evaluation
         node = {
             "type": "nslookup_expr",
             "field": requirement.field_name,
-            "conditions":
-            "nslookup_params":
+            "conditions": conditions,
+            "nslookup_params": nslookup_params,
         }
 
         # Evaluate the nslookup expression
@@ -91,6 +96,7 @@ class QueryPostProcessor:
             and requirement.metadata
             and "conditions" in requirement.metadata
         ):
+            # Safe access - conditions is guaranteed to exist by the if check
             conditions = requirement.metadata["conditions"]
             if conditions:
                 # Get the geo data that was enriched
@@ -129,12 +135,13 @@ class QueryPostProcessor:
             # Check if this is an array operator with comparison
             if "comparison_operator" in requirement.metadata:
                 # This is a special case: field | any/all/none eq value
-
-
+                # Safe access - both keys are guaranteed to exist by the if checks
+                array_operator = requirement.metadata["operator"]  # exists from line 128 check
+                comparison_operator = requirement.metadata["comparison_operator"]  # exists from line 135 check
                 value = requirement.metadata.get("value")
 
-                # Get the field value
-                temp_field_name =
+                # Get the field value with proper nested field handling
+                temp_field_name = self._get_mutated_field_name(requirement.field_name)
                 field_value = self._get_field_value(result, temp_field_name)
                 if field_value is None:
                     # No mutated value, get original
@@ -148,18 +155,21 @@ class QueryPostProcessor:
                         break
             else:
                 # Regular operator check
+                # Safe access - operator is guaranteed to exist by the if check at line 134
                 operator = requirement.metadata["operator"]
                 value = requirement.metadata.get("value")
 
                 # Check if this was originally a different operator (for type-changing mutators)
                 if requirement.metadata.get("_original_comparison"):
+                    # Safe access - validated by .get() check above
                     original = requirement.metadata["_original_comparison"]
-                    operator
+                    # Validate that operator exists in original
+                    operator = original.get("operator", operator)
                     value = original.get("value", value)
 
                 # Get the field value - either mutated or original
                 # First check for mutated value in temp field
-                temp_field_name =
+                temp_field_name = self._get_mutated_field_name(requirement.field_name)
                 field_value = self._get_field_value(result, temp_field_name)
                 if field_value is None:
                     # No mutated value, get original
@@ -373,7 +383,7 @@ class QueryPostProcessor:
                 return False
 
         # Get the field value
-        temp_field_name =
+        temp_field_name = self._get_mutated_field_name(field_name)
         field_value = self._get_field_value(result, temp_field_name)
         if field_value is None:
             # No mutated value, get original
@@ -703,7 +713,7 @@ class QueryPostProcessor:
                     self._set_field_value(result, requirement.field_name, mutated_value)
                 elif not is_geo_enrichment:
                     # For type-changing mutators with filtering operations, store in temp field
-                    temp_field_name =
+                    temp_field_name = self._get_mutated_field_name(requirement.field_name)
                     self._set_field_value(result, temp_field_name, mutated_value)
 
         # Check if we have any enrichment mutators
@@ -994,6 +1004,25 @@ class QueryPostProcessor:
 
         return current
 
+    def _get_mutated_field_name(self, field_name: str) -> str:
+        """Generate the correct mutated field name for nested or flat fields.
+
+        Args:
+            field_name: The original field name (e.g., "user.address.zip" or "status")
+
+        Returns:
+            Mutated field name with proper nesting:
+            - "user.address.zip" -> "user.address.__zip_mutated__"
+            - "status" -> "__status_mutated__"
+        """
+        field_parts = field_name.split('.')
+        if len(field_parts) > 1:
+            # For nested fields, only mutate the leaf field name
+            return '.'.join(field_parts[:-1] + [f"__{field_parts[-1]}_mutated__"])
+        else:
+            # For flat fields, mutate the entire name
+            return f"__{field_name}_mutated__"
+
     def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
         """Get a field value from a record, supporting nested fields.
 
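The newly factored-out helper is small enough to reproduce standalone. The key behavior is that only the leaf path segment gets the __..._mutated__ marker, so the temp value stays nested beside the original field rather than creating a flat top-level key:

def get_mutated_field_name(field_name: str) -> str:
    # Same logic as the _get_mutated_field_name method added in this diff.
    parts = field_name.split(".")
    if len(parts) > 1:
        return ".".join(parts[:-1] + [f"__{parts[-1]}_mutated__"])
    return f"__{field_name}_mutated__"

assert get_mutated_field_name("user.address.zip") == "user.address.__zip_mutated__"
assert get_mutated_field_name("status") == "__status_mutated__"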
tql/scripts.py
CHANGED
@@ -1,5 +1,6 @@
 """ Runs pytest, coverage, linters, and security checks. """
 
+import os
 import subprocess  # nosec
 
 
@@ -29,8 +30,13 @@ def run_coverage():
     Run coverage against all files in the `src` directory
     and output an XML report to `reports/coverage.xml`.
     """
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
     # 1. Run pytest with coverage, using `src` as the source
-    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True)  # nosec
+    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True, env=env)  # nosec
 
     # 2. Generate an XML coverage report in `reports/coverage.xml`
     subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec
@@ -40,7 +46,12 @@ def run_coverage():
 
 def run_tests():
     """Runs pytests against tests in the `tests` directory."""
-
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
+    subprocess.run(["pytest", "tests"], check=True, env=env)  # nosec
 
 
 def run_lint_all():
@@ -84,6 +95,11 @@ def run_lint():
 
 def run_badge():
     """Generate a badge using genbadge."""
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
     subprocess.run(  # nosec
         [
             "coverage",
@@ -94,6 +110,7 @@ def run_badge():
             "--junit-xml=reports/junit/junit.xml",
         ],
         check=True,
+        env=env,
     )
 
     # 2. Generate an XML coverage report in `reports/coverage.xml`
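The environment handling repeated across run_coverage, run_tests, and run_badge copies the parent environment and defaults INTEGRATION_TEST_ENABLE to "false" only when the caller has not already set it, so an explicit opt-in from the shell still wins. dict.setdefault is an equivalent shorthand for the if-not-in check used in the diff:

import os
import subprocess

env = os.environ.copy()
env.setdefault("INTEGRATION_TEST_ENABLE", "false")  # keeps any value already exported
subprocess.run(["pytest", "tests"], check=True, env=env)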
tql/stats_evaluator.py
CHANGED
@@ -6,7 +6,7 @@ aggregation queries against data records in memory.
 
 import statistics
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Set, Union
 
 from .exceptions import TQLError
 
@@ -180,7 +180,9 @@ class TQLStatsEvaluator:
             # Build group key
             key_parts = []
             for field_spec in normalized_fields:
-                field_name = field_spec
+                field_name = field_spec.get("field")
+                if field_name is None:
+                    continue
                 value = self._get_field_value(record, field_name)
                 key_parts.append((field_name, value))
 
@@ -407,7 +409,7 @@ class TQLStatsEvaluator:
         filtered_results = []
 
         # Track unique values at each level
-        level_values = {}
+        level_values: Dict[int, Dict[Any, Set[Any]]] = {}
         for level, field_spec in enumerate(normalized_fields):
             level_values[level] = {}
 
{tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/LICENSE
RENAMED
File without changes
{tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/WHEEL
RENAMED
File without changes
{tellaro_query_language-0.2.2.dist-info → tellaro_query_language-0.2.3.dist-info}/entry_points.txt
RENAMED
File without changes