tellaro-query-language 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tellaro-query-language
- Version: 0.2.2
+ Version: 0.2.3
  Summary: A flexible, human-friendly query language for searching and filtering structured data
  Home-page: https://github.com/tellaro/tellaro-query-language
  License: MIT
@@ -1,25 +1,25 @@
- tql/__init__.py,sha256=mRrrtun-1Xx9k8g0aaiqxYhhNAwWEyRC4zrVMO49Kkg,1260
+ tql/__init__.py,sha256=eqti5Fmu8EjD-NbCwqb1UKGJQ0OpWJLREsVMqqn6Hs4,1260
  tql/analyzer.py,sha256=Sfzj6f7YzqylT8HIL9hDbXdhl0lf8q8DNoafrxkD-F8,15456
  tql/cache/__init__.py,sha256=GIzIEMZUZEYJj72sAhuVLEG-OJEKUG2srUWNM3Ix-T8,213
- tql/cache/base.py,sha256=0b-8uyh3JltayGmXQI45snTqsM5sQu9u0KcNvZIRa-I,687
- tql/cache/memory.py,sha256=ibcmQSAxNvqCy6DksbU7gLu6UArYp1u3fW-oLubxtV0,2056
+ tql/cache/base.py,sha256=CwLpobv4WR7WSz99JVWAHNn-XQTtqr38Yg5UiWNXPiA,3192
+ tql/cache/memory.py,sha256=1kDfyODBCDi5UvSP_f_7UhHVmug-sLnrsMYvm4C4cxo,5978
  tql/cache/redis.py,sha256=ZU_IsVDvpSYpNvPfnZ4iulJDODpEGx3c4dkXLzPzPVc,2309
- tql/core.py,sha256=bMPrcuutY-1yvC-4M7w2y1JxNitMyBSpxPfg8ohjO60,48406
+ tql/core.py,sha256=ZNtxNJSZqotJ-3z9kg8Nc2lKDoeR9Bspe8k7VrXp0Ow,49363
  tql/core_components/README.md,sha256=Rm7w4UHdQ0vPBEFybE5b62IOvSA5Nzq2GRvtBHOapmc,3068
  tql/core_components/__init__.py,sha256=v8BBybPlqV7dkVY9mw1mblvqyAFJZ7Pf_bEc-jAL7FI,643
  tql/core_components/file_operations.py,sha256=Jr0kkxz_OP2KHOAsIr7KMtYe_lbu8LuBUySt2LQbjJw,3925
- tql/core_components/opensearch_operations.py,sha256=KvmK1FnkGZFBjBysH_sDjzIRnyUcNn7wzLzuRr1rBlg,54264
+ tql/core_components/opensearch_operations.py,sha256=zgxGiDpXyPW0ZUX-StpZXxf84s8eLxSymAGM5UUJimk,55253
  tql/core_components/stats_operations.py,sha256=aqTGAqIFvR6EkSbJEd0qft8Ldy8uiTrK2XI9o5bZUOs,8014
  tql/core_components/validation_operations.py,sha256=_VPXh0HABBjsXF99jFT7B6-5QAPsADOCy6poinGrxeE,22454
- tql/evaluator.py,sha256=_JYr-wK3F1wvBoNGIBiAEaP6Ot1g2qxZ4lOjPdOqvDk,17698
+ tql/evaluator.py,sha256=W2PbD0umxKORlb9npPgg985I7eYxc9QsXUtiZsgOPCk,17889
  tql/evaluator_components/README.md,sha256=c59yf2au34yPhrru7JWgGop_ORteB6w5vfMhsac8j3k,3882
  tql/evaluator_components/__init__.py,sha256=DourRUSYXWPnCghBFj7W0YfMeymT3X8YTDCwnLIyP1c,535
  tql/evaluator_components/field_access.py,sha256=BuXvL9jlv4H77neT70Vh7_qokmzs-d4EbSDA2FB1IT0,6435
  tql/evaluator_components/special_expressions.py,sha256=K6M5pW4Re2kEqxfxj9sc7I_M1tU3pn6LKJ2AfjHeciA,12917
- tql/evaluator_components/value_comparison.py,sha256=pL7-hxdNbzJ53DrTSiDdd7KYbVLChuNwFRLjG7P_1KM,17939
+ tql/evaluator_components/value_comparison.py,sha256=Woo8bNwIARdfe7F7ApYD5xi4NrrPwhdem_5ZKSik-t4,21369
  tql/exceptions.py,sha256=hatIixXci6p57J9RrkfdvmKM_2i-JKb8ViL2kU4z7a8,5550
  tql/geoip_normalizer.py,sha256=tvie-5xevJEeLp2KmjoXDjYdND8AvyVE7lCO8qgUzGY,10486
- tql/mutator_analyzer.py,sha256=OzI7t3C4H0IJOonpywE5LWz2cm5Dco5xnp2RTQOiSWg,55638
+ tql/mutator_analyzer.py,sha256=OWx3k5lK5aFHWU9Ez6DaIhenEZDxj9CbB0vM71xqUTw,55670
  tql/mutators/__init__.py,sha256=eTK8sRw4KXXnTZTn5ETIqwcaIek5rSUIVyZsxTwNNHA,6966
  tql/mutators/base.py,sha256=4Ze_x1sTO11OILXfcF2XN7ttyHcZ4gwn96UXFMMaC6M,2523
  tql/mutators/dns.py,sha256=1IKgHolFLRMR4TOgK0AiLjz5vDtFiqO328mVF4Vzk3s,14428
@@ -36,21 +36,21 @@ tql/opensearch_components/field_mapping.py,sha256=fj388cKVyDXLJKi8giSiGHL9zg4cFR
  tql/opensearch_components/lucene_converter.py,sha256=OvYTZHNBktPGow1fsVm4TMlvxHSmWrnqo42lFZNxXTo,13175
  tql/opensearch_components/query_converter.py,sha256=vLoBqv7W3ntqUH6hcuT4PDJkGkAGSQCxMvAWC482c0g,41971
  tql/opensearch_mappings.py,sha256=sVLlQlE3eGD7iNNZ_m4F4j5GVzQAJhZyCqDKYRhLRh8,11531
- tql/opensearch_stats.py,sha256=aMV__jtlfogGBnFucsNPazORro2mYTz_C_w9uxOqsMI,24384
- tql/parser.py,sha256=9kewX4IbBL3W5hbq9Xhi4BGrQ4QaoWqz9AJV0Yuf9YA,78665
+ tql/opensearch_stats.py,sha256=l1VsHp1hFzsz8VFFD42M4xwlVgKkjptLs8TFB0wqqRw,24478
+ tql/parser.py,sha256=t1bpL1hrHVpLZKeEP_DxMszlYUbh7QiudFDYxZtlc5s,80286
  tql/parser_components/README.md,sha256=lvQX72ckq2zyotGs8QIHHCIFqaA7bOHwkP44wU8Zoiw,2322
  tql/parser_components/__init__.py,sha256=zBwHBMPJyHSBbaOojf6qTrJYjJg5A6tPUE8nHFdRiQs,521
  tql/parser_components/ast_builder.py,sha256=erHoeKAMzobswoRIXB9xcsZbzQ5-2ZwaYfQgRWoUAa8,9653
  tql/parser_components/error_analyzer.py,sha256=qlCD9vKyW73aeKQYI33P1OjIWSJ3LPd08wuN9cis2fU,4012
  tql/parser_components/field_extractor.py,sha256=eUEkmiYWX2OexanFqhHeX8hcIkRlfIcgMB667e0HRYs,4629
  tql/parser_components/grammar.py,sha256=h58RBshZHXgbP1EmNwmf7dny-fgVloNg-qN4Rivross,20599
- tql/post_processor.py,sha256=MZOJzuWTL2qdvu-AUNMryYF2D-piv8rYH5vCcrLt5-A,50069
- tql/scripts.py,sha256=VOr5vCjIvKlW36kwvJx7JGFIRM16IJZlbJcWlBexBtk,3728
- tql/stats_evaluator.py,sha256=OQZuNLwLHAtWrwAh3utdtr1fQt3tftCs6L-1G1NQCGQ,22318
+ tql/post_processor.py,sha256=hItSj1VaQwCf2rbRQzLaMfAWR5RqMTpvDP-_X0HVhVo,51697
+ tql/scripts.py,sha256=2iryuAWqIxYoFA7R5hIzC6NE72ihTH9kIZd1-thKPTM,4331
+ tql/stats_evaluator.py,sha256=xJoTaBCBiKzQ0HuITGhTA41dVpeDhIMU9EMguCh_VG0,22427
  tql/stats_transformer.py,sha256=MT-4rDWZSySgn4Fuq9H0c-mvwFYLM6FqWpPv2rHX-rE,7588
  tql/validators.py,sha256=e9MlX-zQ_O3M8YP8vXyMjKU8iiJMTh6mMK0iv0_4gTY,3771
- tellaro_query_language-0.2.2.dist-info/LICENSE,sha256=zRhQ85LnW55fWgAjQctckwQ67DX5Jmt64lq343ThZFU,1063
- tellaro_query_language-0.2.2.dist-info/METADATA,sha256=QsCXKY_0aHeMorc4PepJ84ViZbTK53suxILmr868Lkk,15740
- tellaro_query_language-0.2.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- tellaro_query_language-0.2.2.dist-info/entry_points.txt,sha256=H43APfGBMsZkKsUCnFTaqprQPW-Kce2yz2qsBL3dZrw,164
- tellaro_query_language-0.2.2.dist-info/RECORD,,
+ tellaro_query_language-0.2.3.dist-info/LICENSE,sha256=zRhQ85LnW55fWgAjQctckwQ67DX5Jmt64lq343ThZFU,1063
+ tellaro_query_language-0.2.3.dist-info/METADATA,sha256=jRsW9n1nhyQb7m0qZd4F7h-q_ZoH_WzfVcCJMzC9P3c,15740
+ tellaro_query_language-0.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ tellaro_query_language-0.2.3.dist-info/entry_points.txt,sha256=H43APfGBMsZkKsUCnFTaqprQPW-Kce2yz2qsBL3dZrw,164
+ tellaro_query_language-0.2.3.dist-info/RECORD,,
tql/__init__.py CHANGED
@@ -28,7 +28,7 @@ from .opensearch_mappings import (
      get_sample_data_from_index,
  )
 
- __version__ = "0.1.0"
+ __version__ = "0.2.2"
  __all__ = [
      "TQL",
      "TQLParseError",
tql/cache/base.py CHANGED
@@ -1,25 +1,97 @@
- """Base cache infrastructure."""
+ """Base cache infrastructure for TQL.
+
+ This module provides the base CacheManager class that defines the caching
+ interface used throughout TQL. Concrete implementations include LocalCacheManager
+ for in-memory caching and RedisCacheManager for distributed caching.
+ """
 
  from typing import Any, Dict, Optional
 
 
  class CacheManager:
-     """Base class for cache management."""
+     """Base class for cache management.
+
+     This class defines the interface for all cache implementations in TQL.
+     Subclasses should override these methods to provide actual caching functionality.
+
+     The base implementation provides no-op defaults that can be safely used when
+     caching is disabled or not needed.
+
+     Example:
+         >>> cache = LocalCacheManager()
+         >>> cache.set("user:123", {"name": "Alice", "age": 30}, ttl=3600)
+         >>> user = cache.get("user:123")
+         >>> cache.delete("user:123")
+     """
 
      def get(self, key: str) -> Optional[Any]:
-         """Retrieve value from cache."""
+         """Retrieve a value from the cache.
+
+         Args:
+             key: The cache key to look up. Should be a string identifier.
+
+         Returns:
+             The cached value if it exists and hasn't expired, None otherwise.
+
+         Example:
+             >>> value = cache.get("my_key")
+             >>> if value is not None:
+             ...     print(f"Found: {value}")
+         """
          return None
 
      def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
-         """Store value in cache."""
+         """Store a value in the cache.
+
+         Args:
+             key: The cache key under which to store the value.
+             value: The value to cache. Can be any Python object.
+             ttl: Time-to-live in seconds. If None or 0, the value never expires.
+
+         Example:
+             >>> cache.set("config", {"debug": True}, ttl=300)  # Cache for 5 minutes
+             >>> cache.set("permanent", {"version": "1.0"})  # Never expires
+         """
 
      def delete(self, key: str) -> None:
-         """Remove value from cache."""
+         """Remove a value from the cache.
+
+         Args:
+             key: The cache key to delete.
+
+         Example:
+             >>> cache.delete("expired_key")
+         """
 
      def clear_pattern(self, pattern: str) -> int:  # pylint: disable=unused-argument
-         """Clear all keys matching pattern."""
+         """Clear all keys matching a pattern.
+
+         Args:
+             pattern: A pattern string to match keys. Format depends on implementation.
+                 For Redis: supports wildcards like "user:*" or "session:?123"
+                 For Local: basic string matching
+
+         Returns:
+             The number of keys that were deleted.
+
+         Example:
+             >>> count = cache.clear_pattern("temp:*")
+             >>> print(f"Cleared {count} temporary keys")
+         """
          return 0
 
      def get_stats(self) -> Dict[str, Any]:
-         """Get cache statistics."""
+         """Get cache statistics and metrics.
+
+         Returns:
+             Dictionary containing cache statistics such as:
+             - hit_rate: Cache hit rate percentage
+             - miss_rate: Cache miss rate percentage
+             - size: Number of items in cache
+             - memory_usage: Memory used by cache (if available)
+
+         Example:
+             >>> stats = cache.get_stats()
+             >>> print(f"Hit rate: {stats.get('hit_rate', 0)}%")
+         """
          return {}
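The expanded docstrings above make the no-op contract of the base class explicit. As a quick illustration, here is a minimal sketch (not package code) of a concrete subclass backed by a plain dict; everything beyond the overridden method names is invented for the example:

```python
# Minimal sketch of a CacheManager subclass (illustrative, not from the package).
from typing import Any, Dict, Optional

from tql.cache.base import CacheManager


class DictCacheManager(CacheManager):
    """Toy implementation: no TTL handling, no eviction, no stats."""

    def __init__(self) -> None:
        self._store: Dict[str, Any] = {}

    def get(self, key: str) -> Optional[Any]:
        return self._store.get(key)

    def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        self._store[key] = value  # ttl is ignored in this sketch

    def delete(self, key: str) -> None:
        self._store.pop(key, None)
```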
tql/cache/memory.py CHANGED
@@ -1,63 +1,171 @@
- """In-memory cache implementation."""
+ """In-memory cache implementation for TQL.
+
+ This module provides a simple in-memory cache with TTL (time-to-live) support
+ and basic LRU (Least Recently Used) eviction when the cache reaches its size limit.
+ """
 
  import time
- from typing import Any, Dict, Optional, Tuple
+ from typing import Any, Dict, Optional
 
  from .base import CacheManager
 
 
  class LocalCacheManager(CacheManager):
-     """Local in-memory cache using LRU."""
+     """Local in-memory cache with TTL and LRU eviction.
+
+     This implementation provides thread-safe in-memory caching suitable for
+     single-process applications. For distributed caching across multiple
+     processes or servers, use RedisCacheManager instead.
+
+     Features:
+     - TTL-based expiration
+     - LRU eviction when cache is full
+     - Hit/miss statistics tracking
+     - Pattern-based key clearing
+
+     Args:
+         max_size: Maximum number of items to store (default: 10000)
+         default_ttl: Default time-to-live in seconds (default: 3600 = 1 hour)
+
+     Example:
+         >>> cache = LocalCacheManager(max_size=1000, default_ttl=600)
+         >>> cache.set("user:123", {"name": "Alice"}, ttl=300)
+         >>> user = cache.get("user:123")
+         >>> stats = cache.get_stats()
+         >>> print(f"Hit rate: {stats['hit_rate']:.2%}")
+
+     Attributes:
+         max_size: Maximum cache size
+         default_ttl: Default TTL for cached items
+     """
 
      def __init__(self, max_size: int = 10000, default_ttl: int = 3600):
+         """Initialize the local cache.
+
+         Args:
+             max_size: Maximum number of items to cache before eviction starts.
+             default_ttl: Default expiration time in seconds for cached items.
+         """
          self.max_size = max_size
          self.default_ttl = default_ttl
-         self._cache: Dict[str, Tuple[Any, float]] = {}
+         self._cache: Dict[str, Any] = {}
+         self._expiry: Dict[str, float] = {}
          self._hits = 0
          self._misses = 0
 
      def get(self, key: str) -> Optional[Any]:
-         """Retrieve value from cache if not expired."""
+         """Retrieve value from cache if not expired.
+
+         Args:
+             key: The cache key to retrieve.
+
+         Returns:
+             The cached value if present and not expired, None otherwise.
+
+         Note:
+             This method automatically removes expired keys when accessed.
+             Hit/miss statistics are updated on each call.
+         """
          if key in self._cache:
-             value, expiry = self._cache[key]
-             if expiry > time.time():
+             expiry = self._expiry.get(key, float('inf'))
+             if expiry == 0 or expiry > time.time():
                  self._hits += 1
-                 return value
+                 return self._cache[key]
              else:
-                 # Expired
+                 # Expired - clean up
                  del self._cache[key]
+                 del self._expiry[key]
          self._misses += 1
          return None
 
      def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
-         """Store value in cache with TTL."""
-         if len(self._cache) >= self.max_size:
-             # Simple eviction: remove oldest
+         """Store value in cache with optional TTL.
+
+         Args:
+             key: The cache key under which to store the value.
+             value: The value to cache (any Python object).
+             ttl: Time-to-live in seconds. If None, uses default_ttl.
+                 If 0, the item never expires.
+
+         Note:
+             When the cache is full (reaches max_size), the oldest item
+             is evicted to make room for the new one (LRU eviction).
+         """
+         if len(self._cache) >= self.max_size and key not in self._cache:
+             # Simple eviction: remove oldest (first in dict)
              oldest_key = next(iter(self._cache))
              del self._cache[oldest_key]
+             self._expiry.pop(oldest_key, None)
 
-         expiry = time.time() + (ttl or self.default_ttl)
-         self._cache[key] = (value, expiry)
+         self._cache[key] = value
+         if ttl == 0:
+             # Never expires
+             self._expiry[key] = 0
+         else:
+             expiry_time = time.time() + (ttl if ttl is not None else self.default_ttl)
+             self._expiry[key] = expiry_time
 
      def delete(self, key: str) -> None:
-         """Remove value from cache."""
+         """Remove value from cache.
+
+         Args:
+             key: The cache key to delete.
+
+         Note:
+             If the key doesn't exist, this method does nothing (no error raised).
+         """
          self._cache.pop(key, None)
+         self._expiry.pop(key, None)
 
      def clear_pattern(self, pattern: str) -> int:
-         """Clear all keys matching pattern."""
+         """Clear all keys matching a glob pattern.
+
+         Args:
+             pattern: A glob pattern to match keys. Supports wildcards:
+                 - '*' matches any sequence of characters
+                 - '?' matches any single character
+                 - '[seq]' matches any character in seq
+                 - '[!seq]' matches any character not in seq
+
+         Returns:
+             The number of keys that were deleted.
+
+         Example:
+             >>> cache.set("user:123", data1)
+             >>> cache.set("user:456", data2)
+             >>> cache.set("session:789", data3)
+             >>> count = cache.clear_pattern("user:*")  # Deletes user:123 and user:456
+             >>> print(count)  # 2
+         """
          import fnmatch
 
          keys_to_delete = [k for k in self._cache.keys() if fnmatch.fnmatch(k, pattern)]
          for key in keys_to_delete:
              del self._cache[key]
+             self._expiry.pop(key, None)
          return len(keys_to_delete)
 
      def get_stats(self) -> Dict[str, Any]:
-         """Get cache statistics."""
+         """Get cache performance statistics.
+
+         Returns:
+             Dictionary containing:
+             - hits: Number of successful cache retrievals
+             - misses: Number of cache misses
+             - hit_rate: Ratio of hits to total requests (0.0 to 1.0)
+             - size: Current number of items in cache
+             - max_size: Maximum cache capacity
+
+         Example:
+             >>> stats = cache.get_stats()
+             >>> print(f"Cache is {stats['hit_rate']:.2%} effective")
+             >>> print(f"Using {stats['size']}/{stats['max_size']} slots")
+         """
+         total_requests = self._hits + self._misses
          return {
              "hits": self._hits,
              "misses": self._misses,
-             "hit_rate": self._hits / (self._hits + self._misses) if (self._hits + self._misses) > 0 else 0,
+             "hit_rate": self._hits / total_requests if total_requests > 0 else 0.0,
              "size": len(self._cache),
              "max_size": self.max_size,
          }
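The rewrite above replaces the single `(value, expiry)` tuple store with parallel `_cache`/`_expiry` dicts so that `ttl=0` can act as a never-expires sentinel, and eviction now fires only when a genuinely new key arrives at capacity. A short usage sketch of those semantics, assuming only the behavior shown in the diff:

```python
# Illustrative use of the new TTL/eviction semantics in LocalCacheManager.
from tql.cache.memory import LocalCacheManager

cache = LocalCacheManager(max_size=2, default_ttl=60)
cache.set("a", 1, ttl=0)  # ttl=0 sentinel: never expires
cache.set("b", 2)         # falls back to default_ttl (60s)
cache.set("c", 3)         # cache full and "c" is new: oldest key "a" is evicted

assert cache.get("a") is None
assert cache.get("b") == 2 and cache.get("c") == 3
print(cache.get_stats())  # hits/misses/hit_rate/size/max_size
```

Note the eviction comment in the code is the accurate one: it removes the first key in insertion order, which is FIFO rather than strict LRU, since `get()` never reorders entries.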
tql/core.py CHANGED
@@ -100,8 +100,25 @@ class TQL:
                      # This is an OpenSearch-style mapping, map field to itself
                      self._simple_mappings[k] = k
                  else:
-                     # Extract the first key as the simple mapping
-                     self._simple_mappings[k] = next(iter(v.keys()))
+                     # Intelligent field mapping extraction for complex mappings
+                     # Priority: 1) Key matching field name, 2) Key without dots (primary field), 3) First key
+
+                     if k in v:
+                         # Field name exists as key in mapping (e.g., {"username": {"username": "keyword", ...}})
+                         self._simple_mappings[k] = k
+                     else:
+                         # Find primary field (keys without dots, not starting with underscore)
+                         primary_fields = [
+                             field_key for field_key in v.keys()
+                             if '.' not in field_key and not field_key.startswith('_')
+                         ]
+
+                         if primary_fields:
+                             # Use first primary field
+                             self._simple_mappings[k] = primary_fields[0]
+                         else:
+                             # Fallback to first key (maintain backward compatibility)
+                             self._simple_mappings[k] = next(iter(v.keys()))
              else:
                  # Default to mapping field to itself
                  self._simple_mappings[k] = k
tql/core_components/opensearch_operations.py CHANGED
@@ -239,7 +239,7 @@ class OpenSearchOperations:
          analysis_result = self.analyze_opensearch_query(query)
          has_mutators = isinstance(analysis_result, MutatorAnalysisResult)
          needs_post_processing_for_stats = (
-             has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False
+             has_mutators and bool(analysis_result.post_processing_requirements) if has_mutators else False  # type: ignore[union-attr]
          )
 
          # Handle stats queries differently
@@ -258,7 +258,7 @@ class OpenSearchOperations:
          if filter_ast:
              # Use the optimized AST if we have mutators
              if has_mutators and needs_post_processing_for_stats:
-                 filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]
+                 filter_query = backend.convert(analysis_result.optimized_ast.get("filter", filter_ast))["query"]  # type: ignore[union-attr]
              else:
                  filter_query = backend.convert(filter_ast)["query"]
          else:
@@ -529,6 +529,8 @@ class OpenSearchOperations:
          stats_evaluator = TQLStatsEvaluator()
 
          # Execute the stats aggregation in memory
+         if stats_ast_for_post_processing is None:
+             raise ValueError("Stats AST is None but phase2 processing was requested")
          stats_results = stats_evaluator.evaluate_stats(filtered_docs, stats_ast_for_post_processing, {})
 
          # Format response for stats-only (no documents)
@@ -547,7 +549,7 @@ class OpenSearchOperations:
              "performance_impact": {
                  "overhead_ms": 0,  # Would need timing to calculate
                  "documents_processed": len(all_documents),
-                 "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,
+                 "mutators_applied": len(analysis_result.post_processing_requirements) if has_mutators else 0,  # type: ignore[union-attr]
              },
              "opensearch_query": complete_opensearch_query,
          }
@@ -580,6 +582,8 @@ class OpenSearchOperations:
          translator = OpenSearchStatsTranslator()
 
          # Transform the response using the translator
+         if stats_ast is None:
+             raise ValueError("Stats AST is None but grouping was detected")
          transformed_response = translator.transform_response(response, stats_ast)
 
          # The transformed response already has the correct structure
@@ -925,6 +929,21 @@ class OpenSearchOperations:
          # Get opensearch total before filtering
          opensearch_total = total_hits
 
+         # Track optimization features used in this query
+         optimizations_applied = []
+         if scan_all:
+             optimizations_applied.append("scroll_api")
+         if needs_phase2 and pagination_stats and pagination_stats.get("pages_checked", 0) > 1:
+             optimizations_applied.append("auto_pagination")
+         if request_cache:
+             optimizations_applied.append("request_cache")
+         if preference:
+             optimizations_applied.append("preference_routing")
+         if routing:
+             optimizations_applied.append("custom_routing")
+         if terminate_after:
+             optimizations_applied.append("early_termination")
+
          result = {
              "results": results,
              "total": len(results),
@@ -934,7 +953,7 @@ class OpenSearchOperations:
              "health_status": health_status,
              "health_reasons": health_reasons,
              "performance_impact": performance_impact,
-             "optimizations_applied": [],  # TODO: Track actual optimizations  # noqa: W0511
+             "optimizations_applied": optimizations_applied,
              "opensearch_query": (
                  complete_opensearch_query if "complete_opensearch_query" in locals() else {}
              ),  # Include the full query body
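The new extraction logic prefers a key that matches the field name itself, then the first dot-free, non-underscore "primary" key, and only then falls back to the first key. A worked example with invented mappings (the field names here are hypothetical, not package data):

```python
# Hypothetical mappings illustrating the new priority order.
mappings = {
    # 1) Field name present as a key in its own mapping -> map to itself
    "username": {"username": "keyword", "username.text": "text"},
    # 2) No self-key -> first "primary" key (no dots, no leading underscore)
    "source_ip": {"_meta": "ip metadata", "src_ip": "ip", "src_ip.raw": "keyword"},
    # 3) No primary key at all -> fall back to the first key (old behavior)
    "payload": {"payload.raw": "keyword"},
}
# Expected simple mappings under the new logic:
#   "username"  -> "username"
#   "source_ip" -> "src_ip"
#   "payload"   -> "payload.raw"
```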
tql/evaluator.py CHANGED
@@ -350,6 +350,8 @@ class TQLEvaluator:
              return left_missing or right_missing
          elif node_type == "unary_op":
              # Don't recurse through NOT operators - they handle missing fields themselves
+             # The NOT operator has special logic at lines 213-254 that handles missing fields correctly
+             # Recursing here would cause double-handling and incorrect results
              return False
          elif node_type == "collection_op":
              field_name = node["field"]
tql/evaluator_components/value_comparison.py CHANGED
@@ -6,6 +6,7 @@ operator implementations, and special cases like CIDR matching.
 
  import ipaddress
  import re
+ from functools import lru_cache
  from typing import Any
 
 
@@ -15,6 +16,23 @@ class ValueComparator:
      # Sentinel value to distinguish missing fields from None values
      _MISSING_FIELD = object()
 
+     @staticmethod
+     @lru_cache(maxsize=256)
+     def _compile_regex(pattern: str) -> re.Pattern:
+         """Compile and cache regex patterns for performance.
+
+         Args:
+             pattern: Regex pattern string
+
+         Returns:
+             Compiled regex pattern
+
+         Note:
+             Uses LRU cache with max 256 patterns. This significantly improves
+             performance when the same regex patterns are used repeatedly in queries.
+         """
+         return re.compile(pattern)
+
      def compare_values(self, field_value: Any, operator: str, expected_value: Any) -> bool:  # noqa: C901
          """Compare a field value against an expected value using the given operator.
 
@@ -49,9 +67,17 @@
              return False
 
          # Handle None field values (field exists but is None)
+         # IMPORTANT: None is a valid value, distinct from missing fields.
+         # For 'exists' operator: This code path should NOT be reached because 'exists'
+         # checks field presence in the record, not the value. The evaluator handles
+         # 'exists' before calling compare_values. If we reach here with None, it means
+         # the field exists but has None value, which should NOT match 'exists'.
          if field_value is None:
              if operator in ["exists"]:
-                 return True  # Field exists, even if value is None
+                 # Field key exists in record but value is None
+                 # Semantics: 'exists' means "field has a non-null value"
+                 # This matches database behavior where NULL != EXISTS
+                 return False  # None value does not satisfy 'exists'
              elif operator in ["is"]:
                  # Check for null comparison - expected_value can be None or "null"
                  return expected_value is None or (isinstance(expected_value, str) and expected_value.lower() == "null")
@@ -59,6 +85,10 @@
              return False
 
          # Convert numeric strings to numbers for comparison
+         # IMPORTANT: Store original values to check if conversion succeeded
+         field_value_original = field_value
+         expected_value_original = expected_value
+
          field_value = self._convert_numeric(field_value)
          expected_value = self._convert_numeric(expected_value)
 
@@ -68,6 +98,20 @@
          if isinstance(field_value, str) and field_value.lower() in ["true", "false"]:
              field_value = field_value.lower() == "true"
 
+         # Type compatibility check for numeric operators
+         # If operator requires numeric comparison, both values must be numeric
+         # Exception: Arrays are handled specially in the operator logic below
+         if operator in ["gt", "gte", "lt", "lte", ">", ">=", "<", "<="]:
+             # Skip check if field_value is an array - handled by array logic below
+             if not isinstance(field_value, (list, tuple)):
+                 field_is_numeric = isinstance(field_value, (int, float)) and not isinstance(field_value, bool)
+                 expected_is_numeric = isinstance(expected_value, (int, float)) and not isinstance(expected_value, bool)
+
+                 if not (field_is_numeric and expected_is_numeric):
+                     # At least one value failed numeric conversion
+                     # Cannot perform numeric comparison - return False
+                     return False
+
          try:
              if operator in ["eq", "="]:
                  # Handle array fields - check if ANY element equals expected value
@@ -104,27 +148,30 @@
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
                  # Handle list fields by checking if ANY element contains the expected value
+                 # Case-insensitive comparison to match post-processor behavior
                  if isinstance(field_value, list):
                      # For arrays, check if ANY element contains the expected value
-                     return any(str(expected_value) in str(elem) for elem in field_value)
+                     return any(str(expected_value).lower() in str(elem).lower() for elem in field_value)
                  else:
-                     return str(expected_value) in str(field_value)
+                     return str(expected_value).lower() in str(field_value).lower()
              elif operator == "startswith":
                  # Unwrap single-element lists for string operators
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
                  # Handle array fields - check if ANY element starts with expected value
+                 # Case-insensitive comparison to match post-processor behavior
                  if isinstance(field_value, (list, tuple)):
-                     return any(str(elem).startswith(str(expected_value)) for elem in field_value)
-                 return str(field_value).startswith(str(expected_value))
+                     return any(str(elem).lower().startswith(str(expected_value).lower()) for elem in field_value)
+                 return str(field_value).lower().startswith(str(expected_value).lower())
              elif operator == "endswith":
                  # Unwrap single-element lists for string operators
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
                  # Handle array fields - check if ANY element ends with expected value
+                 # Case-insensitive comparison to match post-processor behavior
                  if isinstance(field_value, (list, tuple)):
-                     return any(str(elem).endswith(str(expected_value)) for elem in field_value)
-                 return str(field_value).endswith(str(expected_value))
+                     return any(str(elem).lower().endswith(str(expected_value).lower()) for elem in field_value)
+                 return str(field_value).lower().endswith(str(expected_value).lower())
              elif operator == "in":
                  if isinstance(expected_value, list):
                      if len(expected_value) == 1 and isinstance(field_value, list):
@@ -143,7 +190,13 @@
                  # Unwrap single-element lists for string operators
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
-                 return bool(re.search(str(expected_value), str(field_value)))
+                 # Use cached regex compilation for performance
+                 try:
+                     pattern = self._compile_regex(str(expected_value))
+                     return bool(pattern.search(str(field_value)))
+                 except (re.error, TypeError):
+                     # Invalid regex pattern, fall back to no match
+                     return False
              elif operator == "cidr":
                  # Unwrap single-element lists for CIDR
                  if isinstance(expected_value, list) and len(expected_value) == 1:
@@ -194,22 +247,31 @@
                  # Unwrap single-element lists for string operators
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
-                 return str(expected_value) not in str(field_value)
+                 # Case-insensitive comparison to match post-processor behavior
+                 return str(expected_value).lower() not in str(field_value).lower()
              elif operator == "not_startswith":
                  # Unwrap single-element lists for string operators
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
-                 return not str(field_value).startswith(str(expected_value))
+                 # Case-insensitive comparison to match post-processor behavior
+                 return not str(field_value).lower().startswith(str(expected_value).lower())
              elif operator == "not_endswith":
                  # Unwrap single-element lists for string operators
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
-                 return not str(field_value).endswith(str(expected_value))
+                 # Case-insensitive comparison to match post-processor behavior
+                 return not str(field_value).lower().endswith(str(expected_value).lower())
              elif operator == "not_regexp":
                  # Unwrap single-element lists for string operators
                  if isinstance(expected_value, list) and len(expected_value) == 1:
                      expected_value = expected_value[0]
-                 return not bool(re.search(str(expected_value), str(field_value)))
+                 # Use cached regex compilation for performance
+                 try:
+                     pattern = self._compile_regex(str(expected_value))
+                     return not bool(pattern.search(str(field_value)))
+                 except (re.error, TypeError):
+                     # Invalid regex pattern, fall back to match (not regexp succeeds)
+                     return True
              elif operator == "not_cidr":
                  # Unwrap single-element lists for CIDR
                  if isinstance(expected_value, list) and len(expected_value) == 1:
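Two themes run through this file's changes: string operators become case-insensitive to match post-processor behavior, and `regexp`/`not_regexp` now compile patterns through an `lru_cache`-memoized helper instead of calling `re.search` on the raw pattern every time. Here is the caching pattern in isolation (a sketch; TQL's real helper is the `_compile_regex` staticmethod shown above):

```python
# Standalone sketch of the memoized regex compilation introduced above.
import re
from functools import lru_cache


@lru_cache(maxsize=256)
def compile_regex(pattern: str) -> re.Pattern:
    # Compiled once per distinct pattern, then served from the LRU cache.
    return re.compile(pattern)


def regexp_match(field_value: object, pattern: str) -> bool:
    try:
        return bool(compile_regex(pattern).search(str(field_value)))
    except re.error:
        # Mirrors the diff: an invalid pattern simply fails to match.
        return False


print(regexp_match("error: disk full", r"disk\s+full"))  # True
print(regexp_match("ok", r"[unclosed"))                  # False, no exception
```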
tql/mutator_analyzer.py CHANGED
@@ -491,7 +491,7 @@ class MutatorAnalyzer:
          for param_name, param_value in geo_params.items():
              mutator_params.append([param_name, param_value])
 
-         geo_mutator = {"name": "geoip_lookup"}
+         geo_mutator: Dict[str, Any] = {"name": "geoip_lookup"}
          if mutator_params:
              geo_mutator["params"] = mutator_params
 
@@ -539,7 +539,7 @@ class MutatorAnalyzer:
          for param_name, param_value in nslookup_params.items():
              mutator_params.append([param_name, param_value])
 
-         nslookup_mutator = {"name": "nslookup"}
+         nslookup_mutator: Dict[str, Any] = {"name": "nslookup"}
          if mutator_params:
              nslookup_mutator["params"] = mutator_params
 
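The `Dict[str, Any]` annotations fix a typing error rather than behavior: without them, mypy infers `{"name": "geoip_lookup"}` as `Dict[str, str]`, so the later `geo_mutator["params"] = mutator_params` assignment of a list value fails type checking. A minimal reproduction of the general mypy behavior (not package code):

```python
from typing import Any, Dict

mutator = {"name": "geoip_lookup"}        # mypy infers Dict[str, str]
# mutator["params"] = [["db", "city"]]    # mypy error: value must be "str"

mutator_ok: Dict[str, Any] = {"name": "geoip_lookup"}
mutator_ok["params"] = [["db", "city"]]   # OK: values are typed as Any
```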
tql/opensearch_stats.py CHANGED
@@ -449,7 +449,7 @@ class OpenSearchStatsTranslator:
          aggregations: List[Dict[str, Any]],
          normalized_fields: List[Dict[str, Any]],
          level: int,
-     ) -> Optional[Dict[str, Any]]:
+     ) -> Union[Dict[str, Any], List[Dict[str, Any]], None]:
          """Transform a bucket recursively for multi-level grouping.
 
          Args:
@@ -502,6 +502,8 @@
                  result["aggregations"][agg_key] = value
              else:
                  # Handle nested buckets
+                 if next_level_field is None:
+                     return None
                  sub_buckets = bucket[next_level_field].get("buckets", [])
                  sub_results = []
                  print(
tql/parser.py CHANGED
@@ -24,6 +24,9 @@ class TQLParser:
      evaluated against data or converted to backend-specific query formats.
      """
 
+     # Maximum query depth to prevent stack overflow and DoS attacks
+     MAX_QUERY_DEPTH = 50
+
      def __init__(self):
          """Initialize the parser with TQL grammar."""
          self.grammar = TQLGrammar()
@@ -53,7 +56,8 @@ class TQLParser:
              parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)
 
              # Convert to our AST format
-             return self._build_ast(parsed_result.asList()[0])
+             # Start depth counting at 0 from parse() entry point
+             return self._build_ast(parsed_result.asList()[0], depth=0)
 
          except ParseException as e:
              # Extract position and context from pyparsing exception
@@ -114,15 +118,29 @@ class TQLParser:
          # Extract fields using the field extractor
          return self.field_extractor.extract_fields(ast)
 
-     def _build_ast(self, parsed: Any) -> Dict[str, Any]:  # noqa: C901
+     def _build_ast(self, parsed: Any, depth: int = 0) -> Dict[str, Any]:  # noqa: C901
          """Build AST from parsed pyparsing result.
 
          Args:
              parsed: The parsed result from pyparsing
+             depth: Current recursion depth (for DoS prevention)
 
          Returns:
              Dictionary representing the AST node
+
+         Raises:
+             TQLSyntaxError: If query depth exceeds maximum allowed depth
          """
+         # Check depth limit to prevent stack overflow and DoS attacks
+         if depth > self.MAX_QUERY_DEPTH:
+             raise TQLSyntaxError(
+                 f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                 "Please simplify your query to reduce nesting.",
+                 position=0,
+                 query="",
+                 suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"]
+             )
+
          if isinstance(parsed, list):
              if len(parsed) == 1:
                  # Single item, check if it's a field with is_private/is_global mutator
@@ -162,7 +180,7 @@
                  }
                  return result
              # Single item, unwrap it
-             return self._build_ast(parsed[0])
+             return self._build_ast(parsed[0], depth + 1)
          elif len(parsed) >= 2 and isinstance(parsed[0], str) and parsed[0].lower() == "stats":
              # This is a stats expression without filter (applies to all records)
              return self._build_stats_ast(parsed)
@@ -210,7 +228,7 @@
              # Check for NOT operator first (before field | mutator check)
              elif isinstance(first, str) and (first.lower() == "not" or first == "!"):
                  # Unary logical operator (NOT or !)
-                 return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second)}
+                 return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second, depth + 1)}
 
              # Check for field | mutator without operator
              # This happens when we have a field with mutator(s) as the last element
@@ -267,12 +285,12 @@
                  # This is filter | stats
                  return {
                      "type": "query_with_stats",
-                     "filter": self._build_ast(first),
+                     "filter": self._build_ast(first, depth + 1),
                      "stats": self._build_stats_ast(second),
                  }
              else:
                  # Fallback to treating as unary logical operator
-                 return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second)}
+                 return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second, depth + 1)}
          elif len(parsed) >= 3:
              # Check if this is a field with multiple mutators
              if isinstance(parsed[0], str) and all(
@@ -419,7 +437,7 @@
                          "field": field_name,
                          "type_hint": type_hint,
                          "field_mutators": field_mutators,
-                         "conditions": self._build_ast(conditions) if conditions else None,
+                         "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                      }
 
                      # Add geo parameters if any
@@ -497,7 +515,7 @@
                          "field": field_name,
                          "type_hint": type_hint,
                          "field_mutators": field_mutators,
-                         "conditions": self._build_ast(conditions) if conditions else None,
+                         "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                      }
 
                      # Add nslookup parameters if any
@@ -638,7 +656,7 @@
                          "field": field_name,
                          "type_hint": type_hint,
                          "field_mutators": field_mutators,
-                         "conditions": self._build_ast(conditions) if conditions else None,
+                         "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                      }
 
                      # Add geo parameters if any
@@ -715,7 +733,7 @@
                          "field": field_name,
                          "type_hint": type_hint,
                          "field_mutators": field_mutators,
-                         "conditions": self._build_ast(conditions) if conditions else None,
+                         "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                      }
 
                      # Add nslookup parameters if any
@@ -725,7 +743,7 @@
                      return result
              else:
                  # This is a chained operation, not a between operation
-                 return self._build_chained_ast(parsed)
+                 return self._build_chained_ast(parsed, depth + 1)
 
          elif len(parsed) == 6:
              # Check for "field not between value1 and value2" or "field ! between value1 and value2"
@@ -814,7 +832,7 @@
                          "field": field_name,
                          "type_hint": type_hint,
                          "field_mutators": field_mutators,
-                         "conditions": self._build_ast(conditions) if conditions else None,
+                         "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                      }
 
                      # Add geo parameters if any
@@ -824,7 +842,7 @@
                      return result
              else:
                  # This is a chained operation, not a not_between operation
-                 return self._build_chained_ast(parsed)
+                 return self._build_chained_ast(parsed, depth + 1)
 
          elif len(parsed) == 3:
              # Binary operation or comparison (including negated unary operators like "field not exists")
@@ -869,7 +887,7 @@
                          "field": field_name,
                          "type_hint": type_hint,
                          "field_mutators": field_mutators,
-                         "conditions": self._build_ast(conditions) if conditions else None,
+                         "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                      }
 
                      # Add geo parameters if any
@@ -923,7 +941,7 @@
                          "field": field_name,
                          "type_hint": type_hint,
                          "field_mutators": field_mutators,
-                         "conditions": self._build_ast(conditions) if conditions else None,
+                         "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                      }
 
                      # Add nslookup parameters if any
@@ -937,8 +955,8 @@
                  return {
                      "type": "logical_op",
                      "operator": operator.lower(),
-                     "left": self._build_ast(left),
-                     "right": self._build_ast(right),
+                     "left": self._build_ast(left, depth + 1),
+                     "right": self._build_ast(right, depth + 1),
                  }
              elif (
                  isinstance(operator, str)
@@ -1189,7 +1207,7 @@
              # Handle longer lists (chained operations)
              # This happens with infixNotation for multiple AND/OR operations
              # The structure will be flattened, so we need to reconstruct the tree
-             return self._build_chained_ast(parsed)
+             return self._build_chained_ast(parsed, depth + 1)
          else:
              # Single value - should already be a proper AST node
              if isinstance(parsed, dict):
@@ -1201,21 +1219,34 @@
          # This should be unreachable, but helps mypy understand all paths return
          raise AssertionError("Unreachable code in _build_ast")
 
-     def _build_chained_ast(self, parsed_list: List[Any]) -> Dict[str, Any]:
+     def _build_chained_ast(self, parsed_list: List[Any], depth: int = 0) -> Dict[str, Any]:
          """Build AST from chained operations (e.g., A AND B AND C).
 
          Args:
              parsed_list: List of alternating operands and operators
+             depth: Current recursion depth (for DoS prevention)
 
          Returns:
              Dictionary representing the AST node
+
+         Raises:
+             TQLSyntaxError: If query depth exceeds maximum allowed depth
          """
+         # Check depth limit to prevent stack overflow
+         if depth > self.MAX_QUERY_DEPTH:
+             raise TQLSyntaxError(
+                 f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                 "Please simplify your query to reduce nesting.",
+                 position=0,
+                 query="",
+                 suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"]
+             )
+
          if len(parsed_list) < 3:
              # Not enough elements for a chained operation
              return {"type": "unknown", "value": parsed_list}
 
          # Start with the first operand
-         result = self._build_ast(parsed_list[0])
+         result = self._build_ast(parsed_list[0], depth + 1)
 
          # Process pairs of (operator, operand)
          i = 1
@@ -1228,7 +1259,7 @@
                      "type": "logical_op",
                      "operator": operator.lower(),
                      "left": result,
-                     "right": self._build_ast(operand),
+                     "right": self._build_ast(operand, depth + 1),
                  }
              else:
                  # This shouldn't happen in a well-formed chained expression
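The parser changes thread a `depth` counter through every recursive `_build_ast`/`_build_chained_ast` call and abort past `MAX_QUERY_DEPTH = 50`, turning a potential stack-overflow DoS into a clean syntax error. A self-contained sketch of the pattern (simplified node shapes; `ValueError` stands in for TQL's `TQLSyntaxError`, which takes `position`, `query`, and `suggestions` as shown above):

```python
# Simplified sketch of the depth-guard pattern added in this release.
from typing import Any, Dict

MAX_QUERY_DEPTH = 50


def build_ast(parsed: Any, depth: int = 0) -> Dict[str, Any]:
    if depth > MAX_QUERY_DEPTH:
        raise ValueError(
            f"Query depth exceeds maximum allowed depth of {MAX_QUERY_DEPTH}"
        )
    if isinstance(parsed, list) and len(parsed) == 3:
        left, op, right = parsed
        return {
            "type": "logical_op",
            "operator": str(op).lower(),
            "left": build_ast(left, depth + 1),
            "right": build_ast(right, depth + 1),
        }
    return {"type": "value", "value": parsed}


# A pathologically nested input now fails fast instead of recursing unbounded.
nested: Any = "x"
for _ in range(100):
    nested = [nested, "AND", "y"]
try:
    build_ast(nested)
except ValueError as exc:
    print(exc)
```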
tql/post_processor.py CHANGED
@@ -73,12 +73,17 @@ class QueryPostProcessor:
                  field_accessor.get_field_value, evaluator._evaluate_node
              )
 
+             # Safe access with validation
+             # conditions is guaranteed to exist by the if check above
+             conditions = requirement.metadata["conditions"]
+             nslookup_params = requirement.metadata.get("nslookup_params", {})
+
              # Build node for evaluation
              node = {
                  "type": "nslookup_expr",
                  "field": requirement.field_name,
-                 "conditions": requirement.metadata["conditions"],
-                 "nslookup_params": requirement.metadata.get("nslookup_params", {}),
+                 "conditions": conditions,
+                 "nslookup_params": nslookup_params,
              }
 
              # Evaluate the nslookup expression
@@ -91,6 +96,7 @@
              and requirement.metadata
              and "conditions" in requirement.metadata
          ):
+             # Safe access - conditions is guaranteed to exist by the if check
              conditions = requirement.metadata["conditions"]
              if conditions:
                  # Get the geo data that was enriched
@@ -129,12 +135,13 @@
              # Check if this is an array operator with comparison
              if "comparison_operator" in requirement.metadata:
                  # This is a special case: field | any/all/none eq value
-                 array_operator = requirement.metadata["operator"]
-                 comparison_operator = requirement.metadata["comparison_operator"]
+                 # Safe access - both keys are guaranteed to exist by the if checks
+                 array_operator = requirement.metadata["operator"]  # exists from line 128 check
+                 comparison_operator = requirement.metadata["comparison_operator"]  # exists from line 135 check
                  value = requirement.metadata.get("value")
 
-                 # Get the field value
-                 temp_field_name = f"__{requirement.field_name}_mutated__"
+                 # Get the field value with proper nested field handling
+                 temp_field_name = self._get_mutated_field_name(requirement.field_name)
                  field_value = self._get_field_value(result, temp_field_name)
                  if field_value is None:
                      # No mutated value, get original
@@ -148,18 +155,21 @@
                          break
              else:
                  # Regular operator check
+                 # Safe access - operator is guaranteed to exist by the if check at line 134
                  operator = requirement.metadata["operator"]
                  value = requirement.metadata.get("value")
 
                  # Check if this was originally a different operator (for type-changing mutators)
                  if requirement.metadata.get("_original_comparison"):
+                     # Safe access - validated by .get() check above
                      original = requirement.metadata["_original_comparison"]
-                     operator = original["operator"]
+                     # Validate that operator exists in original
+                     operator = original.get("operator", operator)
                      value = original.get("value", value)
 
                  # Get the field value - either mutated or original
                  # First check for mutated value in temp field
-                 temp_field_name = f"__{requirement.field_name}_mutated__"
+                 temp_field_name = self._get_mutated_field_name(requirement.field_name)
                  field_value = self._get_field_value(result, temp_field_name)
                  if field_value is None:
                      # No mutated value, get original
@@ -373,7 +383,7 @@
              return False
 
          # Get the field value
-         temp_field_name = f"__{field_name}_mutated__"
+         temp_field_name = self._get_mutated_field_name(field_name)
          field_value = self._get_field_value(result, temp_field_name)
          if field_value is None:
              # No mutated value, get original
@@ -703,7 +713,7 @@
              self._set_field_value(result, requirement.field_name, mutated_value)
          elif not is_geo_enrichment:
              # For type-changing mutators with filtering operations, store in temp field
-             temp_field_name = f"__{requirement.field_name}_mutated__"
+             temp_field_name = self._get_mutated_field_name(requirement.field_name)
              self._set_field_value(result, temp_field_name, mutated_value)
 
          # Check if we have any enrichment mutators
@@ -994,6 +1004,25 @@
 
          return current
 
+     def _get_mutated_field_name(self, field_name: str) -> str:
+         """Generate the correct mutated field name for nested or flat fields.
+
+         Args:
+             field_name: The original field name (e.g., "user.address.zip" or "status")
+
+         Returns:
+             Mutated field name with proper nesting:
+             - "user.address.zip" -> "user.address.__zip_mutated__"
+             - "status" -> "__status_mutated__"
+         """
+         field_parts = field_name.split('.')
+         if len(field_parts) > 1:
+             # For nested fields, only mutate the leaf field name
+             return '.'.join(field_parts[:-1] + [f"__{field_parts[-1]}_mutated__"])
+         else:
+             # For flat fields, mutate the entire name
+             return f"__{field_name}_mutated__"
+
      def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
          """Get a field value from a record, supporting nested fields.
 
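The key fix here is `_get_mutated_field_name`: the old flat `f"__{field_name}_mutated__"` turned a nested path like `user.address.zip` into `__user.address.zip_mutated__`, which a dot-path field accessor then tried to resolve as nesting and missed. Keeping the marker on the leaf segment preserves traversal. A quick check of the naming rule, mirroring the helper above:

```python
# Mirrors _get_mutated_field_name from the diff, for a quick sanity check.
def mutated_name(field_name: str) -> str:
    parts = field_name.split('.')
    if len(parts) > 1:
        return '.'.join(parts[:-1] + [f"__{parts[-1]}_mutated__"])
    return f"__{field_name}_mutated__"


assert mutated_name("user.address.zip") == "user.address.__zip_mutated__"
assert mutated_name("status") == "__status_mutated__"
```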
tql/scripts.py CHANGED
@@ -1,5 +1,6 @@
  """ Runs pytest, coverage, linters, and security checks. """
 
+ import os
  import subprocess  # nosec
 
 
@@ -29,8 +30,13 @@ def run_coverage():
      Run coverage against all files in the `src` directory
      and output an XML report to `reports/coverage.xml`.
      """
+     # Set environment to skip integration tests by default
+     env = os.environ.copy()
+     if "INTEGRATION_TEST_ENABLE" not in env:
+         env["INTEGRATION_TEST_ENABLE"] = "false"
+
      # 1. Run pytest with coverage, using `src` as the source
-     subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True)  # nosec
+     subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True, env=env)  # nosec
 
      # 2. Generate an XML coverage report in `reports/coverage.xml`
      subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec
@@ -40,7 +46,12 @@ def run_coverage():
 
  def run_tests():
      """Runs pytests against tests in the `tests` directory."""
-     subprocess.run(["pytest", "tests"], check=True)  # nosec
+     # Set environment to skip integration tests by default
+     env = os.environ.copy()
+     if "INTEGRATION_TEST_ENABLE" not in env:
+         env["INTEGRATION_TEST_ENABLE"] = "false"
+
+     subprocess.run(["pytest", "tests"], check=True, env=env)  # nosec
 
 
  def run_lint_all():
@@ -84,6 +95,11 @@ def run_lint():
 
  def run_badge():
      """Generate a badge using genbadge."""
+     # Set environment to skip integration tests by default
+     env = os.environ.copy()
+     if "INTEGRATION_TEST_ENABLE" not in env:
+         env["INTEGRATION_TEST_ENABLE"] = "false"
+
      subprocess.run(  # nosec
          [
              "coverage",
@@ -94,6 +110,7 @@ def run_badge():
              "--junit-xml=reports/junit/junit.xml",
          ],
          check=True,
+         env=env,
      )
 
      # 2. Generate an XML coverage report in `reports/coverage.xml`
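All three runners now copy the parent environment and default `INTEGRATION_TEST_ENABLE` to `"false"` only when the caller has not already set it, so integration tests stay opt-in. The pattern in isolation (a sketch; `setdefault` is equivalent to the diff's explicit membership check):

```python
# The env-defaulting pattern used above, isolated.
import os
import subprocess

env = os.environ.copy()
env.setdefault("INTEGRATION_TEST_ENABLE", "false")
subprocess.run(["pytest", "tests"], check=True, env=env)  # nosec
```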
tql/stats_evaluator.py CHANGED
@@ -6,7 +6,7 @@ aggregation queries against data records in memory.
 
  import statistics
  from collections import defaultdict
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Dict, List, Optional, Set, Union
 
  from .exceptions import TQLError
 
@@ -180,7 +180,9 @@ class TQLStatsEvaluator:
          # Build group key
          key_parts = []
          for field_spec in normalized_fields:
-             field_name = field_spec["field"]
+             field_name = field_spec.get("field")
+             if field_name is None:
+                 continue
              value = self._get_field_value(record, field_name)
              key_parts.append((field_name, value))
 
@@ -407,7 +409,7 @@ class TQLStatsEvaluator:
          filtered_results = []
 
          # Track unique values at each level
-         level_values = {}
+         level_values: Dict[int, Dict[Any, Set[Any]]] = {}
          for level, field_spec in enumerate(normalized_fields):
              level_values[level] = {}